parent
79a46d18b6
commit
110523d6f3
29
GNUmakefile
29
GNUmakefile
|
@ -1,24 +1,23 @@
|
|||
#
|
||||
# GNUmakefile for Galois field library
|
||||
#
|
||||
#
|
||||
# The default flags do *not* have the SSE instructions enabled.
|
||||
# Please cd to flag_tester and run which_compile_flags.sh to see which SSE instructions
|
||||
# your machine and compiler support, and which flags you should include below.
|
||||
|
||||
CFLAGS = -O3
|
||||
LDFLAGS = -O3
|
||||
|
||||
SRCS = gf_w4.c gf_w8.c gf_w16.c gf_w32.c gf_w64.c gf_w128.c gf_wgen.c gf.c gf_unit.c \
|
||||
gf_time.c gf_mult.c gf_method.c gf_methods.c gf_div.c gf_rand.c gf_general.c \
|
||||
gf_poly.c gf_example_1.c gf_add.c gf_example_2.c gf_example_3.c gf_example_4.c \
|
||||
gf_inline_time.c
|
||||
gf_inline_time.c gf_example_5.c gf_example_6.c gf_example_7.c
|
||||
|
||||
HDRS = gf_complete.h gf_int.h
|
||||
|
||||
EXECUTABLES = gf_mult gf_div gf_add gf_unit gf_time gf_methods gf_poly \
|
||||
gf_example_1 gf_example_2 gf_example_3 gf_example_4 gf_inline_time
|
||||
|
||||
CFLAGS = -O3 -msse4 -maes -mpclmul -DINTEL_SSE4 -DINTEL_PCLMUL
|
||||
LDFLAGS = -O3 -msse4 -maes -mpclmul
|
||||
|
||||
# Use these if you don't have INTEL_PCLMUL
|
||||
# CFLAGS = -O3 -msse4 -DINTEL_SSE4
|
||||
# LDFLAGS = -O3 -msse4
|
||||
gf_example_1 gf_example_2 gf_example_3 gf_example_4 gf_inline_time \
|
||||
gf_example_5 gf_example_6 gf_example_7
|
||||
|
||||
RM = /bin/rm -f
|
||||
|
||||
|
@ -45,6 +44,9 @@ gf_example_1: gf_example_1.o gf_complete.a
|
|||
gf_example_2: gf_example_2.o gf_complete.a
|
||||
gf_example_3: gf_example_3.o gf_complete.a
|
||||
gf_example_4: gf_example_4.o gf_complete.a
|
||||
gf_example_5: gf_example_5.o gf_complete.a
|
||||
gf_example_6: gf_example_6.o gf_complete.a
|
||||
gf_example_7: gf_example_7.o gf_complete.a
|
||||
gf_mult: gf_mult.o gf_complete.a
|
||||
gf_div: gf_div.o gf_complete.a
|
||||
gf_poly: gf_poly.o gf_complete.a
|
||||
|
@ -54,7 +56,8 @@ clean:
|
|||
$(RM) $(OBJS) gf_div.c
|
||||
|
||||
spotless: clean
|
||||
$(RM) *~ $(EXECUTABLES)
|
||||
$(RM) *~ $(EXECUTABLES) which_compile_flags
|
||||
$(RM) gf_complete.a
|
||||
|
||||
gf_div.o: gf_complete.h gf_method.h
|
||||
gf_methods.o: gf_complete.h gf_method.h
|
||||
|
@ -71,8 +74,12 @@ gf_example_1.o: gf_complete.h gf_rand.h
|
|||
gf_example_2.o: gf_complete.h gf_rand.h
|
||||
gf_example_3.o: gf_complete.h gf_rand.h
|
||||
gf_example_4.o: gf_complete.h gf_rand.h
|
||||
gf_example_5.o: gf_complete.h gf_rand.h
|
||||
gf_example_6.o: gf_complete.h gf_rand.h
|
||||
gf_example_7.o: gf_complete.h gf_rand.h
|
||||
gf_general.o: gf_complete.h gf_int.h gf_general.h gf_rand.h
|
||||
gf_mult.o: gf_complete.h gf_method.h
|
||||
gf.o: gf_complete.h gf_int.h
|
||||
gf_method.o: gf_complete.h
|
||||
|
||||
gf_div.c: gf_mult.c
|
||||
|
|
Binary file not shown.
Binary file not shown.
16
README.txt
16
README.txt
|
@ -1,5 +1,13 @@
|
|||
This is GF-Complete, Revision 0.1.
|
||||
This is GF-Complete, Revision 1.0.
|
||||
|
||||
The user's manual is in the file Manual.pdf.
|
||||
|
||||
There are two online homes for GF-Complete:
|
||||
|
||||
- https://bitbucket.org/jimplank/gf-complete
|
||||
- http://www.cs.utk.edu/~plank/plank/papers/CS-13-716.html
|
||||
|
||||
When compiling this for the first time, cd to flag_tester, and
|
||||
do "sh which_compile_flags.sh xxx", where xxx is the compiler
|
||||
that you will use in the GNUmakefile.
|
||||
|
||||
Please see http://www.cs.utk.edu/~plank/plank/papers/CS-13-703.html for the user's
|
||||
manual and other important documentation about this library, including more
|
||||
recent revisions.
|
||||
|
|
777
explanation.html
777
explanation.html
|
@ -1,777 +0,0 @@
|
|||
<h3>Code structure as of 7/20/2012</h3>
|
||||
|
||||
written by Jim.
|
||||
<p>
|
||||
Ok -- once again, I have messed with the structure. My goal is flexible and efficient.
|
||||
It's similar to the stuff before, but better because it makes things like Euclid's
|
||||
method much cleaner.
|
||||
<p>
|
||||
I think we're ready to hack.
|
||||
<p>
|
||||
<p>
|
||||
<hr>
|
||||
<h3>Files</h3>
|
||||
<UL>
|
||||
<LI> <a href=GNUmakefile><b>GNUmakefile</b></a>: Makefile
|
||||
<LI> <a href=README><b>README</b></a>: Empty readme
|
||||
<LI> <a href=explanation.html><b>explanation.html</b></a>: This file.
|
||||
<LI> <a href=gf.c><b>gf.c</b></a>: Main gf routines
|
||||
<LI> <a href=gf.h><b>gf.h</b></a>: Main gf prototypes and typedefs
|
||||
<LI> <a href=gf_int.h><b>gf_int.h</b></a>: Prototypes and typedefs for common routines for the
|
||||
internal gf implementations.
|
||||
<LI> <a href=gf_method.c><b>gf_method.c</b></a>: Code to help parse argc/argv to define the method.
|
||||
This way, various programs can be consistent with how they handle the command line.
|
||||
<LI> <a href=gf_method.h><b>gf_method.h</b></a>: Prototypes for ibid.
|
||||
<LI> <a href=gf_methods.c><b>gf_methods.c</b></a>: This program prints out how to define
|
||||
the various methods on the command line. My idea is to beef this up so that you can
|
||||
give it a method spec on the command line, and it will tell you whether it's valid, or
|
||||
why it's invalid. I haven't written that part yet.
|
||||
<LI> <a href=gf_mult.c><b>gf_mult.c</b></a>: Program to do single multiplication.
|
||||
<LI> <a href=gf_div.c><b>gf_div.c</b></a>: Program to do single divisions -- it's created
|
||||
in the makefile with a sed script on gf_mult.c.
|
||||
<LI> <a href=gf_time.c><b>gf_time.c</b></a>: Time tester
|
||||
<LI> <a href=gf_unit.c><b>gf_unit.c</b></a>: Unit tester
|
||||
<LI> <a href=gf_54.c><b>gf_54.c</b></a>: A simple example program that multiplies
|
||||
5 and 4 in GF(2^4).
|
||||
<LI> <a href=gf_w4.c><b>gf_w4.c</b></a>: Implementation of code for <i>w</i> = 4.
|
||||
(For now, only SHIFT and LOG, plus EUCLID & MATRIX).
|
||||
<LI> <a href=gf_w8.c><b>gf_w8.c</b></a>: Implementation of code for <i>w</i> = 8.
|
||||
(For now, only SHIFT plus EUCLID & MATRIX).
|
||||
<LI> <a href=gf_w16.c><b>gf_w16.c</b></a>: Implementation of code for <i>w</i> = 16.
|
||||
(For now, only SHIFT plus EUCLID & MATRIX).
|
||||
<LI> <a href=gf_w32.c><b>gf_w32.c</b></a>: Implementation of code for <i>w</i> = 32.
|
||||
(For now, only SHIFT plus EUCLID & MATRIX).
|
||||
<LI> <a href=gf_w64.c><b>gf_w64.c</b></a>: Implementation of code for <i>w</i> = 64.
|
||||
(For now, only SHIFT and EUCLID).
|
||||
<LI> I don't have gf_w128.c or gf_gen.c yet.
|
||||
</UL>
|
||||
|
||||
<hr>
|
||||
<h3>Prototypes and typedefs in gf.h</h3>
|
||||
|
||||
The main structure that users will see is in <b>gf.h</b>, and it is of type
|
||||
<b>gf_t</b>:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
typedef struct gf {
|
||||
gf_func_a_b multiply;
|
||||
gf_func_a_b divide;
|
||||
gf_func_a inverse;
|
||||
gf_region multiply_region;
|
||||
void *scratch;
|
||||
} gf_t;
|
||||
</pre></td></table></center><p>
|
||||
|
||||
We can beef it up later with buf-buf or buf-acc. The problem is that the paper is
|
||||
already bloated, so right now, I want to keep it lean.
|
||||
<p>
|
||||
The types of the procedures are big unions, so that they work with the following
|
||||
types of arguments:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
typedef uint8_t gf_val_4_t;
|
||||
typedef uint8_t gf_val_8_t;
|
||||
typedef uint16_t gf_val_16_t;
|
||||
typedef uint32_t gf_val_32_t;
|
||||
typedef uint64_t gf_val_64_t;
|
||||
typedef uint64_t *gf_val_128_t;
|
||||
typedef uint32_t gf_val_gen_t; /* The intent here is for general values <= 32 */
|
||||
</pre></td></table></center><p>
|
||||
|
||||
To use one of these, you need to create one with <b>gf_init_easy()</b> or
|
||||
<b>gf_init_hard()</b>. Let's concentrate on the former:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
extern int gf_init_easy(gf_t *gf, int w, int mult_type);
|
||||
</pre></td></table></center><p>
|
||||
|
||||
You pass it memory for a <b>gf_t</b>, a value of <b>w</b> and
|
||||
a variable that says how to do multiplication. The valid values of <b>mult_type</b>
|
||||
are enumerated in <b>gf.h</b>:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
typedef enum {GF_MULT_DEFAULT,
|
||||
GF_MULT_SHIFT,
|
||||
GF_MULT_GROUP,
|
||||
GF_MULT_BYTWO_p,
|
||||
GF_MULT_BYTWO_b,
|
||||
GF_MULT_TABLE,
|
||||
GF_MULT_LOG_TABLE,
|
||||
GF_MULT_SPLIT_TABLE,
|
||||
GF_MULT_COMPOSITE } gf_mult_type_t;
|
||||
</pre></td></table></center><p>
|
||||
|
||||
After creating the <b>gf_t</b>, you use its <b>multiply</b> method
|
||||
to multiply, using the union's fields to work with the various types.
|
||||
It looks easier than my explanation. For example, suppose you wanted to multiply 5 and 4 in <i>GF(2<sup>4</sup>)</i>.
|
||||
You can do it as in
|
||||
<b><a href=gf_54.c>gf_54.c</a></b>
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
#include "gf.h"
|
||||
|
||||
main()
|
||||
{
|
||||
gf_t gf;
|
||||
|
||||
gf_init_easy(&gf, 4, GF_MULT_DEFAULT);
|
||||
printf("%d\n", gf.multiply.w4(&gf, 5, 4));
|
||||
exit(0);
|
||||
}
|
||||
</pre></td></table></center><p>
|
||||
|
||||
|
||||
If you wanted to multiply in <i>GF(2<sup>8</sup>)</i>, then you'd have to use 8 as a parameter
|
||||
to <b>gf_init_easy</b>, and call the multiplier as <b>gf.multiply.w8()</b>.
|
||||
<p>
|
||||
When you're done with your <b>gf_t</b>, you should call <b>gf_free()</b> on it so
|
||||
that it can free memory that it has allocated. We'll talk more about memory later, but if you
|
||||
create your <b>gf_t</b> with <b>gf_init_easy</b>, then it calls <b>malloc()</b>, and
|
||||
if you care about freeing memory, you'll have to call <b>gf_free()</b>.
|
||||
<p>
|
||||
|
||||
<hr>
|
||||
<h3>Memory allocation</h3>
|
||||
|
||||
Each implementation of a multiplication technique keeps around its
|
||||
own data. For example, <b>GF_MULT_TABLE</b> keeps around
|
||||
multiplication and division tables, and <b>GF_MULT_LOG_TABLE</b> maintains log and
|
||||
antilog tables. This data is stored in the pointer <b>scratch</b>. My intent
|
||||
is that the memory that is there is all that's required. In other
|
||||
words, the <b>multiply()</b>, <b>divide()</b>, <b>inverse()</b> and
|
||||
<b>multiply_region()</b> calls don't do any memory allocation.
|
||||
Moreover, <b>gf_init_easy()</b> only allocates one chunk of memory --
|
||||
the one in <b>scratch</b>.
|
||||
<p>
|
||||
If you don't want to have the initialization call allocate memory, you can use <b>gf_init_hard()</b>:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
extern int gf_init_hard(gf_t *gf,
|
||||
int w,
|
||||
int mult_type,
|
||||
int region_type,
|
||||
int divide_type,
|
||||
uint64_t prim_poly,
|
||||
int arg1,
|
||||
int arg2,
|
||||
gf_t *base_gf,
|
||||
void *scratch_memory);
|
||||
</pre></td></table></center><p>
|
||||
|
||||
The first three parameters are the same as <b>gf_init_easy()</b>.
|
||||
You can add additional arguments for performing <b>multiply_region</b>, and
|
||||
for performing division in the <b>region_type</b> and <b>divide_type</b>
|
||||
arguments. Their values are also defined in <b>gf.h</b>. You can
|
||||
mix the <b>region_type</b> values (e.g. "DOUBLE" and "SSE"):
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
#define GF_REGION_DEFAULT (0x0)
|
||||
#define GF_REGION_SINGLE_TABLE (0x1)
|
||||
#define GF_REGION_DOUBLE_TABLE (0x2)
|
||||
#define GF_REGION_QUAD_TABLE (0x4)
|
||||
#define GF_REGION_LAZY (0x8)
|
||||
#define GF_REGION_SSE (0x10)
|
||||
#define GF_REGION_NOSSE (0x20)
|
||||
#define GF_REGION_STDMAP (0x40)
|
||||
#define GF_REGION_ALTMAP (0x80)
|
||||
#define GF_REGION_CAUCHY (0x100)
|
||||
|
||||
typedef uint32_t gf_region_type_t;
|
||||
|
||||
typedef enum { GF_DIVIDE_DEFAULT,
|
||||
GF_DIVIDE_MATRIX,
|
||||
GF_DIVIDE_EUCLID } gf_division_type_t;
|
||||
</pre></td></table></center><p>
|
||||
You can change
|
||||
the primitive polynomial with <b>prim_poly</b>, give additional arguments with
|
||||
<b>arg1</b> and <b>arg2</b> and give a base Galois Field for composite fields.
|
||||
Finally, you can pass it a pointer to memory in <b>scratch_memory</b>. That
|
||||
way, you can avoid having <b>gf_init_hard()</b> call <b>malloc()</b>.
|
||||
<p>
|
||||
There is a procedure called <b>gf_scratch_size()</b> that lets you know the minimum
|
||||
size for <b>scratch_memory</b>, depending on <i>w</i>, the multiplication type
|
||||
and the arguments:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
extern int gf_scratch_size(int w,
|
||||
int mult_type,
|
||||
int region_type,
|
||||
int divide_type,
|
||||
int arg1,
|
||||
int arg2);
|
||||
</pre></td></table></center><p>
|
||||
|
||||
You can specify default arguments in <b>gf_init_hard()</b>:
|
||||
<UL>
|
||||
<LI> <b>region_type</b> = <b>GF_REGION_DEFAULT</b>
|
||||
<LI> <b>divide_type</b> = <b>GF_DIVIDE_DEFAULT</b>
|
||||
<LI> <b>prim_poly</b> = 0
|
||||
<LI> <b>arg1</b> = 0
|
||||
<LI> <b>arg2</b> = 0
|
||||
<LI> <b>base_gf</b> = <b>NULL</b>
|
||||
<LI> <b>scratch_memory</b> = <b>NULL</b>
|
||||
</UL>
|
||||
If any argument is equal to its default, then default actions are taken (e.g. a
|
||||
standard primitive polynomial is used, or memory is allocated for <b>scratch_memory</b>).
|
||||
In fact, <b>gf_init_easy()</b> simply calls <b>gf_init_hard()</b> with the default
|
||||
parameters.
|
||||
<p>
|
||||
<b>gf_free()</b> frees memory that was allocated with <b>gf_init_easy()</b>
|
||||
or <b>gf_init_hard()</b>. The <b>recursive</b> parameter is in case you
|
||||
use composite fields, and want to recursively free the base fields.
|
||||
If you pass <b>scratch_memory</b> to <b>gf_init_hard()</b>, then you typically
|
||||
don't need to call <b>gf_free()</b>. It won't hurt to call it, though.
|
||||
|
||||
<hr>
|
||||
<h3>gf_mult and gf_div</h3>
|
||||
|
||||
For the moment, I have few things completely implemented, but that's because I want
|
||||
to be able to explain the structure, and how to specify methods. In particular, for
|
||||
<i>w=4</i>, I have implemented <b>SHIFT</b> and <b>LOG</b>. For <i>w=8, 16, 32, 64</i>
|
||||
I have implemented <b>SHIFT</b>. For all <i>w ≤ 32</i>, I have implemented both
|
||||
Euclid's algorithm for inversion, and the matrix method for inversion. For
|
||||
<i>w=64</i>, it's just Euclid. You can
|
||||
test these all with <b>gf_mult</b> and <b>gf_div</b>. Here are a few calls:
|
||||
|
||||
<pre>
|
||||
UNIX> <font color=darkred><b>gf_mult 7 11 4</b></font> - Default
|
||||
4
|
||||
UNIX> <font color=darkred><b>gf_mult 7 11 4 SHIFT - -</b></font> - Use shift
|
||||
4
|
||||
UNIX> <font color=darkred><b>gf_mult 7 11 4 LOG - -</b></font> - Use logs
|
||||
4
|
||||
UNIX> <font color=darkred><b>gf_div 4 7 4</b></font> - Default
|
||||
11
|
||||
UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - -</b></font> - Use logs
|
||||
11
|
||||
UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - EUCLID</b></font> - Use Euclid instead of logs
|
||||
11
|
||||
UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - MATRIX</b></font> - Use Matrix inversion instead of logs
|
||||
11
|
||||
UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - -</b></font> - Default
|
||||
11
|
||||
UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - EUCLID</b></font> - Use Euclid (which is the default)
|
||||
11
|
||||
UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - MATRIX</b></font> - Use Matrix inversion instead of Euclid
|
||||
11
|
||||
UNIX> <font color=darkred><b>gf_mult 200 211 8</b></font> - The remainder are shift/Euclid
|
||||
201
|
||||
UNIX> <font color=darkred><b>gf_div 201 211 8</b></font>
|
||||
200
|
||||
UNIX> <font color=darkred><b>gf_mult 60000 65111 16</b></font>
|
||||
63515
|
||||
UNIX> <font color=darkred><b>gf_div 63515 65111 16</b></font>
|
||||
60000
|
||||
UNIX> <font color=darkred><b>gf_mult abcd0001 9afbf788 32h</b></font>
|
||||
b0359681
|
||||
UNIX> <font color=darkred><b>gf_div b0359681 9afbf788 32h</b></font>
|
||||
abcd0001
|
||||
UNIX> <font color=darkred><b>gf_mult abcd00018c8b8c8a 9afbf7887f6d8e5b 64h</b></font>
|
||||
3a7def35185bd571
|
||||
UNIX> <font color=darkred><b>gf_mult abcd00018c8b8c8a 9afbf7887f6d8e5b 64h</b></font>
|
||||
3a7def35185bd571
|
||||
UNIX> <font color=darkred><b>gf_div 3a7def35185bd571 9afbf7887f6d8e5b 64h</b></font>
|
||||
abcd00018c8b8c8a
|
||||
UNIX> <font color=darkred><b></b></font>
|
||||
</pre>
|
||||
|
||||
You can see all the methods with <b>gf_methods</b>. We have a lot of implementing to do:
|
||||
|
||||
<pre>
|
||||
UNIX> <font color=darkred><b>gf_methods</b></font>
|
||||
To specify the methods, do one of the following:
|
||||
- leave empty to use defaults
|
||||
- use a single dash to use defaults
|
||||
- specify MULTIPLY REGION DIVIDE
|
||||
|
||||
Legal values of MULTIPLY:
|
||||
SHIFT: shift
|
||||
GROUP g_mult g_reduce: the Group technique - see the paper
|
||||
BYTWO_p: BYTWO doubling the product.
|
||||
BYTWO_b: BYTWO doubling b (more efficient thatn BYTWO_p)
|
||||
TABLE: Full multiplication table
|
||||
LOG: Discrete logs
|
||||
LOG_ZERO: Discrete logs with a large table for zeros
|
||||
SPLIT g_a g_b: Split tables defined by g_a and g_b
|
||||
COMPOSITE k l [METHOD]: Composite field, recursively specify the
|
||||
method of the base field in GF(2^l)
|
||||
|
||||
Legal values of REGION: Specify multiples with commas e.g. 'DOUBLE,LAZY'
|
||||
-: Use defaults
|
||||
SINGLE/DOUBLE/QUAD: Expand tables
|
||||
LAZY: Lazily create table (only applies to TABLE and SPLIT)
|
||||
SSE/NOSSE: Use 128-bit SSE instructions if you can
|
||||
CAUCHY/ALTMAP/STDMAP: Use different memory mappings
|
||||
|
||||
Legal values of DIVIDE:
|
||||
-: Use defaults
|
||||
MATRIX: Use matrix inversion
|
||||
EUCLID: Use the extended Euclidian algorithm.
|
||||
|
||||
See the user's manual for more information.
|
||||
There are many restrictions, so it is better to simply use defaults in most cases.
|
||||
UNIX> <font color=darkred><b></b></font>
|
||||
</pre>
|
||||
|
||||
<hr>
|
||||
<h3>gf_unit and gf_time</h3>
|
||||
|
||||
<b><a href=gf_unit.c>gf_unit.c</a></b> is a unit tester, and
|
||||
<b><a href=gf_time.c>gf_time.c</a></b> is a time tester.
|
||||
|
||||
They are called as follows:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
UNIX> <font color=darkred><b>gf_unit w tests seed [METHOD] </b></font>
|
||||
UNIX> <font color=darkred><b>gf_time w tests seed size(bytes) iterations [METHOD] </b></font>
|
||||
</pre></td></table></center><p>
|
||||
|
||||
The <b>tests</b> parameter is one or more of the following characters:
|
||||
|
||||
<UL>
|
||||
<LI> A: Do all tests
|
||||
<LI> S: Test only single operations (multiplication/division)
|
||||
<LI> R: Test only region operations
|
||||
<LI> V: Verbose Output
|
||||
</UL>
|
||||
|
||||
<b>seed</b> is a seed for <b>srand48()</b> -- using -1 defaults to the current time.
|
||||
<p>
|
||||
For example, testing the defaults with w=4:
|
||||
|
||||
<pre>
|
||||
UNIX> <font color=darkred><b>gf_unit 4 AV 1 LOG - -</b></font>
|
||||
Seed: 1
|
||||
Testing single multiplications/divisions.
|
||||
Testing Inversions.
|
||||
Testing buffer-constant, src != dest, xor = 0
|
||||
Testing buffer-constant, src != dest, xor = 1
|
||||
Testing buffer-constant, src == dest, xor = 0
|
||||
Testing buffer-constant, src == dest, xor = 1
|
||||
UNIX> <font color=darkred><b>gf_unit 4 AV 1 SHIFT - -</b></font>
|
||||
Seed: 1
|
||||
Testing single multiplications/divisions.
|
||||
Testing Inversions.
|
||||
No multiply_region.
|
||||
UNIX> <font color=darkred><b></b></font>
|
||||
</pre>
|
||||
|
||||
There is no <b>multiply_region()</b> method defined for <b>SHIFT</b>.
|
||||
Thus, the procedures are <b>NULL</b> and the unit tester ignores them.
|
||||
<p>
|
||||
At the moment, I only have the unit tester working for w=4.
|
||||
<p>
|
||||
<b>gf_time</b> takes the size of an array (in bytes) and a number of iterations, and
|
||||
tests the speed of both single and region operations. The tests are:
|
||||
|
||||
<UL>
|
||||
<LI> A: All
|
||||
<LI> S: All Single Operations
|
||||
<LI> R: All Region Operations
|
||||
<LI> M: Single: Multiplications
|
||||
<LI> D: Single: Divisions
|
||||
<LI> I: Single: Inverses
|
||||
<LI> B: Region: Multiply_Region
|
||||
</UL>
|
||||
|
||||
Here are some examples with <b>SHIFT</b> and <b>LOG</b> on my mac.
|
||||
|
||||
<pre>
|
||||
UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - -</b></font>
|
||||
Seed: 1
|
||||
Multiply: 0.538126 s 185.830 Mega-ops/s
|
||||
Divide: 0.520825 s 192.003 Mega-ops/s
|
||||
Inverse: 0.631198 s 158.429 Mega-ops/s
|
||||
Buffer-Const,s!=d,xor=0: 0.478395 s 209.032 MB/s
|
||||
Buffer-Const,s!=d,xor=1: 0.524245 s 190.751 MB/s
|
||||
Buffer-Const,s==d,xor=0: 0.471851 s 211.931 MB/s
|
||||
Buffer-Const,s==d,xor=1: 0.528275 s 189.295 MB/s
|
||||
UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - EUCLID</b></font>
|
||||
Seed: 1
|
||||
Multiply: 0.555512 s 180.014 Mega-ops/s
|
||||
Divide: 5.359434 s 18.659 Mega-ops/s
|
||||
Inverse: 4.911719 s 20.359 Mega-ops/s
|
||||
Buffer-Const,s!=d,xor=0: 0.496097 s 201.573 MB/s
|
||||
Buffer-Const,s!=d,xor=1: 0.538536 s 185.689 MB/s
|
||||
Buffer-Const,s==d,xor=0: 0.485564 s 205.946 MB/s
|
||||
Buffer-Const,s==d,xor=1: 0.540227 s 185.107 MB/s
|
||||
UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - MATRIX</b></font>
|
||||
Seed: 1
|
||||
Multiply: 0.544005 s 183.822 Mega-ops/s
|
||||
Divide: 7.602822 s 13.153 Mega-ops/s
|
||||
Inverse: 7.000564 s 14.285 Mega-ops/s
|
||||
Buffer-Const,s!=d,xor=0: 0.474868 s 210.585 MB/s
|
||||
Buffer-Const,s!=d,xor=1: 0.527588 s 189.542 MB/s
|
||||
Buffer-Const,s==d,xor=0: 0.473130 s 211.358 MB/s
|
||||
Buffer-Const,s==d,xor=1: 0.529877 s 188.723 MB/s
|
||||
UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 SHIFT - -</b></font>
|
||||
Seed: 1
|
||||
Multiply: 2.708842 s 36.916 Mega-ops/s
|
||||
Divide: 8.756882 s 11.420 Mega-ops/s
|
||||
Inverse: 5.695511 s 17.558 Mega-ops/s
|
||||
UNIX> <font color=darkred><b></b></font>
|
||||
</pre>
|
||||
|
||||
At the moment, I only have the timer working for w=4.
|
||||
|
||||
<hr>
|
||||
<h3>Walking you through <b>LOG</b></h3>
|
||||
|
||||
To see how <b>scratch</b> is used to store data, let's look at what happens when
|
||||
you call <b>gf_init_easy(&gf, 4, GF_MULT_LOG_TABLE);</b>
|
||||
First, <b>gf_init_easy()</b> calls <b>gf_init_hard()</b> with default parameters.
|
||||
This is in <b><a href=gf.c>gf.c</a></b>.
|
||||
<p>
|
||||
<b>gf_init_hard()</b>'s first job is to set up the scratch.
|
||||
The scratch's type is <b>gf_internal_t</b>, defined in
|
||||
<b><a href=gf_int.h>gf_int.h</a></b>:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
typedef struct {
|
||||
int mult_type;
|
||||
int region_type;
|
||||
int divide_type;
|
||||
int w;
|
||||
uint64_t prim_poly;
|
||||
int free_me;
|
||||
int arg1;
|
||||
int arg2;
|
||||
gf_t *base_gf;
|
||||
void *private;
|
||||
} gf_internal_t;
|
||||
</pre></td></table></center><p>
|
||||
|
||||
All the fields are straightforward, with the exception of <b>private</b>. That is
|
||||
a <b>(void *)</b> which points to the implementation's private data.
|
||||
<p>
|
||||
Here's the code for
|
||||
<b>gf_init_hard()</b>:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
int gf_init_hard(gf_t *gf, int w, int mult_type,
|
||||
int region_type,
|
||||
int divide_type,
|
||||
uint64_t prim_poly,
|
||||
int arg1, int arg2,
|
||||
gf_t *base_gf,
|
||||
void *scratch_memory)
|
||||
{
|
||||
int sz;
|
||||
gf_internal_t *h;
|
||||
|
||||
|
||||
if (scratch_memory == NULL) {
|
||||
sz = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
|
||||
if (sz <= 0) return 0;
|
||||
h = (gf_internal_t *) malloc(sz);
|
||||
h->free_me = 1;
|
||||
} else {
|
||||
h = scratch_memory;
|
||||
h->free_me = 0;
|
||||
}
|
||||
gf->scratch = (void *) h;
|
||||
h->mult_type = mult_type;
|
||||
h->region_type = region_type;
|
||||
h->divide_type = divide_type;
|
||||
h->w = w;
|
||||
h->prim_poly = prim_poly;
|
||||
h->arg1 = arg1;
|
||||
h->arg2 = arg2;
|
||||
h->base_gf = base_gf;
|
||||
h->private = (void *) gf->scratch;
|
||||
h->private += (sizeof(gf_internal_t));
|
||||
|
||||
switch(w) {
|
||||
case 4: return gf_w4_init(gf);
|
||||
case 8: return gf_w8_init(gf);
|
||||
case 16: return gf_w16_init(gf);
|
||||
case 32: return gf_w32_init(gf);
|
||||
case 64: return gf_w64_init(gf);
|
||||
case 128: return gf_dummy_init(gf);
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
</pre></td></table></center><p>
|
||||
|
||||
The first thing it does is determine if it has to allocate space for <b>scratch</b>.
|
||||
If it must, it uses <b>gf_scratch_size()</b> to figure out how big the space must be.
|
||||
It then sets <b>gf->scratch</b> to this space, and sets all of the fields of the
|
||||
scratch to the arguments in <b>gf_init_hard()</b>. The <b>private</b> pointer is
|
||||
set to point to the space just after the <b>gf_internal_t</b> header in <b>gf->scratch</b>. Again, it is up to
|
||||
<b>gf_scratch_size()</b> to make sure there is enough space for the scratch, and
|
||||
for all of the private data needed by the implementation.
|
||||
<p>
|
||||
Once the scratch is set up, <b>gf_init_hard()</b> calls <b>gf_w4_init()</b>. This is
|
||||
in <b><a href=gf_w4.c>gf_w4.c</a></b>, and it is a
|
||||
simple dispatcher to the various initialization routines, plus it
|
||||
sets <b>EUCLID</b> and <b>MATRIX</b> if need be:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
int gf_w4_init(gf_t *gf)
|
||||
{
|
||||
gf_internal_t *h;
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
if (h->prim_poly == 0) h->prim_poly = 0x13;
|
||||
|
||||
gf->multiply.w4 = NULL;
|
||||
gf->divide.w4 = NULL;
|
||||
gf->inverse.w4 = NULL;
|
||||
gf->multiply_region.w4 = NULL;
|
||||
|
||||
switch(h->mult_type) {
|
||||
case GF_MULT_SHIFT: if (gf_w4_shift_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_DEFAULT: if (gf_w4_log_init(gf) == 0) return 0; break;
|
||||
default: return 0;
|
||||
}
|
||||
if (h->divide_type == GF_DIVIDE_EUCLID) {
|
||||
gf->divide.w4 = gf_w4_divide_from_inverse;
|
||||
gf->inverse.w4 = gf_w4_euclid;
|
||||
} else if (h->divide_type == GF_DIVIDE_MATRIX) {
|
||||
gf->divide.w4 = gf_w4_divide_from_inverse;
|
||||
gf->inverse.w4 = gf_w4_matrix;
|
||||
}
|
||||
|
||||
if (gf->inverse.w4 != NULL && gf->divide.w4 == NULL) {
|
||||
gf->divide.w4 = gf_w4_divide_from_inverse;
|
||||
}
|
||||
if (gf->inverse.w4 == NULL && gf->divide.w4 != NULL) {
|
||||
gf->inverse.w4 = gf_w4_inverse_from_divide;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
</pre></td></table></center><p>
|
||||
|
||||
The code in <b>gf_w4_log_init()</b> sets up the log and antilog tables, and sets
|
||||
the <b>multiply.w4</b>, <b>divide.w4</b> etc routines to be the ones for logs. The
|
||||
tables are put into <b>gf->scratch->private</b>, which is typecast to a <b>struct
|
||||
gf_logtable_data *</b>:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
struct gf_logtable_data {
|
||||
gf_val_4_t log_tbl[GF_FIELD_SIZE];
|
||||
gf_val_4_t antilog_tbl[GF_FIELD_SIZE * 2];
|
||||
gf_val_4_t *antilog_tbl_div;
|
||||
};
|
||||
.......
|
||||
|
||||
static
|
||||
int gf_w4_log_init(gf_t *gf)
|
||||
{
|
||||
gf_internal_t *h;
|
||||
struct gf_logtable_data *ltd;
|
||||
int i, b;
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
ltd = h->private;
|
||||
|
||||
ltd->log_tbl[0] = 0;
|
||||
|
||||
ltd->antilog_tbl_div = ltd->antilog_tbl + (GF_FIELD_SIZE-1);
|
||||
b = 1;
|
||||
for (i = 0; i < GF_FIELD_SIZE-1; i++) {
|
||||
ltd->log_tbl[b] = (gf_val_8_t)i;
|
||||
ltd->antilog_tbl[i] = (gf_val_8_t)b;
|
||||
ltd->antilog_tbl[i+GF_FIELD_SIZE-1] = (gf_val_8_t)b;
|
||||
b <<= 1;
|
||||
if (b & GF_FIELD_SIZE) {
|
||||
b = b ^ h->prim_poly;
|
||||
}
|
||||
}
|
||||
|
||||
gf->inverse.w4 = gf_w4_inverse_from_divide;
|
||||
gf->divide.w4 = gf_w4_log_divide;
|
||||
gf->multiply.w4 = gf_w4_log_multiply;
|
||||
gf->multiply_region.w4 = gf_w4_log_multiply_region;
|
||||
return 1;
|
||||
}
|
||||
</pre></td></table></center><p>
|
||||
|
||||
And of course the individual routines use <b>h->private</b> to access the tables:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
static
|
||||
inline
|
||||
gf_val_8_t gf_w4_log_multiply (gf_t *gf, gf_val_8_t a, gf_val_8_t b)
|
||||
{
|
||||
struct gf_logtable_data *ltd;
|
||||
|
||||
ltd = (struct gf_logtable_data *) ((gf_internal_t *) (gf->scratch))->private;
|
||||
return (a == 0 || b == 0) ? 0 : ltd->antilog_tbl[(unsigned)(ltd->log_tbl[a] + ltd->log_tbl[b])];
|
||||
}
|
||||
</pre></td></table></center><p>
|
||||
|
||||
Finally, it's important that the proper sizes are put into
|
||||
<b>gf_w4_scratch_size()</b> for each implementation:
|
||||
|
||||
<p><center><table border=3 cellpadding=3><td><pre>
|
||||
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
|
||||
{
|
||||
int region_tbl_size;
|
||||
switch(mult_type)
|
||||
{
|
||||
case GF_MULT_DEFAULT:
|
||||
case GF_MULT_LOG_TABLE:
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
|
||||
break;
|
||||
case GF_MULT_SHIFT:
|
||||
return sizeof(gf_internal_t);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
</pre></td></table></center><p>
|
||||
I hope that's enough explanation for y'all to start implementing. Let me know if you have
|
||||
problems -- thanks -- Jim
|
||||
|
||||
<hr>
|
||||
The initial structure has been set for w=4, 8, 16, 32 and 64, with implementations of SHIFT and EUCLID, and for w &lt;= 32, MATRIX. There are some weird caveats:
|
||||
|
||||
<UL>
|
||||
<LI> For w=32 and w=64, the primitive polynomial does not have the leading one.
|
||||
<LI> I'd like for naming to be:
|
||||
<p>
|
||||
<UL>
|
||||
<b>gf_w</b><i>w</i><b>_</b><i>technique</i><b>_</b><i>functionality</i><b>()</b>.
|
||||
</UL>
|
||||
<p>
|
||||
For example, the log techniques for w=4 are:
|
||||
<pre>
|
||||
gf_w4_log_multiply()
|
||||
gf_w4_log_divide()
|
||||
gf_w4_log_multiply_region()
|
||||
gf_w4_log_init()
|
||||
</pre>
|
||||
<p>
|
||||
<LI> I'd also like a header block on implementations that says who wrote it.
|
||||
</UL>
|
||||
|
||||
<hr>
|
||||
<h3>Things we need to Implement: <i>w=4</i></h3>
|
||||
|
||||
<p><table border=3 cellpadding=2>
|
||||
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Single TABLE </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> Double TABLE </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> Double TABLE, SSE </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> Quad TABLE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Lazy Quad TABLE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> LOG </td> <td> Done - Jim </td> </tr>
|
||||
</table><p>
|
||||
|
||||
<hr>
|
||||
<h3>Things we need to Implement: <i>w=8</i></h3>
|
||||
|
||||
<p><table border=3 cellpadding=2>
|
||||
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_p </td> <td>Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_b </td> <td>Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim </td> </tr>
|
||||
<tr> <td> Single TABLE </td> <td> Done - Kevin </td> </tr>
|
||||
<tr> <td> Double TABLE </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> Lazy Double TABLE </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> Split 2 1 (Half) SSE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Composite, k=2 </td> <td> Done - Kevin (alt mapping not passing unit test) </td> </tr>
|
||||
<tr> <td> LOG </td> <td> Done - Kevin </td> </tr>
|
||||
<tr> <td> LOG ZERO</td> <td> Done - Jim</td> </tr>
|
||||
</table><p>
|
||||
|
||||
<hr>
|
||||
<h3>Things we need to Implement: <i>w=16</i></h3>
|
||||
|
||||
<p><table border=3 cellpadding=2>
|
||||
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Lazy TABLE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 4 16 No-SSE, lazy </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 4 16 SSE, lazy </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 4 16 SSE, lazy, alternate mapping </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 8 16, lazy </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Composite, k=2, stdmap recursive </td> <td> Done - Kevin</td> </tr>
|
||||
<tr> <td> Composite, k=2, altmap recursive </td> <td> Done - Kevin</td> </tr>
|
||||
<tr> <td> Composite, k=2, stdmap inline </td> <td> Done - Kevin</td> </tr>
|
||||
<tr> <td> LOG </td> <td> Done - Kevin </td> </tr>
|
||||
<tr> <td> LOG ZERO</td> <td> Done - Kevin </td> </tr>
|
||||
<tr> <td> Group 4 4 </td> <td>Done - Jim: I don't see a reason to implement others, although 4-8 will be faster, and 8 8 will have faster region ops. They'll never beat SPLIT.</td> </tr>
|
||||
</table><p>
|
||||
|
||||
<hr>
|
||||
<h3>Things we need to Implement: <i>w=32</i></h3>
|
||||
|
||||
<p><table border=3 cellpadding=2>
|
||||
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 2 32,lazy </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 2 32, SSE, lazy </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 4 32, lazy </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 4 32, SSE,ALTMAP lazy </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 4 32, SSE, lazy </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Split 8 8 </td> <td>Done - Jim </td> </tr>
|
||||
<tr> <td> Group, g_s == g_r </td> <td>Done - Jim</td></tr>
|
||||
<tr> <td> Group, any g_s and g_r</td> <td>Done - Jim</td></tr>
|
||||
<tr> <td> Composite, k=2, stdmap recursive </td> <td> Done - Kevin</td> </tr>
|
||||
<tr> <td> Composite, k=2, altmap recursive </td> <td> Done - Kevin</td> </tr>
|
||||
<tr> <td> Composite, k=2, stdmap inline </td> <td> Done - Kevin</td> </tr>
|
||||
</table><p>
|
||||
<hr>
|
||||
<h3>Things we need to Implement: <i>w=64</i></h3>
|
||||
|
||||
<p><table border=3 cellpadding=2>
|
||||
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_p </td> <td> - </td> </tr>
|
||||
<tr> <td> BYTWO_b </td> <td> - </td> </tr>
|
||||
<tr> <td> BYTWO_p, SSE </td> <td> - </td> </tr>
|
||||
<tr> <td> BYTWO_b, SSE </td> <td> - </td> </tr>
|
||||
<tr> <td> Split 16 1 SSE, maybe lazy </td> <td> - </td> </tr>
|
||||
<tr> <td> Split 8 1 lazy </td> <td> - </td> </tr>
|
||||
<tr> <td> Split 8 8 </td> <td> - </td> </tr>
|
||||
<tr> <td> Split 8 8 lazy </td> <td> - </td> </tr>
|
||||
<tr> <td> Group </td> <td> - </td> </tr>
|
||||
<tr> <td> Composite, k=2, alternate mapping </td> <td> - </td> </tr>
|
||||
</table><p>
|
||||
<hr>
|
||||
<h3>Things we need to Implement: <i>w=128</i></h3>
|
||||
|
||||
<p><table border=3 cellpadding=2>
|
||||
<tr> <td> SHIFT </td> <td> Done - Will </td> </tr>
|
||||
<tr> <td> BYTWO_p </td> <td> - </td> </tr>
|
||||
<tr> <td> BYTWO_b </td> <td> - </td> </tr>
|
||||
<tr> <td> BYTWO_p, SSE </td> <td> - </td> </tr>
|
||||
<tr> <td> BYTWO_b, SSE </td> <td> - </td> </tr>
|
||||
<tr> <td> Split 32 1 SSE, maybe lazy </td> <td> - </td> </tr>
|
||||
<tr> <td> Split 16 1 lazy </td> <td> - </td> </tr>
|
||||
<tr> <td> Split 16 16 - Maybe that's insanity</td> <td> - </td> </tr>
|
||||
<tr> <td> Split 16 16 lazy </td> <td> - </td> </tr>
|
||||
<tr> <td> Group (SSE) </td> <td> - </td> </tr>
|
||||
<tr> <td> Composite, k=?, alternate mapping </td> <td> - </td> </tr>
|
||||
</table><p>
|
||||
<hr>
|
||||
<h3>Things we need to Implement: <i>w=general between 1 &amp; 32</i></h3>
|
||||
|
||||
<p><table border=3 cellpadding=2>
|
||||
<tr> <td> CAUCHY Region (SSE XOR)</td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> TABLE </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> LOG </td> <td> Done - Jim </td> </tr>
|
||||
<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
|
||||
<tr> <td> Group, g_s == g_r </td> <td>Done - Jim</td></tr>
|
||||
<tr> <td> Group, any g_s and g_r</td> <td>Done - Jim</td></tr>
|
||||
<tr> <td> Split - do we need it?</td> <td>Done - Jim</td></tr>
|
||||
<tr> <td> Composite - do we need it?</td> <td> - </td></tr>
|
||||
<tr> <td> Split - do we need it?</td> <td> - </td></tr>
|
||||
<tr> <td> Logzero?</td> <td> - </td></tr>
|
||||
</table><p>
|
|
@ -0,0 +1,10 @@
|
|||
Run which_compile_flags.sh and it will print out the compile flags to use in
|
||||
GNUmakefile. By default, this script uses "cc" as its compiler but you can
|
||||
pass in the name of your compiler as an argument.
|
||||
|
||||
EXAMPLE: "./which_compile_flags.sh clang"
|
||||
|
||||
This script will run "clang" in the above example so be warned that if you type
|
||||
something like "rm" for that argument, you get what you asked for. Also, make
|
||||
sure that the compiler that you pass to which_compile_flags.sh is the same as
|
||||
the compiler in GNUmakefile.
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* flag_test.c - copied from whats_my_sse.c to output proper compile
|
||||
* flags for the GNUmakefile
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "intel_cpu_capabilities.h"
|
||||
|
||||
/* Write the command-line synopsis to stderr and exit with a failure status. */
void usage()
{
  fputs("usage: flag_test <compiler name>\n", stderr);
  exit(EXIT_FAILURE);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
//make sure to extend these buffers if more flags are added to this program
|
||||
char cflags[1000], ldflags[1000], buf[1000];
|
||||
FILE *file;
|
||||
char sse_found = 0;
|
||||
|
||||
if(argc != 2)
|
||||
usage();
|
||||
|
||||
sprintf(cflags, "CFLAGS = -O3");
|
||||
sprintf(ldflags, "LDFLAGS = -O3");
|
||||
|
||||
if(cpu_has_feature(CPU_CAP_SSE42))
|
||||
{
|
||||
sprintf(buf, "%s sse_test.c -o sse4 -msse4 -DSSE4 2> /dev/null", argv[1]);
|
||||
system(buf);
|
||||
if(file = fopen("sse4", "r"))
|
||||
{
|
||||
fclose(file);
|
||||
|
||||
//run program and compare to the included output
|
||||
system("./sse4 > temp.txt 2> /dev/null");
|
||||
system("diff sse4_test.txt temp.txt > diff.txt 2> /dev/null");
|
||||
file = fopen("diff.txt", "r");
|
||||
if(fgetc(file) == EOF)
|
||||
{
|
||||
strcat(cflags, " -msse4 -DINTEL_SSE4");
|
||||
strcat(ldflags, " -msse4");
|
||||
sse_found = 1;
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
}
|
||||
|
||||
if(cpu_has_feature(CPU_CAP_SSSE3) && !sse_found)
|
||||
{
|
||||
sprintf(buf, "%s sse_test.c -o ssse3 -mssse3 -DSSSE3 2> /dev/null", argv[1]);
|
||||
system(buf);
|
||||
if(file = fopen("ssse3", "r"))
|
||||
{
|
||||
fclose(file);
|
||||
|
||||
//run program and compare to the included output
|
||||
system("./ssse3 > temp.txt 2> /dev/null");
|
||||
system("diff ssse3_test.txt temp.txt > diff.txt 2> /dev/null");
|
||||
file = fopen("diff.txt", "r");
|
||||
if(fgetc(file) == EOF)
|
||||
{
|
||||
strcat(cflags, " -mssse3 -DINTEL_SSSE3");
|
||||
strcat(ldflags, " -mssse3");
|
||||
sse_found = 1;
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
}
|
||||
|
||||
if(cpu_has_feature(CPU_CAP_SSE2) && !sse_found)
|
||||
{
|
||||
sprintf(buf, "%s sse_test.c -o sse2 -msse2 -DSSE2 2> /dev/null", argv[1]);
|
||||
system(buf);
|
||||
if(file = fopen("sse2", "r"))
|
||||
{
|
||||
fclose(file);
|
||||
|
||||
//run program and compare to the included output
|
||||
system("./sse2 > temp.txt 2> /dev/null");
|
||||
system("diff sse2_test.txt temp.txt > diff.txt 2> /dev/null");
|
||||
file = fopen("diff.txt", "r");
|
||||
if(fgetc(file) == EOF)
|
||||
{
|
||||
strcat(cflags, " -msse2 -DINTEL_SSE2");
|
||||
strcat(ldflags, " -msse2");
|
||||
sse_found = 1;
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
}
|
||||
|
||||
if(cpu_has_feature(CPU_CAP_PCLMULQDQ) && sse_found)
|
||||
{
|
||||
sprintf(buf, "%s pclmul_test.c -o pclmul -maes -mpclmul 2> /dev/null"
|
||||
, argv[1]);
|
||||
system(buf);
|
||||
if(file = fopen("pclmul", "r"))
|
||||
{
|
||||
fclose(file);
|
||||
|
||||
//run program and compare to the included output
|
||||
system("./pclmul > temp.txt 2> /dev/null");
|
||||
system("diff pclmul_test.txt temp.txt > diff.txt 2> /dev/null");
|
||||
file = fopen("diff.txt", "r");
|
||||
if(fgetc(file) == EOF)
|
||||
{
|
||||
strcat(cflags, " -maes -mpclmul -DINTEL_PCLMUL");
|
||||
strcat(ldflags, " -maes -mpclmul");
|
||||
}
|
||||
fclose(file);
|
||||
}
|
||||
}
|
||||
|
||||
printf("%s\n%s\n", cflags, ldflags);
|
||||
}
|
|
@ -16,7 +16,7 @@
|
|||
/*
 * Capability selectors: CPU_CPSSE tags the value, the low bits carry a
 * bit index.  NOTE(review): the indices match the feature-flag positions
 * of CPUID leaf 1, register ECX -- confirm against cpu_has_feature().
 *
 * CPU_CAP_SSSE3 used to be defined twice (bit 10, then bit 9); only the
 * bit-9 definition is kept, since bit 9 is the SSSE3 flag and a second,
 * different #define of the same macro is an invalid redefinition.
 */
#define CPU_CPSSE 0x2000
#define CPU_CAP_SSE3 (CPU_CPSSE | 0)
#define CPU_CAP_PCLMULQDQ (CPU_CPSSE | 1)
#define CPU_CAP_SSSE3 (CPU_CPSSE | 9)
#define CPU_CAP_SSE41 (CPU_CPSSE | 19)
#define CPU_CAP_SSE42 (CPU_CPSSE | 20)
#define CPU_CAP_AVX (CPU_CPSSE | 28)
|
||||
|
@ -25,7 +25,6 @@
|
|||
__asm__ __volatile__ ("cpuid":\
|
||||
"=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
|
||||
|
||||
inline
|
||||
int
|
||||
cpu_has_feature (unsigned which)
|
||||
{
|
|
@ -0,0 +1,40 @@
|
|||
#include <wmmintrin.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Print label s left-justified in a 20-column field, then the 16 bytes of the
   128-bit value r as two-digit hex, from byte index 15 down to byte index 0,
   followed by a newline.  r is copied out with an unaligned store first. */
#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-20s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); }
|
||||
|
||||
|
||||
/*
 * Exercise the carry-less multiply intrinsic and print every intermediate
 * value with MM_PRINT8.  flag_test diffs this output against
 * pclmul_test.txt to decide whether -mpclmul can be used, so the labels
 * and the order of the prints must not change.
 *
 * NOTE(review): the sequence (clmul, mask the high nibbles, shift them
 * down, clmul by pp = 0x13, xor) looks like one reduction step of a
 * GF(2^4) multiply -- confirm before relying on that reading.
 */
int main()
{
  uint32_t pp;
  __m128i a, b, c;

  a = _mm_set1_epi8(0x0D);
  b = _mm_set_epi32(0,0,0,0x0A);
  pp = 0x13;
  MM_PRINT8("a", a);
  MM_PRINT8("b", b);

  /* carry-less product of the low 64-bit halves of a and b */
  c = _mm_clmulepi64_si128(a, b, 0);
  MM_PRINT8("a clm b", c);

  /* isolate the high nibble of every byte of c and move it to the low nibble */
  a = _mm_set1_epi8(0xf0);
  MM_PRINT8("a", a);
  b = _mm_and_si128(a, c);
  b = _mm_srli_epi64(b, 4);
  MM_PRINT8("shifted", b);

  a = _mm_set_epi32(0,0,0,pp);
  MM_PRINT8("PP", a);

  b = _mm_clmulepi64_si128(a, b, 0);
  MM_PRINT8("PP clm over", b);

  c = _mm_xor_si128(c,b);
  MM_PRINT8("Answer", c);

  return 0;
}
|
|
@ -0,0 +1,8 @@
|
|||
a 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d
|
||||
b 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a
|
||||
a clm b 00 00 00 00 00 00 00 00 72 72 72 72 72 72 72 72
|
||||
a f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
|
||||
shifted 00 00 00 00 00 00 00 00 07 07 07 07 07 07 07 07
|
||||
PP 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 13
|
||||
PP clm over 00 00 00 00 00 00 00 00 79 79 79 79 79 79 79 79
|
||||
Answer 00 00 00 00 00 00 00 00 0b 0b 0b 0b 0b 0b 0b 0b
|
|
@ -0,0 +1,30 @@
|
|||
a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
|
||||
b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
|
||||
c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
|
||||
d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
|
||||
a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
|
||||
d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|
||||
d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
|
||||
d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
|
||||
d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
|
||||
d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
|
||||
d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
|
||||
d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
|
||||
d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
|
||||
d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
|
||||
d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
|
||||
b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
|
||||
d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
||||
d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05
|
|
@ -0,0 +1,35 @@
|
|||
a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
|
||||
b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
|
||||
c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
|
||||
d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
|
||||
a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
|
||||
d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|
||||
d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
|
||||
d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
|
||||
d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
|
||||
d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
|
||||
d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
|
||||
d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
|
||||
d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
|
||||
d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
|
||||
d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
|
||||
b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
|
||||
d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
||||
d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
d insert32 @ 2 00 00 00 00 ab cd 12 34 00 00 00 00 00 00 00 00
|
||||
extract_epi32 @ 2: abcd1234
|
||||
d insert64 @ 0 00 00 00 00 ab cd 12 34 fe dc ba 12 91 82 73 64
|
||||
extract_epi64 @ 0: fedcba1291827364
|
||||
c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05
|
||||
a shuffle(b, c) 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
|
|
@ -0,0 +1,142 @@
|
|||
#ifdef SSE4
|
||||
#define SSSE3
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef SSSE3
|
||||
#define SSE2
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
/* Print label s left-justified in a 20-column field, then the 16 bytes of the
   128-bit value r as two-digit hex, from byte index 15 down to byte index 0,
   followed by a newline.  r is copied out with an unaligned store first. */
#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-20s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); }
|
||||
|
||||
/*
 * Exercise a fixed sequence of SSE2 intrinsics (plus SSSE3 / SSE4 ones when
 * SSSE3 / SSE4 are defined) and print each result.  flag_test diffs this
 * output against sse2_test.txt / ssse3_test.txt / sse4_test.txt, so the
 * labels and the order of the prints must not change.
 */
int main()
{
  uint32_t u32;
  uint64_t u64;
  /*
   * _mm_load_si128 / _mm_store_si128 require a 16-byte aligned address.
   * The buffer used to come from malloc(20), which was called without
   * <stdlib.h>, was never checked or freed, and is not guaranteed to be
   * 16-byte aligned.  An explicitly aligned stack buffer (GCC/Clang
   * attribute, same compiler family as the -msse* flags being probed)
   * removes all of those problems.
   */
  uint8_t ui8[20] __attribute__((aligned(16)));
  uint8_t i;
  __m128i a, b, c, d;

  for(i=0; i < 20; i++)
    ui8[i] = i;

  /* a is the aligned load; b, c, d are unaligned loads at offsets 1, 2, 3 */
  a = _mm_load_si128( (__m128i *) ui8 );
  b = _mm_loadu_si128( (__m128i *) (ui8+1));
  c = _mm_loadu_si128( (__m128i *) (ui8+2));
  d = _mm_loadu_si128( (__m128i *) (ui8+3));

  MM_PRINT8("a", a);
  MM_PRINT8("b", b);
  MM_PRINT8("c", c);
  MM_PRINT8("d", d);

  a = _mm_slli_epi16(a, 2);
  b = _mm_slli_epi32(b, 2);
  c = _mm_slli_epi64(c, 2);
  d = _mm_slli_si128(d, 2);

  MM_PRINT8("a sl16", a);
  MM_PRINT8("b sl32", b);
  MM_PRINT8("c sl64", c);
  MM_PRINT8("d sl128", d);

  a = _mm_srli_epi16(a, 2);
  b = _mm_srli_epi32(b, 2);
  c = _mm_srli_epi64(c, 2);
  d = _mm_srli_si128(d, 2);

  MM_PRINT8("a sr16", a);
  MM_PRINT8("b sr32", b);
  MM_PRINT8("c sr64", c);
  MM_PRINT8("d sr128", d);

  d = _mm_xor_si128(a, b);
  MM_PRINT8("d = a^b", d);

  d = _mm_sub_epi8(a, b);
  MM_PRINT8("d = a-b epi8", d);

  d = _mm_sub_epi16(a, b);
  MM_PRINT8("d = a-b epi16", d);

  d = _mm_sub_epi32(a, b);
  MM_PRINT8("d = a-b epi32", d);

  d = _mm_sub_epi64(a, b);
  MM_PRINT8("d = a-b epi64", d);

  d = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  MM_PRINT8("d set_epi8", d);

  d = _mm_set_epi32(0x12345678, 0x9abcdef0, 0x12345678, 0x9abcdef0);
  MM_PRINT8("d set_epi32", d);

  d = _mm_set1_epi64x(0xF0F0F0F0F0F0F0F0ULL);
  MM_PRINT8("d set1_epi64", d);

  d = _mm_set1_epi32(0xe2e2e2e2);
  MM_PRINT8("d set1_epi32", d);

  d = _mm_set1_epi16(0xaff3);
  MM_PRINT8("d set1_epi16", d);

  d = _mm_set1_epi8(0xc5);
  MM_PRINT8("d set1_epi8", d);

  d = _mm_packus_epi16(d, d);
  MM_PRINT8("d packus_epi16(d,d)", d);

  c = _mm_unpackhi_epi8(a, d);
  MM_PRINT8("c unpackhi(a,d)", c);

  b = _mm_unpacklo_epi8(c, a);
  MM_PRINT8("b unpacklo(c,a)", b);

  d = _mm_and_si128(d, b);
  MM_PRINT8("d and(d,b)", d);

  /* aligned store back into the buffer, then dump it byte by byte */
  _mm_store_si128( (__m128i *) ui8, a);
  printf("a stored to mem: ");
  for(i=0; i < 16; i++)
    printf("%u ", ui8[i]);
  printf("\n");

  d = _mm_setzero_si128();
  MM_PRINT8("d setzero", d);

  u32 = 0xABCD1234;
  u64 = 0xFEDCBA1291827364ULL;

#ifdef SSE4
  d = _mm_insert_epi32(d, u32, 2);
  MM_PRINT8("d insert32 @ 2", d);

  /* clear first so the printed value can only come from the extract */
  u32 = 0;
  u32 = _mm_extract_epi32(d, 2);
  printf("extract_epi32 @ 2: %x\n", u32);

  d = _mm_insert_epi64(d, u64, 0);
  MM_PRINT8("d insert64 @ 0", d);

  u64 = 0;
  u64 = _mm_extract_epi64(d, 0);
  printf("extract_epi64 @ 0: %" PRIx64 "\n", u64);
#endif

  c = _mm_set1_epi8(5);
  MM_PRINT8("c", c);

#ifdef SSSE3
  a = _mm_shuffle_epi8(b, c);
  MM_PRINT8("a shuffle(b, c)", a);
#endif

  return 0;
}
|
|
@ -0,0 +1,31 @@
|
|||
a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
|
||||
b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
|
||||
c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
|
||||
d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
|
||||
a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
|
||||
d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
|
||||
d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
|
||||
d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
|
||||
d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
|
||||
d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
|
||||
d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
|
||||
d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
|
||||
d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
|
||||
d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
|
||||
d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
|
||||
b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
|
||||
d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
||||
d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
|
||||
c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05
|
||||
a shuffle(b, c) 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02
|
|
@ -0,0 +1,19 @@
|
|||
#!/bin/sh
#
# which_compile_flags.sh [compiler]
#
# Builds flag_test with the given compiler (default: cc), runs it, and prints
# the CFLAGS / LDFLAGS lines to use in GNUmakefile.  If flag_test builds but
# prints nothing, the plain -O3 defaults are printed instead.

CC=${1:-cc}

# $CC is deliberately left unquoted here so it is expanded the same way as
# before when invoking the compiler.
$CC flag_test.c -o flag_test 2> /dev/null
if [ -e "flag_test" ]; then
  # Quoted so flag_test always receives the compiler as exactly one argument.
  OUTPUT=$(./flag_test "$CC" 2> /dev/null)
  if [ -n "$OUTPUT" ]; then
    echo "$OUTPUT"
  else
    printf "CFLAGS = -O3\nLDFLAGS = -O3\n"
  fi
else
  # Never use $CC as the format string: a '%' or '\' in it would be interpreted.
  printf '%s failed to compile flag_test.c\n' "$CC"
fi

# Remove the probe binaries and scratch files; some may not exist.
rm -f sse4 sse2 ssse3 pclmul diff.txt flag_test temp.txt 2> /dev/null
|
660
gf.c
660
gf.c
|
@ -8,6 +8,405 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int _gf_errno = GF_E_DEFAULT;
|
||||
|
||||
void gf_error()
|
||||
{
|
||||
char *s;
|
||||
|
||||
switch(_gf_errno) {
|
||||
case GF_E_DEFAULT: s = "No Error."; break;
|
||||
case GF_E_TWOMULT: s = "Cannot specify two -m's."; break;
|
||||
case GF_E_TWO_DIV: s = "Cannot specify two -d's."; break;
|
||||
case GF_E_POLYSPC: s = "-p needs to be followed by a number in hex (0x optional)."; break;
|
||||
case GF_E_GROUPAR: s = "Ran out of arguments in -m GROUP."; break;
|
||||
case GF_E_GROUPNU: s = "In -m GROUP g_s g_r -- g_s and g_r need to be numbers."; break;
|
||||
case GF_E_SPLITAR: s = "Ran out of arguments in -m SPLIT."; break;
|
||||
case GF_E_SPLITNU: s = "In -m SPLIT w_a w_b -- w_a and w_b need to be numbers."; break;
|
||||
case GF_E_FEWARGS: s = "Not enough arguments (Perhaps end with '-'?)"; break;
|
||||
case GF_E_CFM___W: s = "-m CARRY_FREE, w must be 4, 8, 16, 32, 64 or 128."; break;
|
||||
case GF_E_COMPXPP: s = "-m COMPOSITE, No poly specified, and we don't have a default for the given sub-field."; break;
|
||||
case GF_E_BASE__W: s = "-m COMPOSITE and the base field is not for w/2."; break;
|
||||
case GF_E_CFM4POL: s = "-m CARRY_FREE, w=4. (Prim-poly & 0xc) must equal 0."; break;
|
||||
case GF_E_CFM8POL: s = "-m CARRY_FREE, w=8. (Prim-poly & 0x80) must equal 0."; break;
|
||||
case GF_E_CF16POL: s = "-m CARRY_FREE, w=16. (Prim-poly & 0xe000) must equal 0."; break;
|
||||
case GF_E_CF32POL: s = "-m CARRY_FREE, w=32. (Prim-poly & 0xfe000000) must equal 0."; break;
|
||||
case GF_E_CF64POL: s = "-m CARRY_FREE, w=64. (Prim-poly & 0xfffe000000000000ULL) must equal 0."; break;
|
||||
case GF_E_MDEFDIV: s = "If multiplication method == default, can't change division."; break;
|
||||
case GF_E_MDEFREG: s = "If multiplication method == default, can't change region."; break;
|
||||
case GF_E_MDEFARG: s = "If multiplication method == default, can't use arg1/arg2."; break;
|
||||
case GF_E_DIVCOMP: s = "Cannot change the division technique with -m COMPOSITE."; break;
|
||||
case GF_E_DOUQUAD: s = "Cannot specify -r DOUBLE and -r QUAD."; break;
|
||||
case GF_E_SSE__NO: s = "Cannot specify -r SSE and -r NOSSE."; break;
|
||||
case GF_E_CAUCHYB: s = "Cannot specify -r CAUCHY and any other -r."; break;
|
||||
case GF_E_CAUCOMP: s = "Cannot specify -m COMPOSITE and -r CAUCHY."; break;
|
||||
case GF_E_CAUGT32: s = "Cannot specify -r CAUCHY with w > 32."; break;
|
||||
case GF_E_ARG1SET: s = "Only use arg1 with SPLIT, GROUP or COMPOSITE."; break;
|
||||
case GF_E_ARG2SET: s = "Only use arg2 with SPLIT or GROUP."; break;
|
||||
case GF_E_MATRIXW: s = "Cannot specify -d MATRIX with w > 32."; break;
|
||||
case GF_E_BAD___W: s = "W must be 1-32, 64 or 128."; break;
|
||||
case GF_E_DOUBLET: s = "Can only specify -r DOUBLE with -m TABLE."; break;
|
||||
case GF_E_DOUBLEW: s = "Can only specify -r DOUBLE w = 4 or w = 8."; break;
|
||||
case GF_E_DOUBLEJ: s = "Cannot specify -r DOUBLE with -r ALTMAP|SSE|NOSSE."; break;
|
||||
case GF_E_DOUBLEL: s = "Can only specify -r DOUBLE -r LAZY with w = 8"; break;
|
||||
case GF_E_QUAD__T: s = "Can only specify -r QUAD with -m TABLE."; break;
|
||||
case GF_E_QUAD__W: s = "Can only specify -r QUAD w = 4."; break;
|
||||
case GF_E_QUAD__J: s = "Cannot specify -r QUAD with -r ALTMAP|SSE|NOSSE."; break;
|
||||
case GF_E_BADPOLY: s = "Bad primitive polynomial (high bits set)."; break;
|
||||
case GF_E_COMP_PP: s = "Bad primitive polynomial -- bigger than sub-field."; break;
|
||||
case GF_E_LAZY__X: s = "If -r LAZY, then -r must be DOUBLE or QUAD."; break;
|
||||
case GF_E_ALTSHIF: s = "Cannot specify -m SHIFT and -r ALTMAP."; break;
|
||||
case GF_E_SSESHIF: s = "Cannot specify -m SHIFT and -r SSE|NOSSE."; break;
|
||||
case GF_E_ALT_CFM: s = "Cannot specify -m CARRY_FREE and -r ALTMAP."; break;
|
||||
case GF_E_SSE_CFM: s = "Cannot specify -m CARRY_FREE and -r SSE|NOSSE."; break;
|
||||
case GF_E_PCLMULX: s = "Specified -m CARRY_FREE, but PCLMUL is not supported."; break;
|
||||
case GF_E_ALT_BY2: s = "Cannot specify -m BYTWO_x and -r ALTMAP."; break;
|
||||
case GF_E_BY2_SSE: s = "Specified -m BYTWO_x -r SSE, but SSE2 is not supported."; break;
|
||||
case GF_E_LOGBADW: s = "With Log Tables, w must be <= 27."; break;
|
||||
case GF_E_LOG___J: s = "Cannot use Log tables with -r ALTMAP|SSE|NOSSE."; break;
|
||||
case GF_E_LOGPOLY: s = "Cannot use Log tables because the polynomial is not primitive."; break;
|
||||
case GF_E_ZERBADW: s = "With -m LOG_ZERO, w must be 8 or 16."; break;
|
||||
case GF_E_ZEXBADW: s = "With -m LOG_ZERO_EXT, w must be 8."; break;
|
||||
case GF_E_GR_ARGX: s = "With -m GROUP, arg1 and arg2 must be >= 0."; break;
|
||||
case GF_E_GR_W_48: s = "With -m GROUP, w cannot be 4 or 8."; break;
|
||||
case GF_E_GR_W_16: s = "With -m GROUP, w == 16, arg1 and arg2 must be 4."; break;
|
||||
case GF_E_GR_128A: s = "With -m GROUP, w == 128, arg1 must be 4, and arg2 in { 4,8,16 }."; break;
|
||||
case GF_E_GR_SSE4: s = "With -m GROUP, w == 128, you need SSE4."; break;
|
||||
case GF_E_GR_A_27: s = "With -m GROUP, arg1 and arg2 must be <= 27."; break;
|
||||
case GF_E_GR_AR_W: s = "With -m GROUP, arg1 and arg2 must be <= w."; break;
|
||||
case GF_E_GR____J: s = "Cannot use GROUP with -r ALTMAP|SSE|NOSSE."; break;
|
||||
case GF_E_TABLE_W: s = "With -m TABLE, w must be < 15, or == 16."; break;
|
||||
case GF_E_TAB_SSE: s = "With -m TABLE, SSE|NOSSE only applies to w=4."; break;
|
||||
case GF_E_TABSSE3: s = "With -m TABLE, -r SSE, you need SSSE3 supported."; break;
|
||||
case GF_E_TAB_ALT: s = "With -m TABLE, you cannot use ALTMAP."; break;
|
||||
case GF_E_SP128AR: s = "With -m SPLIT, w=128, bad arg1/arg2."; break;
|
||||
case GF_E_SP128AL: s = "With -m SPLIT, w=128, -r SSE requires -r ALTMAP."; break;
|
||||
case GF_E_SP128AS: s = "With -m SPLIT, w=128, ALTMAP needs SSSE3 supported."; break;
|
||||
case GF_E_SP128_A: s = "With -m SPLIT, w=128, -r SSE|NOSSE only with arg1/arg2 = 4/128."; break;
|
||||
case GF_E_SP128_S: s = "With -m SPLIT, w=128, -r ALTMAP only with arg1/arg2 = 4/128."; break;
|
||||
case GF_E_SPLIT_W: s = "With -m SPLIT, w must be in {8, 16, 32, 64, 128}."; break;
|
||||
case GF_E_SP_16AR: s = "With -m SPLIT, w=16, Bad arg1/arg2."; break;
|
||||
case GF_E_SP_16_A: s = "With -m SPLIT, w=16, -r ALTMAP only with arg1/arg2 = 4/16."; break;
|
||||
case GF_E_SP_16_S: s = "With -m SPLIT, w=16, -r SSE|NOSSE only with arg1/arg2 = 4/16."; break;
|
||||
case GF_E_SP_32AR: s = "With -m SPLIT, w=32, Bad arg1/arg2."; break;
|
||||
case GF_E_SP_32AS: s = "With -m SPLIT, w=32, -r ALTMAP needs SSSE3 supported."; break;
|
||||
case GF_E_SP_32_A: s = "With -m SPLIT, w=32, -r ALTMAP only with arg1/arg2 = 4/32."; break;
|
||||
case GF_E_SP_32_S: s = "With -m SPLIT, w=32, -r SSE|NOSSE only with arg1/arg2 = 4/32."; break;
|
||||
case GF_E_SP_64AR: s = "With -m SPLIT, w=64, Bad arg1/arg2."; break;
|
||||
case GF_E_SP_64AS: s = "With -m SPLIT, w=64, -r ALTMAP needs SSSE3 supported."; break;
|
||||
case GF_E_SP_64_A: s = "With -m SPLIT, w=64, -r ALTMAP only with arg1/arg2 = 4/64."; break;
|
||||
case GF_E_SP_64_S: s = "With -m SPLIT, w=64, -r SSE|NOSSE only with arg1/arg2 = 4/64."; break;
|
||||
case GF_E_SP_8_AR: s = "With -m SPLIT, w=8, Bad arg1/arg2."; break;
|
||||
case GF_E_SP_8__A: s = "With -m SPLIT, w=8, Can't have -r ALTMAP."; break;
|
||||
case GF_E_SP_SSE3: s = "With -m SPLIT, Need SSSE3 support for SSE."; break;
|
||||
case GF_E_COMP_A2: s = "With -m COMPOSITE, arg1 must equal 2."; break;
|
||||
case GF_E_COMP_SS: s = "With -m COMPOSITE, -r SSE and -r NOSSE do not apply."; break;
|
||||
case GF_E_COMP__W: s = "With -m COMPOSITE, w must be 8, 16, 32, 64 or 128."; break;
|
||||
case GF_E_UNKFLAG: s = "Unknown method flag - should be -m, -d, -r or -p."; break;
|
||||
case GF_E_UNKNOWN: s = "Unknown multiplication type."; break;
|
||||
case GF_E_UNK_REG: s = "Unknown region type."; break;
|
||||
case GF_E_UNK_DIV: s = "Unknown division type."; break;
|
||||
default: s = "Undefined error.";
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s\n", s);
|
||||
}
|
||||
|
||||
/*
 * Look up the polynomial to use for a composite field built on top of the
 * field `base`, keyed on the base field's width, multiplication type and
 * primitive polynomial.
 *
 * Returns the matching polynomial, or 0 when the base field's width is not
 * one of 4, 8, 16, 32, 64 or no entry exists for its polynomial.
 *
 * When the base field is itself composite (w = 16, 32, 64), the lookup
 * recurses into h->base_gf and only succeeds if the base was built with the
 * value this function would have returned for it.
 */
uint64_t gf_composite_get_default_poly(gf_t *base)
{
  gf_internal_t *h;
  /* 64-bit like the return type, so the recursive result is never narrowed
     and the comparison with prim_poly is unsigned on both sides */
  uint64_t rv;

  h = (gf_internal_t *) base->scratch;

  if (h->w == 4) {
    if (h->mult_type == GF_MULT_COMPOSITE) return 0;
    if (h->prim_poly == 0x13) return 2;
    return 0;
  }

  if (h->w == 8) {
    if (h->mult_type == GF_MULT_COMPOSITE) return 0;
    if (h->prim_poly == 0x11d) return 3;
    return 0;
  }

  if (h->w == 16) {
    if (h->mult_type == GF_MULT_COMPOSITE) {
      rv = gf_composite_get_default_poly(h->base_gf);
      if (rv != h->prim_poly) return 0;
      if (rv == 3) return 0x105;
      return 0;
    } else {
      if (h->prim_poly == 0x1100b) return 2;
      if (h->prim_poly == 0x1002d) return 7;
      return 0;
    }
  }

  if (h->w == 32) {
    if (h->mult_type == GF_MULT_COMPOSITE) {
      rv = gf_composite_get_default_poly(h->base_gf);
      if (rv != h->prim_poly) return 0;
      if (rv == 2) return 0x10005;
      if (rv == 7) return 0x10008;
      if (rv == 0x105) return 0x10002;
      return 0;
    } else {
      if (h->prim_poly == 0x400007) return 2;
      if (h->prim_poly == 0xc5) return 3;
      return 0;
    }
  }

  if (h->w == 64) {
    if (h->mult_type == GF_MULT_COMPOSITE) {
      rv = gf_composite_get_default_poly(h->base_gf);
      if (rv != h->prim_poly) return 0;
      if (rv == 3) return 0x100000009ULL;
      if (rv == 2) return 0x100000004ULL;
      if (rv == 0x10005) return 0x100000003ULL;
      if (rv == 0x10002) return 0x100000005ULL;
      if (rv == 0x10008) return 0x100000006ULL; /* JSP: (0x100000003 works too,
                                                   but I want to differentiate cases). */
      return 0;
    } else {
      if (h->prim_poly == 0x1bULL) return 2;
      return 0;
    }
  }

  return 0;
}
|
||||
|
||||
/* Validates a requested field configuration before any memory is sized or
   initialized.

   w            - word size of the field.
   mult_type    - one of the GF_MULT_xxx values.
   region_type  - OR of GF_REGION_xxx bits.
   divide_type  - one of the GF_DIVIDE_xxx values.
   arg1, arg2   - extra arguments; only SPLIT_TABLE and GROUP use both,
                  COMPOSITE uses arg1 only.
   poly         - primitive polynomial, 0 for the default.
   base         - base field for COMPOSITE, may be NULL.

   Returns 1 when the combination is legal.  Otherwise stores the reason
   in _gf_errno and returns 0.

   Which SSE/PCLMUL features count as "available" is decided at compile
   time through the INTEL_xxx macros. */

int gf_error_check(int w, int mult_type, int region_type, int divide_type,
                   int arg1, int arg2, uint64_t poly, gf_t *base)
{
  int sse4 = 0;
  int sse3 = 0;
  int sse2 = 0;
  int pclmul = 0;
  int rdouble, rquad, rlazy, rsse, rnosse, raltmap, rcauchy, tmp;
  gf_internal_t *sub;

  rdouble = (region_type & GF_REGION_DOUBLE_TABLE);
  rquad   = (region_type & GF_REGION_QUAD_TABLE);
  rlazy   = (region_type & GF_REGION_LAZY);
  rsse    = (region_type & GF_REGION_SSE);
  rnosse  = (region_type & GF_REGION_NOSSE);
  raltmap = (region_type & GF_REGION_ALTMAP);
  rcauchy = (region_type & GF_REGION_CAUCHY);

  if (divide_type != GF_DIVIDE_DEFAULT &&
      divide_type != GF_DIVIDE_MATRIX &&
      divide_type != GF_DIVIDE_EUCLID) {
    _gf_errno = GF_E_UNK_DIV;
    return 0;
  }

  /* Reject any region bit that is not one of the known flags. */
  tmp = ( GF_REGION_DOUBLE_TABLE | GF_REGION_QUAD_TABLE | GF_REGION_LAZY |
          GF_REGION_SSE | GF_REGION_NOSSE | GF_REGION_ALTMAP | GF_REGION_CAUCHY );
  if (region_type & (~tmp)) { _gf_errno = GF_E_UNK_REG; return 0; }

#ifdef INTEL_SSE2
  sse2 = 1;
#endif

#ifdef INTEL_SSSE3
  sse3 = 1;
#endif

#ifdef INTEL_SSE4
  sse4 = 1;
#endif

#ifdef INTEL_PCLMUL
  pclmul = 1;
#endif

  if (w < 1 || (w > 32 && w != 64 && w != 128)) { _gf_errno = GF_E_BAD___W; return 0; }

  /* The leading 1 of the polynomial is optional, so up to w+1 bits are legal. */
  if (mult_type != GF_MULT_COMPOSITE && w < 64) {
    if ((poly >> (w+1)) != 0)                   { _gf_errno = GF_E_BADPOLY; return 0; }
  }

  if (mult_type == GF_MULT_DEFAULT) {
    if (divide_type != GF_DIVIDE_DEFAULT) { _gf_errno = GF_E_MDEFDIV; return 0; }
    if (region_type != GF_REGION_DEFAULT) { _gf_errno = GF_E_MDEFREG; return 0; }
    if (arg1 != 0 || arg2 != 0)           { _gf_errno = GF_E_MDEFARG; return 0; }
    return 1;
  }

  if (rsse && rnosse)                              { _gf_errno = GF_E_SSE__NO; return 0; }
  if (rcauchy && w > 32)                           { _gf_errno = GF_E_CAUGT32; return 0; }
  if (rcauchy && region_type != GF_REGION_CAUCHY)  { _gf_errno = GF_E_CAUCHYB; return 0; }
  if (rcauchy && mult_type == GF_MULT_COMPOSITE)   { _gf_errno = GF_E_CAUCOMP; return 0; }

  if (arg1 != 0 && mult_type != GF_MULT_COMPOSITE &&
      mult_type != GF_MULT_SPLIT_TABLE && mult_type != GF_MULT_GROUP) {
    _gf_errno = GF_E_ARG1SET;
    return 0;
  }

  if (arg2 != 0 && mult_type != GF_MULT_SPLIT_TABLE && mult_type != GF_MULT_GROUP) {
    _gf_errno = GF_E_ARG2SET;
    return 0;
  }

  if (divide_type == GF_DIVIDE_MATRIX && w > 32) { _gf_errno = GF_E_MATRIXW; return 0; }

  if (rdouble) {
    if (rquad)                      { _gf_errno = GF_E_DOUQUAD; return 0; }
    if (mult_type != GF_MULT_TABLE) { _gf_errno = GF_E_DOUBLET; return 0; }
    if (w != 4 && w != 8)           { _gf_errno = GF_E_DOUBLEW; return 0; }
    if (rsse || rnosse || raltmap)  { _gf_errno = GF_E_DOUBLEJ; return 0; }
    if (rlazy && w == 4)            { _gf_errno = GF_E_DOUBLEL; return 0; }
    return 1;
  }

  if (rquad) {
    if (mult_type != GF_MULT_TABLE) { _gf_errno = GF_E_QUAD__T; return 0; }
    if (w != 4)                     { _gf_errno = GF_E_QUAD__W; return 0; }
    if (rsse || rnosse || raltmap)  { _gf_errno = GF_E_QUAD__J; return 0; }
    return 1;
  }

  /* LAZY is only accepted together with DOUBLE or QUAD, handled above. */
  if (rlazy)                        { _gf_errno = GF_E_LAZY__X; return 0; }

  if (mult_type == GF_MULT_SHIFT) {
    if (raltmap)                    { _gf_errno = GF_E_ALTSHIF; return 0; }
    if (rsse || rnosse)             { _gf_errno = GF_E_SSESHIF; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_CARRY_FREE) {
    if (w != 4 && w != 8 && w != 16 &&
        w != 32 && w != 64 && w != 128)            { _gf_errno = GF_E_CFM___W; return 0; }
    if (w == 4 && (poly & 0xc))                    { _gf_errno = GF_E_CFM4POL; return 0; }
    if (w == 8 && (poly & 0x80))                   { _gf_errno = GF_E_CFM8POL; return 0; }
    if (w == 16 && (poly & 0xe000))                { _gf_errno = GF_E_CF16POL; return 0; }
    if (w == 32 && (poly & 0xfe000000))            { _gf_errno = GF_E_CF32POL; return 0; }
    if (w == 64 && (poly & 0xfffe000000000000ULL)) { _gf_errno = GF_E_CF64POL; return 0; }
    if (raltmap)                                   { _gf_errno = GF_E_ALT_CFM; return 0; }
    if (rsse || rnosse)                            { _gf_errno = GF_E_SSE_CFM; return 0; }
    if (!pclmul)                                   { _gf_errno = GF_E_PCLMULX; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_BYTWO_p || mult_type == GF_MULT_BYTWO_b) {
    if (raltmap)                    { _gf_errno = GF_E_ALT_BY2; return 0; }
    if (rsse && !sse2)              { _gf_errno = GF_E_BY2_SSE; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_LOG_TABLE || mult_type == GF_MULT_LOG_ZERO
                                     || mult_type == GF_MULT_LOG_ZERO_EXT ) {
    if (w > 27)                     { _gf_errno = GF_E_LOGBADW; return 0; }
    if (raltmap || rsse || rnosse)  { _gf_errno = GF_E_LOG___J; return 0; }

    if (mult_type == GF_MULT_LOG_TABLE) return 1;

    if (w != 8 && w != 16)          { _gf_errno = GF_E_ZERBADW; return 0; }

    if (mult_type == GF_MULT_LOG_ZERO) return 1;

    if (w != 8)                     { _gf_errno = GF_E_ZEXBADW; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_GROUP) {
    if (arg1 <= 0 || arg2 <= 0)                 { _gf_errno = GF_E_GR_ARGX; return 0; }
    if (w == 4 || w == 8)                       { _gf_errno = GF_E_GR_W_48; return 0; }
    if (w == 16 && (arg1 != 4 || arg2 != 4))    { _gf_errno = GF_E_GR_W_16; return 0; }
    if (w == 128 && (arg1 != 4 ||
       (arg2 != 4 && arg2 != 8 && arg2 != 16))) { _gf_errno = GF_E_GR_128A; return 0; }
    if (w == 128 && !sse4)                      { _gf_errno = GF_E_GR_SSE4; return 0; }
    if (arg1 > 27 || arg2 > 27)                 { _gf_errno = GF_E_GR_A_27; return 0; }
    if (arg1 > w || arg2 > w)                   { _gf_errno = GF_E_GR_AR_W; return 0; }
    if (raltmap || rsse || rnosse)              { _gf_errno = GF_E_GR____J; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_TABLE) {
    if (w != 16 && w >= 15)                     { _gf_errno = GF_E_TABLE_W; return 0; }
    if (w != 4 && (rsse || rnosse))             { _gf_errno = GF_E_TAB_SSE; return 0; }
    if (rsse && !sse3)                          { _gf_errno = GF_E_TABSSE3; return 0; }
    if (raltmap)                                { _gf_errno = GF_E_TAB_ALT; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_SPLIT_TABLE) {
    /* The two arguments may be given in either order; normalize so that
       arg1 <= arg2 before checking the legal pairs. */
    if (arg1 > arg2) {
      tmp = arg1;
      arg1 = arg2;
      arg2 = tmp;
    }
    if (w == 8) {
      if (arg1 != 4 || arg2 != 8)               { _gf_errno = GF_E_SP_8_AR; return 0; }
      if (rsse && !sse3)                        { _gf_errno = GF_E_SP_SSE3; return 0; }
      if (raltmap)                              { _gf_errno = GF_E_SP_8__A; return 0; }
    } else if (w == 16) {
      if (arg1 == 4 && arg2 == 16) {
        if (rsse && !sse3)                      { _gf_errno = GF_E_SP_SSE3; return 0; }
      } else if (arg1 == 8 && (arg2 == 16 || arg2 == 8)) {
        if (rsse || rnosse)                     { _gf_errno = GF_E_SP_16_S; return 0; }
        if (raltmap)                            { _gf_errno = GF_E_SP_16_A; return 0; }
      } else                                    { _gf_errno = GF_E_SP_16AR; return 0; }
    } else if (w == 32) {
      if ((arg1 == 8 && arg2 == 8) ||
          (arg1 == 8 && arg2 == 32) ||
          (arg1 == 16 && arg2 == 32)) {
        if (rsse || rnosse)                     { _gf_errno = GF_E_SP_32_S; return 0; }
        if (raltmap)                            { _gf_errno = GF_E_SP_32_A; return 0; }
      } else if (arg1 == 4 && arg2 == 32) {
        /* arg1 is known to be 4 here, so no separate "ALTMAP only with
           4/32" check is needed in this branch. */
        if (rsse && !sse3)                      { _gf_errno = GF_E_SP_SSE3; return 0; }
        if (raltmap && !sse3)                   { _gf_errno = GF_E_SP_32AS; return 0; }
        if (raltmap && rnosse)                  { _gf_errno = GF_E_SP_32AS; return 0; }
      } else                                    { _gf_errno = GF_E_SP_32AR; return 0; }
    } else if (w == 64) {
      if ((arg1 == 8 && arg2 == 8) ||
          (arg1 == 8 && arg2 == 64) ||
          (arg1 == 16 && arg2 == 64)) {
        if (rsse || rnosse)                     { _gf_errno = GF_E_SP_64_S; return 0; }
        if (raltmap)                            { _gf_errno = GF_E_SP_64_A; return 0; }
      } else if (arg1 == 4 && arg2 == 64) {
        if (rsse && !sse3)                      { _gf_errno = GF_E_SP_SSE3; return 0; }
        if (raltmap && !sse3)                   { _gf_errno = GF_E_SP_64AS; return 0; }
        if (raltmap && rnosse)                  { _gf_errno = GF_E_SP_64AS; return 0; }
      } else                                    { _gf_errno = GF_E_SP_64AR; return 0; }
    } else if (w == 128) {
      if (arg1 == 8 && arg2 == 128) {
        if (rsse || rnosse)                     { _gf_errno = GF_E_SP128_S; return 0; }
        if (raltmap)                            { _gf_errno = GF_E_SP128_A; return 0; }
      } else if (arg1 == 4 && arg2 == 128) {
        if (rsse && !sse3)                      { _gf_errno = GF_E_SP_SSE3; return 0; }
        if (raltmap && !sse3)                   { _gf_errno = GF_E_SP128AS; return 0; }
        if (raltmap && rnosse)                  { _gf_errno = GF_E_SP128AS; return 0; }
        if (!raltmap && rsse)                   { _gf_errno = GF_E_SP128AL; return 0; }
      } else                                    { _gf_errno = GF_E_SP128AR; return 0; }
    } else                                      { _gf_errno = GF_E_SPLIT_W; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_COMPOSITE) {
    if (w != 8 && w != 16 && w != 32
          && w != 64 && w != 128)               { _gf_errno = GF_E_COMP__W; return 0; }
    /* The polynomial must fit in w/2 bits.  For w == 128 that is all 64
       bits of poly, and shifting a uint64_t by 64 is undefined, so the
       test is skipped there. */
    if (w < 128 && (poly >> (w/2)) != 0)        { _gf_errno = GF_E_COMP_PP; return 0; }
    if (divide_type != GF_DIVIDE_DEFAULT)       { _gf_errno = GF_E_DIVCOMP; return 0; }
    if (arg1 != 2)                              { _gf_errno = GF_E_COMP_A2; return 0; }
    if (rsse || rnosse)                         { _gf_errno = GF_E_COMP_SS; return 0; }
    if (base != NULL) {
      sub = (gf_internal_t *) base->scratch;
      if (sub->w != w/2)                        { _gf_errno = GF_E_BASE__W; return 0; }
      if (poly == 0) {
        /* Caller wants the default polynomial: make sure one exists
           for this base field. */
        if (gf_composite_get_default_poly(base) == 0) { _gf_errno = GF_E_COMPXPP; return 0; }
      }
    }
    return 1;
  }

  _gf_errno = GF_E_UNKNOWN;
  return 0;
}
|
||||
|
||||
int gf_scratch_size(int w,
|
||||
int mult_type,
|
||||
int region_type,
|
||||
|
@ -15,6 +414,8 @@ int gf_scratch_size(int w,
|
|||
int arg1,
|
||||
int arg2)
|
||||
{
|
||||
if (gf_error_check(w, mult_type, region_type, divide_type, arg1, arg2, 0, NULL) == 0) return 0;
|
||||
|
||||
switch(w) {
|
||||
case 4: return gf_w4_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
|
||||
case 8: return gf_w8_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
|
||||
|
@ -26,16 +427,31 @@ int gf_scratch_size(int w,
|
|||
}
|
||||
}
|
||||
|
||||
/* Reports how many bytes an already-initialized gf_t accounts for:
   sizeof(gf_t), plus the scratch size for the parameters recorded in its
   scratch header, plus (for a COMPOSITE field) the size of its base
   field, computed recursively. */

extern int gf_size(gf_t *gf)
{
  gf_internal_t *info = (gf_internal_t *) gf->scratch;
  int total = sizeof(gf_t);

  total += gf_scratch_size(info->w, info->mult_type, info->region_type,
                           info->divide_type, info->arg1, info->arg2);

  if (info->mult_type == GF_MULT_COMPOSITE) {
    total += gf_size(info->base_gf);
  }

  return total;
}
|
||||
|
||||
|
||||
/* Initializes gf for word size w with every option left at its default:
   default multiplication, region and division types, zeros for the three
   numeric arguments, no base field, and no caller-supplied scratch memory.
   The result of gf_init_hard() is returned unchanged. */

int gf_init_easy(gf_t *gf, int w)
{
  int status;

  status = gf_init_hard(gf, w,
                        GF_MULT_DEFAULT, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
                        0, 0, 0,
                        NULL, NULL);
  return status;
}
|
||||
|
||||
/* Allen: What's going on here is this function is putting info into the
|
||||
scratch mem of gf, and then calling the relevant REAL init
|
||||
func for the word size. Probably done this way to consolidate
|
||||
those aspects of initialization that don't rely on word size,
|
||||
and then take care of word-size-specific stuff. */
|
||||
|
||||
int gf_init_hard(gf_t *gf, int w, int mult_type,
|
||||
int region_type,
|
||||
int divide_type,
|
||||
|
@ -46,11 +462,14 @@ int gf_init_hard(gf_t *gf, int w, int mult_type,
|
|||
{
|
||||
int sz;
|
||||
gf_internal_t *h;
|
||||
|
||||
|
||||
if (gf_error_check(w, mult_type, region_type, divide_type,
|
||||
arg1, arg2, prim_poly, base_gf) == 0) return 0;
|
||||
|
||||
sz = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
|
||||
|
||||
if (sz <= 0) return 0;
|
||||
|
||||
if (sz <= 0) return 0; /* This shouldn't happen, as all errors should get caught
|
||||
in gf_error_check() */
|
||||
|
||||
if (scratch_memory == NULL) {
|
||||
h = (gf_internal_t *) malloc(sz);
|
||||
h->free_me = 1;
|
||||
|
@ -71,8 +490,6 @@ int gf_init_hard(gf_t *gf, int w, int mult_type,
|
|||
h->private += (sizeof(gf_internal_t));
|
||||
gf->extract_word.w32 = NULL;
|
||||
|
||||
//printf("Created w=%d, with mult_type=%d and region_type=%d\n", w, mult_type, region_type);
|
||||
|
||||
switch(w) {
|
||||
case 4: return gf_w4_init(gf);
|
||||
case 8: return gf_w8_init(gf);
|
||||
|
@ -94,6 +511,7 @@ int gf_free(gf_t *gf, int recursive)
|
|||
free(h->base_gf);
|
||||
}
|
||||
if (h->free_me) free(h);
|
||||
return 0; /* Making compiler happy */
|
||||
}
|
||||
|
||||
void gf_alignment_error(char *s, int a)
|
||||
|
@ -105,9 +523,9 @@ void gf_alignment_error(char *s, int a)
|
|||
}
|
||||
|
||||
static
|
||||
void gf_invert_binary_matrix(int *mat, int *inv, int rows) {
|
||||
void gf_invert_binary_matrix(uint32_t *mat, uint32_t *inv, int rows) {
|
||||
int cols, i, j, k;
|
||||
int tmp;
|
||||
uint32_t tmp;
|
||||
|
||||
cols = rows;
|
||||
|
||||
|
@ -172,34 +590,6 @@ uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp)
|
|||
return inv[0];
|
||||
}
|
||||
|
||||
/*
|
||||
void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
|
||||
{
|
||||
uint64_t p, ta, shift, tb;
|
||||
uint64_t *s64, *d64
|
||||
|
||||
s64 = rd->s_start;
|
||||
d64 = rd->d_start;
|
||||
|
||||
while (s64 < (uint64_t *) rd->s_top) {
|
||||
p = (rd->xor) ? *d64 : 0;
|
||||
ta = *s64;
|
||||
|
||||
shift = 0;
|
||||
while (ta != 0) {
|
||||
tb = base[ta&0xffff];
|
||||
p ^= (tb << shift);
|
||||
ta >>= 16;
|
||||
shift += 16;
|
||||
}
|
||||
|
||||
*d64 = p;
|
||||
d64++;
|
||||
s64++;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
|
||||
{
|
||||
uint64_t a, prod;
|
||||
|
@ -226,8 +616,8 @@ void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
|
|||
prod ^= base[a >> 48];
|
||||
prod ^= *d64;
|
||||
*d64 = prod;
|
||||
*s64++;
|
||||
*d64++;
|
||||
s64++;
|
||||
d64++;
|
||||
}
|
||||
} else {
|
||||
while (d64 != top) {
|
||||
|
@ -243,8 +633,8 @@ void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
|
|||
prod <<= 16;
|
||||
prod ^= base[a >> 48];
|
||||
*d64 = prod;
|
||||
*s64++;
|
||||
*d64++;
|
||||
s64++;
|
||||
d64++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -307,9 +697,71 @@ static void gf_slow_multiply_region(gf_region_data *rd, void *src, void *dest, v
|
|||
}
|
||||
}
|
||||
|
||||
/* If align>16, you align to 16 bytes, but make sure that within the aligned region bytes is a multiple of align. However, you make sure that the region itself is a multiple of align.
|
||||
/* JSP - The purpose of this procedure is to error check alignment,
|
||||
and to set up the region operation so that it can best leverage
|
||||
large words.
|
||||
|
||||
If align = -1, then this is cauchy. You need to make sure that bytes is a multiple of w. */
|
||||
It stores its information in rd.
|
||||
|
||||
Assuming you're not doing Cauchy coding, (see below for that),
|
||||
then w will be 4, 8, 16, 32 or 64. It can't be 128 (probably
|
||||
should change that).
|
||||
|
||||
src and dest must then be aligned on ceil(w/8)-byte boundaries.
|
||||
Moreover, bytes must be a multiple of ceil(w/8). If the variable
|
||||
align is equal to ceil(w/8), then we will set s_start = src,
|
||||
d_start = dest, s_top to (src+bytes) and d_top to (dest+bytes).
|
||||
And we return -- the implementation will go ahead and do the
|
||||
multiplication on individual words (e.g. using discrete logs).
|
||||
|
||||
If align is greater than ceil(w/8), then the implementation needs
|
||||
to work on groups of "align" bytes. For example, suppose you are
|
||||
implementing BYTWO, without SSE. Then you will be doing the region
|
||||
multiplication in units of 8 bytes, so align = 8. Or, suppose you
|
||||
are doing a Quad table in GF(2^4). You will be doing the region
|
||||
multiplication in units of 2 bytes, so align = 2. Or, suppose you
|
||||
are doing split multiplication with SSE operations in GF(2^8).
|
||||
Then align = 16. Worse yet, suppose you are doing split
|
||||
multiplication with SSE operations in GF(2^16), with or without
|
||||
ALTMAP. Then, you will be doing the multiplication on 256 bits at
|
||||
a time. So align = 32.
|
||||
|
||||
When align does not equal ceil(w/8), we split the region
|
||||
multiplication into three parts. We are going to make s_start be
|
||||
the first address greater than or equal to src that is a multiple
|
||||
of align. s_top is going to be the largest address <= src+bytes
|
||||
such that (s_top - s_start) is a multiple of align. We do the
|
||||
same with d_start and d_top. When we say that "src and dest must
|
||||
be aligned with respect to each other", we mean that s_start-src
|
||||
must equal d_start-dest.
|
||||
|
||||
Now, the region multiplication is done in three parts -- the part
|
||||
between src and s_start must be done using single words.
|
||||
Similarly, the part between s_top and src+bytes must also be done
|
||||
using single words. The part between s_start and s_top will be
|
||||
done in chunks of "align" bytes.
|
||||
|
||||
One final thing -- if align > 16, then s_start and d_start will be
|
||||
aligned on a 16 byte boundary. Perhaps we should have two
|
||||
variables: align and chunksize. Then we'd have s_start & d_start
|
||||
aligned to "align", and have s_top-s_start be a multiple of
|
||||
chunksize. That may be less confusing, but it would be a big
|
||||
change.
|
||||
|
||||
Finally, if align = -1, then we are doing Cauchy multiplication,
|
||||
using only XOR's. In this case, we're not going to care about
|
||||
alignment because we are just doing XOR's. Instead, the only
|
||||
thing we care about is that bytes must be a multiple of w.
|
||||
|
||||
This is not to say that alignment doesn't matter in performance
|
||||
with XOR's. See that discussion in gf_multby_one().
|
||||
|
||||
After you call gf_set_region_data(), the procedure
|
||||
gf_do_initial_region_alignment() calls gf->multiply.w32() on
|
||||
everything between src and s_start. The procedure
|
||||
gf_do_final_region_alignment() calls gf->multiply.w32() on
|
||||
everything between s_top and src+bytes.
|
||||
*/
|
||||
|
||||
void gf_set_region_data(gf_region_data *rd,
|
||||
gf_t *gf,
|
||||
|
@ -326,7 +778,7 @@ void gf_set_region_data(gf_region_data *rd,
|
|||
uint32_t a;
|
||||
unsigned long uls, uld;
|
||||
|
||||
if (gf == NULL) {
|
||||
if (gf == NULL) { /* JSP - Can be NULL if you're just doing XOR's */
|
||||
wb = 1;
|
||||
} else {
|
||||
h = gf->scratch;
|
||||
|
@ -347,7 +799,7 @@ void gf_set_region_data(gf_region_data *rd,
|
|||
|
||||
a = (align <= 16) ? align : 16;
|
||||
|
||||
if (align == -1) { /* This is cauchy. Error check bytes, then set up the pointers
|
||||
if (align == -1) { /* JSP: This is cauchy. Error check bytes, then set up the pointers
|
||||
so that there are no alignment regions. */
|
||||
if (bytes % h->w != 0) {
|
||||
fprintf(stderr, "Error in region multiply operation.\n");
|
||||
|
@ -386,14 +838,14 @@ void gf_set_region_data(gf_region_data *rd,
|
|||
}
|
||||
|
||||
uls %= a;
|
||||
if (uls != 0) uls = (align-uls);
|
||||
if (uls != 0) uls = (a-uls);
|
||||
rd->s_start = rd->src + uls;
|
||||
rd->d_start = rd->dest + uls;
|
||||
bytes -= uls;
|
||||
|
||||
bytes -= (bytes % align);
|
||||
rd->s_top = rd->s_start + bytes;
|
||||
rd->d_top = rd->d_start + bytes;
|
||||
|
||||
}
|
||||
|
||||
void gf_do_initial_region_alignment(gf_region_data *rd)
|
||||
|
@ -413,25 +865,76 @@ void gf_multby_zero(void *dest, int bytes, int xor)
|
|||
return;
|
||||
}
|
||||
|
||||
/* JSP - gf_multby_one tries to do this in the most efficient way
|
||||
possible. If xor = 0, then simply call memcpy() since that
|
||||
should be optimized by the system. Otherwise, try to do the xor
|
||||
in the following order:
|
||||
|
||||
If src and dest are aligned with respect to each other on 16-byte
|
||||
boundaries and you have SSE instructions, then use aligned SSE
|
||||
instructions.
|
||||
|
||||
If they aren't but you still have SSE instructions, use unaligned
|
||||
SSE instructions.
|
||||
|
||||
If there are no SSE instructions, but they are aligned with
|
||||
respect to each other on 8-byte boundaries, then do them with
|
||||
uint64_t's.
|
||||
|
||||
Otherwise, call gf_unaligned_xor(), which does the following:
|
||||
align a destination pointer along an 8-byte boundary, and then
|
||||
memcpy 64 bytes at a time from the src pointer to an array of
|
||||
uint64_t's. I'm not sure if that's the best -- probably needs
|
||||
testing, but this seems like it could be a black hole.
|
||||
*/
|
||||
|
||||
static void gf_unaligned_xor(void *src, void *dest, int bytes);
|
||||
|
||||
void gf_multby_one(void *src, void *dest, int bytes, int xor)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
__m128i ms, md;
|
||||
#endif
|
||||
unsigned long uls, uld;
|
||||
uint8_t *s8, *d8, *dtop8;
|
||||
uint64_t *s64, *d64, *dtop64;
|
||||
int abytes;
|
||||
|
||||
gf_region_data rd;
|
||||
|
||||
if (!xor) {
|
||||
memcpy(dest, src, bytes);
|
||||
return;
|
||||
}
|
||||
uls = (unsigned long) src;
|
||||
uld = (unsigned long) dest;
|
||||
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
s8 = (uint8_t *) src;
|
||||
d8 = (uint8_t *) dest;
|
||||
abytes = bytes & 0xfffffff0;
|
||||
if (uls % 16 == uld % 16) {
|
||||
gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 16);
|
||||
while (s8 != rd.s_start) {
|
||||
*d8 ^= *s8;
|
||||
d8++;
|
||||
s8++;
|
||||
}
|
||||
while (s8 < (uint8_t *) rd.s_top) {
|
||||
ms = _mm_load_si128 ((__m128i *)(s8));
|
||||
md = _mm_load_si128 ((__m128i *)(d8));
|
||||
md = _mm_xor_si128(md, ms);
|
||||
_mm_store_si128((__m128i *)(d8), md);
|
||||
s8 += 16;
|
||||
d8 += 16;
|
||||
}
|
||||
while (s8 != (uint8_t *) src + bytes) {
|
||||
*d8 ^= *s8;
|
||||
d8++;
|
||||
s8++;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
abytes = (bytes & 0xfffffff0);
|
||||
|
||||
while (d8 < (uint8_t *) dest + abytes) {
|
||||
ms = _mm_loadu_si128 ((__m128i *)(s8));
|
||||
|
@ -449,8 +952,11 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
|
|||
return;
|
||||
#endif
|
||||
|
||||
/* If you don't have SSE, you'd better be aligned..... */
|
||||
|
||||
if (uls % 8 != uld % 8) {
|
||||
gf_unaligned_xor(src, dest, bytes);
|
||||
return;
|
||||
}
|
||||
|
||||
gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 8);
|
||||
s8 = (uint8_t *) src;
|
||||
d8 = (uint8_t *) dest;
|
||||
|
@ -480,3 +986,47 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
|
|||
}
|
||||
return;
|
||||
}
|
||||
|
||||
#define UNALIGNED_BUFSIZE (8)
|
||||
|
||||
static void gf_unaligned_xor(void *src, void *dest, int bytes)
|
||||
{
|
||||
uint64_t scopy[UNALIGNED_BUFSIZE], *d64;
|
||||
int i;
|
||||
gf_region_data rd;
|
||||
uint8_t *s8, *d8;
|
||||
|
||||
/* JSP - call gf_set_region_data(), but use dest in both places. This is
|
||||
because I only want to set up dest. If I used src, gf_set_region_data()
|
||||
would fail because src and dest are not aligned to each other wrt
|
||||
8-byte pointers. I know this will actually align d_start to 16 bytes.
|
||||
If I change gf_set_region_data() to split alignment & chunksize, then
|
||||
I could do this correctly. */
|
||||
|
||||
gf_set_region_data(&rd, NULL, dest, dest, bytes, 1, 1, 8*UNALIGNED_BUFSIZE);
|
||||
s8 = (uint8_t *) src;
|
||||
d8 = (uint8_t *) dest;
|
||||
|
||||
while (d8 < (uint8_t *) rd.d_start) {
|
||||
*d8 ^= *s8;
|
||||
d8++;
|
||||
s8++;
|
||||
}
|
||||
|
||||
d64 = (uint64_t *) d8;
|
||||
while (d64 < (uint64_t *) rd.d_top) {
|
||||
memcpy(scopy, s8, 8*UNALIGNED_BUFSIZE);
|
||||
s8 += 8*UNALIGNED_BUFSIZE;
|
||||
for (i = 0; i < UNALIGNED_BUFSIZE; i++) {
|
||||
*d64 ^= scopy[i];
|
||||
d64++;
|
||||
}
|
||||
}
|
||||
|
||||
d8 = (uint8_t *) d64;
|
||||
while (d8 < (uint8_t *) (dest+bytes)) {
|
||||
*d8 ^= *s8;
|
||||
d8++;
|
||||
s8++;
|
||||
}
|
||||
}
|
||||
|
|
29
gf_54.c
29
gf_54.c
|
@ -1,29 +0,0 @@
|
|||
/*
|
||||
* Multiplies four and five in GF(2^4).
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "gf_complete.h"
|
||||
|
||||
main()
|
||||
{
|
||||
gf_t gf;
|
||||
void *scratch;
|
||||
int size;
|
||||
|
||||
size = gf_scratch_size(16, GF_MULT_SPLIT_TABLE,
|
||||
GF_REGION_SSE | GF_REGION_ALTMAP,
|
||||
GF_DIVIDE_DEFAULT,
|
||||
16, 4);
|
||||
if (size == -1) exit(1); /* It failed. That shouldn't happen*/
|
||||
scratch = (void *) malloc(size);
|
||||
if (scratch == NULL) { perror("malloc"); exit(1); }
|
||||
if (!gf_init_hard(&gf, 16, GF_MULT_SPLIT_TABLE,
|
||||
GF_REGION_SSE | GF_REGION_ALTMAP,
|
||||
GF_DIVIDE_DEFAULT,
|
||||
0, 16, 4, NULL, scratch)) exit(1);
|
||||
printf("Yo\n");
|
||||
}
|
2
gf_add.c
2
gf_add.c
|
@ -16,7 +16,7 @@ void usage(char *s)
|
|||
fprintf(stderr, " If w has an h on the end, treat a, b and the sum as hexadecimal (no 0x required)\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " legal w are: 1-32, 64 and 128\n");
|
||||
fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n");
|
||||
fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n");
|
||||
|
||||
if (s != NULL) fprintf(stderr, "%s", s);
|
||||
exit(1);
|
||||
|
|
|
@ -4,22 +4,30 @@
|
|||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef INTEL_SSE4
|
||||
#include <nmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
#ifdef INTEL_SSE4
|
||||
#define INTEL_SSSE3
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef INTEL_PCLMUL
|
||||
#include <wmmintrin.h>
|
||||
#ifdef INTEL_SSSE3
|
||||
#define INTEL_SSE2
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
/* This does either memcpy or xor, depending on "xor" */
|
||||
#ifdef INTEL_SSE2
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
|
||||
#ifdef INTEL_PCLMUL
|
||||
#include <wmmintrin.h>
|
||||
#ifdef INTEL_SSE4
|
||||
#define INTEL_SSE4_PCLMUL
|
||||
#endif
|
||||
#ifdef INTEL_SSSE3
|
||||
#define INTEL_SSSE3_PCLMUL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define GF_W128_IS_ZERO(val) (val[0] == 0 && val[1] == 0)
|
||||
#define GF_W128_EQUAL(val1, val2) ((val1[0] == val2[0]) && (val1[1] == val2[1]))
|
||||
|
||||
/* These are the different ways to perform multiplication.
|
||||
Not all are implemented for all values of w.
|
||||
|
@ -27,30 +35,30 @@ extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
|
|||
|
||||
typedef enum {GF_MULT_DEFAULT,
|
||||
GF_MULT_SHIFT,
|
||||
GF_MULT_CARRY_FREE,
|
||||
GF_MULT_GROUP,
|
||||
GF_MULT_BYTWO_p,
|
||||
GF_MULT_BYTWO_b,
|
||||
GF_MULT_TABLE,
|
||||
GF_MULT_LOG_TABLE,
|
||||
GF_MULT_LOG_ZERO,
|
||||
GF_MULT_LOG_ZERO_EXT,
|
||||
GF_MULT_SPLIT_TABLE,
|
||||
GF_MULT_COMPOSITE } gf_mult_type_t;
|
||||
|
||||
/* These are the different ways to optimize region
|
||||
operations. They are bits because you can compose them:
|
||||
You can mix SINGLE/DOUBLE/QUAD, LAZY, SSE/NOSSE, STDMAP/ALTMAP/CAUCHY.
|
||||
operations. They are bits because you can compose them.
|
||||
Certain optimizations only apply to certain gf_mult_type_t's.
|
||||
Again, please see documentation for how to use these */
|
||||
|
||||
#define GF_REGION_DEFAULT (0x0)
|
||||
#define GF_REGION_SINGLE_TABLE (0x1)
|
||||
#define GF_REGION_DOUBLE_TABLE (0x2)
|
||||
#define GF_REGION_QUAD_TABLE (0x4)
|
||||
#define GF_REGION_LAZY (0x8)
|
||||
#define GF_REGION_SSE (0x10)
|
||||
#define GF_REGION_NOSSE (0x20)
|
||||
#define GF_REGION_STDMAP (0x40)
|
||||
#define GF_REGION_ALTMAP (0x80)
|
||||
#define GF_REGION_CAUCHY (0x100)
|
||||
#define GF_REGION_DOUBLE_TABLE (0x1)
|
||||
#define GF_REGION_QUAD_TABLE (0x2)
|
||||
#define GF_REGION_LAZY (0x4)
|
||||
#define GF_REGION_SSE (0x8)
|
||||
#define GF_REGION_NOSSE (0x10)
|
||||
#define GF_REGION_ALTMAP (0x20)
|
||||
#define GF_REGION_CAUCHY (0x40)
|
||||
|
||||
typedef uint32_t gf_region_type_t;
|
||||
|
||||
|
@ -74,6 +82,9 @@ typedef uint32_t gf_val_32_t;
|
|||
typedef uint64_t gf_val_64_t;
|
||||
typedef uint64_t *gf_val_128_t;
|
||||
|
||||
extern int _gf_errno;
|
||||
extern void gf_error();
|
||||
|
||||
typedef struct gf *GFP;
|
||||
|
||||
typedef union gf_func_a_b {
|
||||
|
@ -109,8 +120,21 @@ typedef struct gf {
|
|||
void *scratch;
|
||||
} gf_t;
|
||||
|
||||
/* Initializes the GF to defaults. Pass it a pointer to a gf_t.
|
||||
Returns 0 on failure, 1 on success. */
|
||||
|
||||
extern int gf_init_easy(GFP gf, int w);
|
||||
|
||||
/* Initializes the GF changing the defaults.
|
||||
Returns 0 on failure, 1 on success.
|
||||
Pass it a pointer to a gf_t.
|
||||
For mult_type and divide_type, use one of gf_mult_type_t gf_divide_type_t .
|
||||
For region_type, OR together the GF_REGION_xxx's defined above.
|
||||
Use 0 as prim_poly for defaults. Otherwise, the leading 1 is optional.
|
||||
Use NULL for scratch_memory to have init_hard allocate memory. Otherwise,
|
||||
use gf_scratch_size() to determine how big scratch_memory has to be.
|
||||
*/
|
||||
|
||||
extern int gf_init_hard(GFP gf,
|
||||
int w,
|
||||
int mult_type,
|
||||
|
@ -122,6 +146,9 @@ extern int gf_init_hard(GFP gf,
|
|||
GFP base_gf,
|
||||
void *scratch_memory);
|
||||
|
||||
/* Determines the size for scratch_memory.
|
||||
Returns 0 on failure and non-zero on success. */
|
||||
|
||||
extern int gf_scratch_size(int w,
|
||||
int mult_type,
|
||||
int region_type,
|
||||
|
@ -129,25 +156,32 @@ extern int gf_scratch_size(int w,
|
|||
int arg1,
|
||||
int arg2);
|
||||
|
||||
/* This reports the gf_scratch_size of a gf_t that has already been created */
|
||||
|
||||
extern int gf_size(GFP gf);
|
||||
|
||||
/* Frees scratch memory if gf_init_easy/gf_init_hard called malloc.
|
||||
If recursive = 1, then it calls itself recursively on base_gf. */
|
||||
|
||||
extern int gf_free(GFP gf, int recursive);
|
||||
|
||||
/* This is support for inline single multiplications and divisions.
|
||||
I know it's yucky, but if you've got to be fast, you've got to be fast.
|
||||
We'll support inlines for w=4, w=8 and w=16.
|
||||
We support inlining for w=4, w=8 and w=16.
|
||||
|
||||
To use inline multiplication and division with w=4 or 8, you should use the
|
||||
default gf_t, or one with a single table. Otherwise, gf_w4/8_get_mult_table()
|
||||
will return NULL. */
|
||||
will return NULL. Similarly, with w=16, the gf_t must be LOG */
|
||||
|
||||
uint8_t *gf_w4_get_mult_table(GFP gf);
|
||||
uint8_t *gf_w4_get_div_table(GFP gf);
|
||||
|
||||
#define GF_W4_INLINE_MULTDIV(table, a, b) (table[((a)<<4)|b])
|
||||
#define GF_W4_INLINE_MULTDIV(table, a, b) (table[((a)<<4)|(b)])
|
||||
|
||||
uint8_t *gf_w8_get_mult_table(GFP gf);
|
||||
uint8_t *gf_w8_get_div_table(GFP gf);
|
||||
|
||||
#define GF_W8_INLINE_MULTDIV(table, a, b) (table[(((uint32_t) a)<<8)|b])
|
||||
#define GF_W8_INLINE_MULTDIV(table, a, b) (table[(((uint32_t) (a))<<8)|(b)])
|
||||
|
||||
uint16_t *gf_w16_get_log_table(GFP gf);
|
||||
uint16_t *gf_w16_get_mult_alog_table(GFP gf);
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* gf_example_5.c
|
||||
*
|
||||
* Demonstrating altmap and extract_word
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <getopt.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_rand.h"
|
||||
|
||||
/* Writes the one-line synopsis for gf_example_5 to stderr and terminates
   the process with exit status 1.  The argument s is accepted but unused. */
void usage(char *s)
{
  (void) s;
  fputs("usage: gf_example_5\n", stderr);
  exit(1);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
uint16_t *a, *b;
|
||||
int i, j;
|
||||
gf_t gf;
|
||||
|
||||
if (gf_init_hard(&gf, 16, GF_MULT_SPLIT_TABLE, GF_REGION_ALTMAP, GF_DIVIDE_DEFAULT,
|
||||
0, 16, 4, NULL, NULL) == 0) {
|
||||
fprintf(stderr, "gf_init_hard failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
a = (uint16_t *) malloc(200);
|
||||
b = (uint16_t *) malloc(200);
|
||||
|
||||
a += 6;
|
||||
b += 6;
|
||||
|
||||
MOA_Seed(0);
|
||||
|
||||
for (i = 0; i < 30; i++) a[i] = MOA_Random_W(16, 1);
|
||||
|
||||
gf.multiply_region.w32(&gf, a, b, 0x1234, 30*2, 0);
|
||||
|
||||
printf("a: 0x%lx b: 0x%lx\n", (unsigned long) a, (unsigned long) b);
|
||||
|
||||
for (i = 0; i < 30; i += 10) {
|
||||
printf("\n");
|
||||
printf(" ");
|
||||
for (j = 0; j < 10; j++) printf(" %4d", i+j);
|
||||
printf("\n");
|
||||
|
||||
printf("a:");
|
||||
for (j = 0; j < 10; j++) printf(" %04x", a[i+j]);
|
||||
printf("\n");
|
||||
|
||||
printf("b:");
|
||||
for (j = 0; j < 10; j++) printf(" %04x", b[i+j]);
|
||||
printf("\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
for (i = 0; i < 15; i ++) {
|
||||
printf("Word %2d: 0x%04x * 0x1234 = 0x%04x ", i,
|
||||
gf.extract_word.w32(&gf, a, 30*2, i),
|
||||
gf.extract_word.w32(&gf, b, 30*2, i));
|
||||
printf("Word %2d: 0x%04x * 0x1234 = 0x%04x\n", i+15,
|
||||
gf.extract_word.w32(&gf, a, 30*2, i+15),
|
||||
gf.extract_word.w32(&gf, b, 30*2, i+15));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* gf_example_6.c
|
||||
*
|
||||
* Demonstrating altmap and extract_word
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <getopt.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_rand.h"
|
||||
|
||||
/* Writes the one-line synopsis for gf_example_6 to stderr and terminates
   the process with exit status 1.  The argument s is accepted but unused. */
void usage(char *s)
{
  (void) s;
  fputs("usage: gf_example_6\n", stderr);
  exit(1);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
uint32_t *a, *b;
|
||||
int i, j;
|
||||
gf_t gf, gf_16;
|
||||
|
||||
if (gf_init_hard(&gf_16, 16, GF_MULT_LOG_TABLE, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
|
||||
0, 0, 0, NULL, NULL) == 0) {
|
||||
fprintf(stderr, "gf_init_hard (6) failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (gf_init_hard(&gf, 32, GF_MULT_COMPOSITE, GF_REGION_ALTMAP, GF_DIVIDE_DEFAULT,
|
||||
0, 2, 0, &gf_16, NULL) == 0) {
|
||||
fprintf(stderr, "gf_init_hard (32) failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
a = (uint32_t *) malloc(200);
|
||||
b = (uint32_t *) malloc(200);
|
||||
|
||||
a += 3;
|
||||
b += 3;
|
||||
|
||||
MOA_Seed(0);
|
||||
|
||||
for (i = 0; i < 30; i++) a[i] = MOA_Random_W(32, 1);
|
||||
|
||||
gf.multiply_region.w32(&gf, a, b, 0x12345678, 30*4, 0);
|
||||
|
||||
printf("a: 0x%lx b: 0x%lx\n", (unsigned long) a, (unsigned long) b);
|
||||
|
||||
for (i = 0; i < 30; i += 10) {
|
||||
printf("\n");
|
||||
printf(" ");
|
||||
for (j = 0; j < 10; j++) printf(" %8d", i+j);
|
||||
printf("\n");
|
||||
|
||||
printf("a:");
|
||||
for (j = 0; j < 10; j++) printf(" %08x", a[i+j]);
|
||||
printf("\n");
|
||||
|
||||
printf("b:");
|
||||
for (j = 0; j < 10; j++) printf(" %08x", b[i+j]);
|
||||
printf("\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
for (i = 0; i < 15; i ++) {
|
||||
printf("Word %2d: 0x%08x * 0x12345678 = 0x%08x ", i,
|
||||
gf.extract_word.w32(&gf, a, 30*4, i),
|
||||
gf.extract_word.w32(&gf, b, 30*4, i));
|
||||
printf("Word %2d: 0x%08x * 0x12345678 = 0x%08x\n", i+15,
|
||||
gf.extract_word.w32(&gf, a, 30*4, i+15),
|
||||
gf.extract_word.w32(&gf, b, 30*4, i+15));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* gf_example_7.c
|
||||
*
|
||||
* Demonstrating extract_word and Cauchy
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <getopt.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_rand.h"
|
||||
|
||||
/* Writes the one-line synopsis for gf_example_7 to stderr and terminates
   the process with exit status 1.  The argument s is accepted but unused. */
void usage(char *s)
{
  (void) s;
  fputs("usage: gf_example_7\n", stderr);
  exit(1);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
uint8_t *a, *b;
|
||||
int i, j;
|
||||
gf_t gf;
|
||||
|
||||
if (gf_init_hard(&gf, 3, GF_MULT_TABLE, GF_REGION_CAUCHY, GF_DIVIDE_DEFAULT, 0, 0, 0, NULL, NULL) == 0) {
|
||||
fprintf(stderr, "gf_init_hard failed\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
a = (uint8_t *) malloc(3);
|
||||
b = (uint8_t *) malloc(3);
|
||||
|
||||
MOA_Seed(0);
|
||||
|
||||
for (i = 0; i < 3; i++) a[i] = MOA_Random_W(8, 1);
|
||||
|
||||
gf.multiply_region.w32(&gf, a, b, 5, 3, 0);
|
||||
|
||||
printf("a: 0x%lx b: 0x%lx\n", (unsigned long) a, (unsigned long) b);
|
||||
|
||||
printf("\n");
|
||||
printf("a: 0x%02x 0x%02x 0x%02x\n", a[0], a[1], a[2]);
|
||||
printf("b: 0x%02x 0x%02x 0x%02x\n", b[0], b[1], b[2]);
|
||||
printf("\n");
|
||||
|
||||
printf("a bits:");
|
||||
for (i = 0; i < 3; i++) {
|
||||
printf(" ");
|
||||
for (j = 7; j >= 0; j--) printf("%c", (a[i] & (1 << j)) ? '1' : '0');
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("b bits:");
|
||||
for (i = 0; i < 3; i++) {
|
||||
printf(" ");
|
||||
for (j = 7; j >= 0; j--) printf("%c", (b[i] & (1 << j)) ? '1' : '0');
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
printf("\n");
|
||||
for (i = 0; i < 8; i++) {
|
||||
printf("Word %2d: %d * 5 = %d\n", i,
|
||||
gf.extract_word.w32(&gf, a, 3, i),
|
||||
gf.extract_word.w32(&gf, b, 3, i));
|
||||
}
|
||||
}
|
115
gf_general.c
115
gf_general.c
|
@ -95,12 +95,20 @@ void gf_general_set_random(gf_general_t *v, int w, int zero_ok)
|
|||
}
|
||||
}
|
||||
|
||||
void gf_general_val_to_s(gf_general_t *v, int w, char *s)
|
||||
void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex)
|
||||
{
|
||||
if (w <= 32) {
|
||||
sprintf(s, "%x", v->w32);
|
||||
if (hex) {
|
||||
sprintf(s, "%x", v->w32);
|
||||
} else {
|
||||
sprintf(s, "%d", v->w32);
|
||||
}
|
||||
} else if (w <= 64) {
|
||||
sprintf(s, "%llx", (long long unsigned int) v->w64);
|
||||
if (hex) {
|
||||
sprintf(s, "%llx", (long long unsigned int) v->w64);
|
||||
} else {
|
||||
sprintf(s, "%lld", (long long unsigned int) v->w64);
|
||||
}
|
||||
} else {
|
||||
if (v->w128[0] == 0) {
|
||||
sprintf(s, "%llx", (long long unsigned int) v->w128[1]);
|
||||
|
@ -111,6 +119,64 @@ void gf_general_val_to_s(gf_general_t *v, int w, char *s)
|
|||
}
|
||||
}
|
||||
|
||||
/* Parses the string s into v for a field of width w.

   v    receives the value: v->w32 when w <= 32, v->w64 when 32 < w <= 64,
        and v->w128[0] (high half) / v->w128[1] (low half) otherwise.
   w    the field width, which selects the member of v that is written.
   s    the text to parse.  For w > 64 it is modified temporarily while the
        two halves are scanned, and is always restored before returning.
   hex  non-zero to read s as hexadecimal, zero to read it as decimal.
        Decimal is not supported for w > 64.

   Returns 1 on success and 0 on failure.  Failure means that nothing could
   be scanned (including an empty string), that the value has bits set at or
   above bit w when w < 32, or, for w > 64, that s is longer than 32
   characters or hex is zero. */
int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex)
{
  int l;
  int save;
  int ok;
  unsigned int u32;          /* scanned into temporaries so that the scanf */
  unsigned long long u64;    /* conversion always matches the pointer type */

  if (w <= 32) {
    /* Compare against 1 rather than 0: sscanf returns EOF, not 0, when s is
       empty.  %u accepts the same text as %d, including a leading minus
       sign, but stores into an unsigned object. */
    if (hex) {
      if (sscanf(s, "%x", &u32) != 1) return 0;
    } else {
      if (sscanf(s, "%u", &u32) != 1) return 0;
    }
    v->w32 = u32;
    if (w == 32) return 1;
    if (w == 31) {
      if (v->w32 & (1U << 31)) return 0;
      return 1;
    }
    /* Reject any value that does not fit in w bits. */
    if (v->w32 & ~((1U << w)-1)) return 0;
    return 1;
  } else if (w <= 64) {
    if (hex) {
      if (sscanf(s, "%llx", &u64) != 1) return 0;
    } else {
      if (sscanf(s, "%llu", &u64) != 1) return 0;
    }
    v->w64 = u64;
    return 1;
  } else {
    if (!hex) return 0;
    l = strlen(s);
    if (l <= 16) {
      /* At most 16 hex digits: everything belongs to the low half. */
      v->w128[0] = 0;
      if (sscanf(s, "%llx", &u64) != 1) return 0;
      v->w128[1] = u64;
      return 1;
    } else {
      if (l > 32) return 0;
      /* Split the text 16 characters from the end: terminate the string
         there to scan the high half, then put the character back before
         scanning the low half.  Without the restore the second scan sees an
         empty string and always fails. */
      save = s[l-16];
      s[l-16] = '\0';
      ok = (sscanf(s, "%llx", &u64) == 1);
      s[l-16] = save;
      if (!ok) return 0;
      v->w128[0] = u64;
      if (sscanf(s+(l-16), "%llx", &u64) != 1) return 0;
      v->w128[1] = u64;
      return 1;
    }
  }
}
|
||||
|
||||
/* Stores in c the bitwise XOR of a and b.

   The field width is read from the gf_internal_t that gf->scratch points
   to, and it selects which member of the values is combined: w32 for
   widths up to 32, w64 for widths up to 64, and both elements of w128 for
   anything wider. */
void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
{
  gf_internal_t *info = (gf_internal_t *) gf->scratch;
  int width = info->w;
  int half;

  if (width > 64) {
    for (half = 0; half < 2; half++) {
      c->w128[half] = a->w128[half] ^ b->w128[half];
    }
    return;
  }

  if (width > 32) {
    c->w64 = a->w64 ^ b->w64;
    return;
  }

  c->w32 = a->w32 ^ b->w32;
}
|
||||
|
||||
void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
|
||||
{
|
||||
gf_internal_t *h;
|
||||
|
@ -229,19 +295,19 @@ void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *o
|
|||
|
||||
if (!gf_general_are_equal(&ft, &sb, w)) {
|
||||
|
||||
printf("Problem with region multiply (all values in hex):\n");
|
||||
printf(" Target address base: 0x%lx. Word 0x%x of 0x%x. Xor: %d\n",
|
||||
fprintf(stderr,"Problem with region multiply (all values in hex):\n");
|
||||
fprintf(stderr," Target address base: 0x%lx. Word 0x%x of 0x%x. Xor: %d\n",
|
||||
(unsigned long) final_target, i, words, xor);
|
||||
gf_general_val_to_s(a, w, sa);
|
||||
gf_general_val_to_s(&oa, w, soa);
|
||||
gf_general_val_to_s(&ot, w, sot);
|
||||
gf_general_val_to_s(&ft, w, sft);
|
||||
gf_general_val_to_s(&sb, w, ssb);
|
||||
printf(" Value: %s\n", sa);
|
||||
printf(" Original source word: %s\n", soa);
|
||||
if (xor) printf(" XOR with target word: %s\n", sot);
|
||||
printf(" Product word: %s\n", sft);
|
||||
printf(" It should be: %s\n", ssb);
|
||||
gf_general_val_to_s(a, w, sa, 1);
|
||||
gf_general_val_to_s(&oa, w, soa, 1);
|
||||
gf_general_val_to_s(&ot, w, sot, 1);
|
||||
gf_general_val_to_s(&ft, w, sft, 1);
|
||||
gf_general_val_to_s(&sb, w, ssb, 1);
|
||||
fprintf(stderr," Value: %s\n", sa);
|
||||
fprintf(stderr," Original source word: %s\n", soa);
|
||||
if (xor) fprintf(stderr," XOR with target word: %s\n", sot);
|
||||
fprintf(stderr," Product word: %s\n", sft);
|
||||
fprintf(stderr," It should be: %s\n", ssb);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
@ -251,7 +317,7 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
|
|||
{
|
||||
void *top;
|
||||
gf_general_t g;
|
||||
uint8_t *r8;
|
||||
uint8_t *r8, *r8a;
|
||||
uint16_t *r16;
|
||||
uint32_t *r32;
|
||||
uint64_t *r64;
|
||||
|
@ -263,6 +329,8 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
|
|||
However, don't allow for zeros in rb, because that will screw up
|
||||
division.
|
||||
|
||||
When w is 4, you fill the regions with random 4-bit words in each byte.
|
||||
|
||||
Otherwise, treat every four bytes as an uint32_t
|
||||
and fill it with a random value mod (1 << w).
|
||||
*/
|
||||
|
@ -296,6 +364,17 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
|
|||
}
|
||||
rb += (w/8);
|
||||
}
|
||||
} else if (w == 4) {
|
||||
r8a = (uint8_t *) ra;
|
||||
r8 = (uint8_t *) rb;
|
||||
while (r8 < (uint8_t *) top) {
|
||||
gf_general_set_random(&g, w, 1);
|
||||
*r8a = g.w32;
|
||||
gf_general_set_random(&g, w, 0);
|
||||
*r8 = g.w32;
|
||||
r8a++;
|
||||
r8++;
|
||||
}
|
||||
} else {
|
||||
r32 = (uint32_t *) ra;
|
||||
for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 1);
|
||||
|
@ -306,7 +385,7 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
|
|||
|
||||
/* This sucks, but in order to time, you really need to avoid putting ifs in
|
||||
the inner loops. So, I'm doing a separate timing test for each w:
|
||||
8, 16, 32, 64, 128 and everything else. Fortunately, the "everything else"
|
||||
(4 & 8), 16, 32, 64, 128 and everything else. Fortunately, the "everything else"
|
||||
tests can be equivalent to w=32.
|
||||
|
||||
I'm also putting the results back into ra, because otherwise, the optimizer might
|
||||
|
@ -327,7 +406,7 @@ int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, cha
|
|||
w = h->w;
|
||||
top = ra + size;
|
||||
|
||||
if (w == 8) {
|
||||
if (w == 8 || w == 4) {
|
||||
r8a = (uint8_t *) ra;
|
||||
r8b = (uint8_t *) rb;
|
||||
top8 = (uint8_t *) top;
|
||||
|
|
|
@ -32,10 +32,12 @@ int gf_general_is_zero(gf_general_t *v, int w);
|
|||
int gf_general_is_one(gf_general_t *v, int w);
|
||||
int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w);
|
||||
|
||||
void gf_general_val_to_s(gf_general_t *v, int w, char *s);
|
||||
void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex);
|
||||
int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex);
|
||||
|
||||
void gf_general_set_random(gf_general_t *v, int w, int zero_ok);
|
||||
|
||||
void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
|
||||
void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
|
||||
void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
|
||||
void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b);
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_rand.h"
|
||||
|
|
100
gf_int.h
100
gf_int.h
|
@ -51,11 +51,15 @@ extern int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divid
|
|||
void gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor);
|
||||
gf_val_32_t gf_wgen_extract_word(gf_t *gf, void *start, int bytes, int index);
|
||||
|
||||
|
||||
extern void gf_alignment_error(char *s, int a);
|
||||
|
||||
extern uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp);
|
||||
|
||||
/* This returns the correct default for prim_poly when base is used as the base
|
||||
field for COMPOSITE. It returns 0 if we don't have a default prim_poly. */
|
||||
|
||||
extern uint64_t gf_composite_get_default_poly(gf_t *base);
|
||||
|
||||
/* This structure lets you define a region multiply. It helps because you can handle
|
||||
unaligned portions of the data with the procedures below, which really cleans
|
||||
up the code. */
|
||||
|
@ -96,3 +100,97 @@ extern void gf_do_final_region_alignment(gf_region_data *rd);
|
|||
extern void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base);
|
||||
|
||||
extern void gf_multby_zero(void *dest, int bytes, int xor);
|
||||
extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
|
||||
|
||||
typedef enum {GF_E_MDEFDIV, /* Div != Default && Mult == Default */
|
||||
GF_E_MDEFREG, /* Reg != Default && Mult == Default */
|
||||
GF_E_MDEFARG, /* Args != Default && Mult == Default */
|
||||
GF_E_DIVCOMP, /* Mult == Composite && Div != Default */
|
||||
GF_E_CAUCOMP, /* Mult == Composite && Reg == CAUCHY */
|
||||
GF_E_DOUQUAD, /* Reg == DOUBLE && Reg == QUAD */
|
||||
GF_E_SSE__NO, /* Reg == SSE && Reg == NOSSE */
|
||||
GF_E_CAUCHYB, /* Reg == CAUCHY && Other Reg */
|
||||
GF_E_CAUGT32, /* Reg == CAUCHY && w > 32*/
|
||||
GF_E_ARG1SET, /* Arg1 != 0 && Mult \notin COMPOSITE/SPLIT/GROUP */
|
||||
GF_E_ARG2SET, /* Arg2 != 0 && Mult \notin SPLIT/GROUP */
|
||||
GF_E_MATRIXW, /* Div == MATRIX && w > 32 */
|
||||
GF_E_BAD___W, /* Illegal w */
|
||||
GF_E_DOUBLET, /* Reg == DOUBLE && Mult != TABLE */
|
||||
GF_E_DOUBLEW, /* Reg == DOUBLE && w \notin {4,8} */
|
||||
GF_E_DOUBLEJ, /* Reg == DOUBLE && other Reg */
|
||||
GF_E_DOUBLEL, /* Reg == DOUBLE & LAZY but w = 4 */
|
||||
GF_E_QUAD__T, /* Reg == QUAD && Mult != TABLE */
|
||||
GF_E_QUAD__W, /* Reg == QUAD && w != 4 */
|
||||
GF_E_QUAD__J, /* Reg == QUAD && other Reg */
|
||||
GF_E_LAZY__X, /* Reg == LAZY && not DOUBLE or QUAD*/
|
||||
GF_E_ALTSHIF, /* Mult == Shift && Reg == ALTMAP */
|
||||
GF_E_SSESHIF, /* Mult == Shift && Reg == SSE|NOSSE */
|
||||
GF_E_ALT_CFM, /* Mult == CARRY_FREE && Reg == ALTMAP */
|
||||
GF_E_SSE_CFM, /* Mult == CARRY_FREE && Reg == SSE|NOSSE */
|
||||
GF_E_PCLMULX, /* Mult == Carry_Free && No PCLMUL */
|
||||
GF_E_ALT_BY2, /* Mult == Bytwo_x && Reg == ALTMAP */
|
||||
GF_E_BY2_SSE, /* Mult == Bytwo_x && Reg == SSE && No SSE2 */
|
||||
GF_E_LOGBADW, /* Mult == LOGx, w too big*/
|
||||
GF_E_LOG___J, /* Mult == LOGx, && Reg == SSE|ALTMAP|NOSSE */
|
||||
GF_E_ZERBADW, /* Mult == LOG_ZERO, w \notin {8,16} */
|
||||
GF_E_ZEXBADW, /* Mult == LOG_ZERO_EXT, w != 8 */
|
||||
GF_E_LOGPOLY, /* Mult == LOG & poly not primitive */
|
||||
GF_E_GR_ARGX, /* Mult == GROUP, Bad arg1/2 */
|
||||
GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
|
||||
GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
|
||||
GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
|
||||
GF_E_GR_SSE4, /* Mult == GROUP, w == 128, No SSE4 */
|
||||
GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
|
||||
GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */
|
||||
GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */
|
||||
GF_E_TABLE_W, /* Mult == TABLE, w too big */
|
||||
GF_E_TAB_SSE, /* Mult == TABLE, SSE|NOSSE only apply to w == 4 */
|
||||
GF_E_TABSSE3, /* Mult == TABLE, Need SSSE3 for SSE */
|
||||
GF_E_TAB_ALT, /* Mult == TABLE, Reg == ALTMAP */
|
||||
GF_E_SP128AR, /* Mult == SPLIT, w=128, Bad arg1/arg2 */
|
||||
GF_E_SP128AL, /* Mult == SPLIT, w=128, SSE requires ALTMAP */
|
||||
GF_E_SP128AS, /* Mult == SPLIT, w=128, ALTMAP requires SSE */
|
||||
GF_E_SP128_A, /* Mult == SPLIT, w=128, SSE only with 4/128 */
|
||||
GF_E_SP128_S, /* Mult == SPLIT, w=128, ALTMAP only with 4/128 */
|
||||
GF_E_SPLIT_W, /* Mult == SPLIT, Bad w (8, 16, 32, 64, 128) */
|
||||
GF_E_SP_16AR, /* Mult == SPLIT, w=16, Bad arg1/arg2 */
|
||||
GF_E_SP_16_A, /* Mult == SPLIT, w=16, ALTMAP only with 4/16 */
|
||||
GF_E_SP_16_S, /* Mult == SPLIT, w=16, SSE only with 4/16 */
|
||||
GF_E_SP_32AR, /* Mult == SPLIT, w=32, Bad arg1/arg2 */
|
||||
GF_E_SP_32AS, /* Mult == SPLIT, w=32, ALTMAP requires SSE */
|
||||
GF_E_SP_32_A, /* Mult == SPLIT, w=32, ALTMAP only with 4/32 */
|
||||
GF_E_SP_32_S, /* Mult == SPLIT, w=32, SSE only with 4/32 */
|
||||
GF_E_SP_64AR, /* Mult == SPLIT, w=64, Bad arg1/arg2 */
|
||||
GF_E_SP_64AS, /* Mult == SPLIT, w=64, ALTMAP requires SSE */
|
||||
GF_E_SP_64_A, /* Mult == SPLIT, w=64, ALTMAP only with 4/64 */
|
||||
GF_E_SP_64_S, /* Mult == SPLIT, w=64, SSE only with 4/64 */
|
||||
GF_E_SP_8_AR, /* Mult == SPLIT, w=8, Bad arg1/arg2 */
|
||||
GF_E_SP_8__A, /* Mult == SPLIT, w=8, no ALTMAP */
|
||||
GF_E_SP_SSE3, /* Mult == SPLIT, Need SSSE3 for SSE */
|
||||
GF_E_COMP_A2, /* Mult == COMP, arg1 must be = 2 */
|
||||
GF_E_COMP_SS, /* Mult == COMP, SSE|NOSSE */
|
||||
GF_E_COMP__W, /* Mult == COMP, Bad w. */
|
||||
GF_E_UNKFLAG, /* Unknown flag in create_from.... */
|
||||
GF_E_UNKNOWN, /* Unknown mult_type. */
|
||||
GF_E_UNK_REG, /* Unknown region_type. */
|
||||
GF_E_UNK_DIV, /* Unknown divide_type. */
|
||||
GF_E_CFM___W, /* Mult == CFM, Bad w. */
|
||||
GF_E_CFM4POL, /* Mult == CFM & Prim Poly has high bits set. */
|
||||
GF_E_CFM8POL, /* Mult == CFM & Prim Poly has high bits set. */
|
||||
GF_E_CF16POL, /* Mult == CFM & Prim Poly has high bits set. */
|
||||
GF_E_CF32POL, /* Mult == CFM & Prim Poly has high bits set. */
|
||||
GF_E_CF64POL, /* Mult == CFM & Prim Poly has high bits set. */
|
||||
GF_E_FEWARGS, /* Too few args in argc/argv. */
|
||||
GF_E_BADPOLY, /* Bad primitive polynomial -- too many bits set. */
|
||||
GF_E_COMP_PP, /* Bad primitive polynomial -- bigger than sub-field. */
|
||||
GF_E_COMPXPP, /* Can't derive a default pp for composite field. */
|
||||
GF_E_BASE__W, /* Composite -- Base field is the wrong size. */
|
||||
GF_E_TWOMULT, /* In create_from... two -m's. */
|
||||
GF_E_TWO_DIV, /* In create_from... two -d's. */
|
||||
GF_E_POLYSPC, /* Bad number after -p. */
|
||||
GF_E_SPLITAR, /* Ran out of arguments in SPLIT */
|
||||
GF_E_SPLITNU, /* Arguments not integers in SPLIT. */
|
||||
GF_E_GROUPAR, /* Ran out of arguments in GROUP */
|
||||
GF_E_GROUPNU, /* Arguments not integers in GROUP. */
|
||||
GF_E_DEFAULT } gf_error_type_t;
|
||||
|
||||
|
|
307
gf_method.c
307
gf_method.c
|
@ -11,179 +11,172 @@
|
|||
#include <time.h>
|
||||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_int.h"
|
||||
#include "gf_method.h"
|
||||
|
||||
/* Writes to stderr the help text describing how the MULTIPLY, REGION and
   DIVIDE methods are specified on the command line.  The text is kept in a
   table, one entry per output line, and emitted in order. */
void methods_to_stderr()
{
  static const char *const help[] = {
    "To specify the methods, do one of the following: \n",
    " - leave empty to use defaults\n",
    " - use a single dash to use defaults\n",
    " - specify MULTIPLY REGION DIVIDE\n",
    "\n",
    "Legal values of MULTIPLY:\n",
    " SHIFT: shift\n",
    " GROUP g_mult g_reduce: the Group technique - see the paper\n",
    " BYTWO_p: BYTWO doubling the product.\n",
    " BYTWO_b: BYTWO doubling b (more efficient thatn BYTWO_p)\n",
    " TABLE: Full multiplication table\n",
    " LOG: Discrete logs\n",
    " LOG_ZERO: Discrete logs with a large table for zeros\n",
    " LOG_ZERO_EXT: Discrete logs with an extra large table for zeros\n",
    " SPLIT g_a g_b: Split tables defined by g_a and g_b\n",
    " COMPOSITE k rec METHOD: Composite field. GF((2^l)^k), l=w/k.\n",
    " rec = 0 means inline single multiplication\n",
    " rec = 1 means recursive single multiplication\n",
    " METHOD is the method of the base field in GF(2^l)\n",
    "\n",
    "Legal values of REGION: Specify multiples with commas e.g. 'DOUBLE,LAZY'\n",
    " -: Use defaults\n",
    " SINGLE/DOUBLE/QUAD: Expand tables\n",
    " LAZY: Lazily create table (only applies to TABLE and SPLIT)\n",
    " SSE/NOSSE: Use 128-bit SSE instructions if you can\n",
    " CAUCHY/ALTMAP/STDMAP: Use different memory mappings\n",
    "\n",
    "Legal values of DIVIDE:\n",
    " -: Use defaults\n",
    " MATRIX: Use matrix inversion\n",
    " EUCLID: Use the extended Euclidian algorithm.\n",
    "\n",
    "See the user's manual for more information.\n",
    "There are many restrictions, so it is better to simply use defaults in most cases.\n"
  };
  size_t n;

  /* None of the lines contains a conversion specification, so writing them
     verbatim produces the same bytes as passing each one to fprintf. */
  for (n = 0; n < sizeof(help) / sizeof(help[0]); n++) {
    fputs(help[n], stderr);
  }
}
|
||||
|
||||
int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
|
||||
{
|
||||
int mult_type, divide_type, region_type;
|
||||
uint32_t prim_poly = 0;
|
||||
int arg1, arg2, subrg_size;
|
||||
uint64_t prim_poly;
|
||||
gf_t *base;
|
||||
char *crt, *x, *y;
|
||||
|
||||
if (argc <= starting || strcmp(argv[starting], "-") == 0) {
|
||||
if (!gf_init_easy(gf, w)) return 0;
|
||||
return (argc <= starting) ? starting : starting+1;
|
||||
}
|
||||
|
||||
mult_type = GF_MULT_DEFAULT;
|
||||
region_type = GF_REGION_DEFAULT;
|
||||
divide_type = GF_DIVIDE_DEFAULT;
|
||||
|
||||
arg1 = 0;
|
||||
arg2 = 0;
|
||||
prim_poly = 0;
|
||||
base = NULL;
|
||||
subrg_size = 0;
|
||||
|
||||
if (argc < starting+3) return 0;
|
||||
|
||||
if (strcmp(argv[starting], "SHIFT") == 0) {
|
||||
mult_type = GF_MULT_SHIFT;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "GROUP") == 0) {
|
||||
mult_type = GF_MULT_GROUP;
|
||||
if (argc < starting+5) return 0;
|
||||
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
|
||||
sscanf(argv[starting+2], "%d", &arg2) == 0 ||
|
||||
arg1 <= 0 || arg2 <= 0 || arg1 >= w || arg2 >= w) return 0;
|
||||
starting += 3;
|
||||
} else if (strcmp(argv[starting], "BYTWO_p") == 0) {
|
||||
mult_type = GF_MULT_BYTWO_p;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "BYTWO_b") == 0) {
|
||||
mult_type = GF_MULT_BYTWO_b;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "TABLE") == 0) {
|
||||
mult_type = GF_MULT_TABLE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "LOG") == 0) {
|
||||
mult_type = GF_MULT_LOG_TABLE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "LOG_ZERO") == 0) {
|
||||
mult_type = GF_MULT_LOG_TABLE;
|
||||
arg1 = 1;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "LOG_ZERO_EXT") == 0) {
|
||||
mult_type = GF_MULT_LOG_TABLE;
|
||||
arg1 = 2;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "SPLIT") == 0) {
|
||||
mult_type = GF_MULT_SPLIT_TABLE;
|
||||
if (argc < starting+5) return 0;
|
||||
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
|
||||
sscanf(argv[starting+2], "%d", &arg2) == 0 ||
|
||||
arg1 <= 0 || arg2 <= 0 || w % arg1 != 0 || w % arg2 != 0) return 0;
|
||||
starting += 3;
|
||||
} else if (strcmp(argv[starting], "COMPOSITE") == 0) {
|
||||
mult_type = GF_MULT_COMPOSITE;
|
||||
if (argc < starting+6) return 0;
|
||||
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
|
||||
sscanf(argv[starting+2], "%d", &arg2) == 0 ||
|
||||
arg1 <= 1 || w %arg1 != 0 || ((arg2 | 1) != 1)) return 0;
|
||||
base = (gf_t *) malloc(sizeof(gf_t));
|
||||
starting = create_gf_from_argv(base, w/arg1, argc, argv, starting+3);
|
||||
if (starting == 0) { free(base); return 0; }
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (argc < starting+2) {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (strcmp(argv[starting], "-") == 0) {
|
||||
region_type = GF_REGION_DEFAULT;
|
||||
} else {
|
||||
crt = strdup(argv[starting]);
|
||||
region_type = 0;
|
||||
x = crt;
|
||||
do {
|
||||
y = strchr(x, ',');
|
||||
if (y != NULL) *y = '\0';
|
||||
if (strcmp(x, "DOUBLE") == 0) {
|
||||
region_type |= GF_REGION_DOUBLE_TABLE;
|
||||
} else if (strcmp(x, "QUAD") == 0) {
|
||||
region_type |= GF_REGION_QUAD_TABLE;
|
||||
} else if (strcmp(x, "SINGLE") == 0) {
|
||||
region_type |= GF_REGION_SINGLE_TABLE;
|
||||
} else if (strcmp(x, "LAZY") == 0) {
|
||||
region_type |= GF_REGION_LAZY;
|
||||
} else if (strcmp(x, "SSE") == 0) {
|
||||
region_type |= GF_REGION_SSE;
|
||||
} else if (strcmp(x, "NOSSE") == 0) {
|
||||
region_type |= GF_REGION_NOSSE;
|
||||
} else if (strcmp(x, "CAUCHY") == 0) {
|
||||
region_type |= GF_REGION_CAUCHY;
|
||||
} else if (strcmp(x, "ALTMAP") == 0) {
|
||||
region_type |= GF_REGION_ALTMAP;
|
||||
} else if (strcmp(x, "STDMAP") == 0) {
|
||||
region_type |= GF_REGION_STDMAP;
|
||||
arg1 = 0;
|
||||
arg2 = 0;
|
||||
while (1) {
|
||||
if (argc > starting) {
|
||||
if (strcmp(argv[starting], "-m") == 0) {
|
||||
starting++;
|
||||
if (mult_type != GF_MULT_DEFAULT) {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
_gf_errno = GF_E_TWOMULT;
|
||||
return 0;
|
||||
}
|
||||
if (strcmp(argv[starting], "SHIFT") == 0) {
|
||||
mult_type = GF_MULT_SHIFT;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "CARRY_FREE") == 0) {
|
||||
mult_type = GF_MULT_CARRY_FREE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "GROUP") == 0) {
|
||||
mult_type = GF_MULT_GROUP;
|
||||
if (argc < starting + 3) {
|
||||
_gf_errno = GF_E_GROUPAR;
|
||||
return 0;
|
||||
}
|
||||
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
|
||||
sscanf(argv[starting+2], "%d", &arg2) == 0) {
|
||||
_gf_errno = GF_E_GROUPNU;
|
||||
return 0;
|
||||
}
|
||||
starting += 3;
|
||||
} else if (strcmp(argv[starting], "BYTWO_p") == 0) {
|
||||
mult_type = GF_MULT_BYTWO_p;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "BYTWO_b") == 0) {
|
||||
mult_type = GF_MULT_BYTWO_b;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "TABLE") == 0) {
|
||||
mult_type = GF_MULT_TABLE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "LOG") == 0) {
|
||||
mult_type = GF_MULT_LOG_TABLE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "LOG_ZERO") == 0) {
|
||||
mult_type = GF_MULT_LOG_ZERO;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "LOG_ZERO_EXT") == 0) {
|
||||
mult_type = GF_MULT_LOG_ZERO_EXT;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "SPLIT") == 0) {
|
||||
mult_type = GF_MULT_SPLIT_TABLE;
|
||||
if (argc < starting + 3) {
|
||||
_gf_errno = GF_E_SPLITAR;
|
||||
return 0;
|
||||
}
|
||||
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
|
||||
sscanf(argv[starting+2], "%d", &arg2) == 0) {
|
||||
_gf_errno = GF_E_SPLITNU;
|
||||
return 0;
|
||||
}
|
||||
starting += 3;
|
||||
} else if (strcmp(argv[starting], "COMPOSITE") == 0) {
|
||||
mult_type = GF_MULT_COMPOSITE;
|
||||
if (argc < starting + 2) { _gf_errno = GF_E_FEWARGS; return 0; }
|
||||
if (sscanf(argv[starting+1], "%d", &arg1) == 0) {
|
||||
_gf_errno = GF_E_COMP_A2;
|
||||
return 0;
|
||||
}
|
||||
starting += 2;
|
||||
base = (gf_t *) malloc(sizeof(gf_t));
|
||||
starting = create_gf_from_argv(base, w/arg1, argc, argv, starting);
|
||||
if (starting == 0) {
|
||||
free(base);
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
_gf_errno = GF_E_UNKNOWN;
|
||||
return 0;
|
||||
}
|
||||
} else if (strcmp(argv[starting], "-r") == 0) {
|
||||
starting++;
|
||||
if (strcmp(argv[starting], "DOUBLE") == 0) {
|
||||
region_type |= GF_REGION_DOUBLE_TABLE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "QUAD") == 0) {
|
||||
region_type |= GF_REGION_QUAD_TABLE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "LAZY") == 0) {
|
||||
region_type |= GF_REGION_LAZY;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "SSE") == 0) {
|
||||
region_type |= GF_REGION_SSE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "NOSSE") == 0) {
|
||||
region_type |= GF_REGION_NOSSE;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "CAUCHY") == 0) {
|
||||
region_type |= GF_REGION_CAUCHY;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "ALTMAP") == 0) {
|
||||
region_type |= GF_REGION_ALTMAP;
|
||||
starting++;
|
||||
} else {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
_gf_errno = GF_E_UNK_REG;
|
||||
return 0;
|
||||
}
|
||||
} else if (strcmp(argv[starting], "-p") == 0) {
|
||||
starting++;
|
||||
if (sscanf(argv[starting], "%llx", (long long unsigned int *)(&prim_poly)) == 0) {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
_gf_errno = GF_E_POLYSPC;
|
||||
return 0;
|
||||
}
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "-d") == 0) {
|
||||
starting++;
|
||||
if (divide_type != GF_DIVIDE_DEFAULT) {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
_gf_errno = GF_E_TWO_DIV;
|
||||
return 0;
|
||||
} else if (strcmp(argv[starting], "EUCLID") == 0) {
|
||||
divide_type = GF_DIVIDE_EUCLID;
|
||||
starting++;
|
||||
} else if (strcmp(argv[starting], "MATRIX") == 0) {
|
||||
divide_type = GF_DIVIDE_MATRIX;
|
||||
starting++;
|
||||
} else {
|
||||
_gf_errno = GF_E_UNK_DIV;
|
||||
return 0;
|
||||
}
|
||||
} else if (strcmp(argv[starting], "-") == 0) {
|
||||
/*
|
||||
printf("Scratch size: %d\n", gf_scratch_size(w,
|
||||
mult_type, region_type, divide_type, arg1, arg2));
|
||||
*/
|
||||
if (gf_init_hard(gf, w, mult_type, region_type, divide_type,
|
||||
prim_poly, arg1, arg2, base, NULL) == 0) {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
return 0;
|
||||
} else
|
||||
return starting + 1;
|
||||
} else {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
free(crt);
|
||||
_gf_errno = GF_E_UNKFLAG;
|
||||
return 0;
|
||||
}
|
||||
if (y != NULL) x = y+1;
|
||||
} while (y != NULL);
|
||||
free(crt);
|
||||
} else {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
_gf_errno = GF_E_FEWARGS;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
starting++;
|
||||
|
||||
if (strcmp(argv[starting], "-") == 0) {
|
||||
divide_type = GF_DIVIDE_DEFAULT;
|
||||
} else if (strcmp(argv[starting], "MATRIX") == 0) {
|
||||
divide_type = GF_DIVIDE_MATRIX;
|
||||
} else if (strcmp(argv[starting], "EUCLID") == 0) {
|
||||
divide_type = GF_DIVIDE_EUCLID;
|
||||
} else {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
return 0;
|
||||
}
|
||||
starting++;
|
||||
|
||||
if (!gf_init_hard(gf, w, mult_type, region_type, divide_type, prim_poly, arg1, arg2, base, NULL)) {
|
||||
if (base != NULL) gf_free(base, 1);
|
||||
return 0;
|
||||
}
|
||||
return starting;
|
||||
}
|
||||
|
|
|
@ -8,8 +8,9 @@
|
|||
|
||||
#include "gf_complete.h"
|
||||
|
||||
/* This prints out the error string defining the methods that you can put on argv*/
|
||||
extern void methods_to_stderr();
|
||||
/* Parses argv starting at "starting".
|
||||
|
||||
Returns 0 on failure.
|
||||
On success, it returns one past the last argument it read in argv. */
|
||||
|
||||
/* Parses argv starting at "starting" */
|
||||
extern int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting);
|
||||
|
|
131
gf_methods.c
131
gf_methods.c
|
@ -11,58 +11,26 @@
|
|||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_method.h"
|
||||
#include "gf_int.h"
|
||||
|
||||
#define NMULTS (15)
|
||||
static char *mults[NMULTS] = { "SHIFT", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
|
||||
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE-0", "COMPOSITE-1" };
|
||||
#define NMULTS (16)
|
||||
static char *mults[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
|
||||
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
|
||||
"SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };
|
||||
|
||||
#define NREGIONS (96)
|
||||
static char *regions[NREGIONS] = { "-", "SINGLE", "DOUBLE", "QUAD",
|
||||
"LAZY", "SINGLE,LAZY", "DOUBLE,LAZY", "QUAD,LAZY", "SSE",
|
||||
"SINGLE,SSE", "DOUBLE,SSE", "QUAD,SSE", "LAZY,SSE",
|
||||
"SINGLE,LAZY,SSE", "DOUBLE,LAZY,SSE", "QUAD,LAZY,SSE", "NOSSE",
|
||||
"SINGLE,NOSSE", "DOUBLE,NOSSE", "QUAD,NOSSE", "LAZY,NOSSE",
|
||||
"SINGLE,LAZY,NOSSE", "DOUBLE,LAZY,NOSSE", "QUAD,LAZY,NOSSE",
|
||||
"STDMAP", "SINGLE,STDMAP", "DOUBLE,STDMAP", "QUAD,STDMAP",
|
||||
"LAZY,STDMAP", "SINGLE,LAZY,STDMAP", "DOUBLE,LAZY,STDMAP",
|
||||
"QUAD,LAZY,STDMAP", "SSE,STDMAP", "SINGLE,SSE,STDMAP",
|
||||
"DOUBLE,SSE,STDMAP", "QUAD,SSE,STDMAP", "LAZY,SSE,STDMAP",
|
||||
"SINGLE,LAZY,SSE,STDMAP", "DOUBLE,LAZY,SSE,STDMAP",
|
||||
"QUAD,LAZY,SSE,STDMAP", "NOSSE,STDMAP", "SINGLE,NOSSE,STDMAP",
|
||||
"DOUBLE,NOSSE,STDMAP", "QUAD,NOSSE,STDMAP", "LAZY,NOSSE,STDMAP",
|
||||
"SINGLE,LAZY,NOSSE,STDMAP", "DOUBLE,LAZY,NOSSE,STDMAP",
|
||||
"QUAD,LAZY,NOSSE,STDMAP", "ALTMAP", "SINGLE,ALTMAP", "DOUBLE,ALTMAP",
|
||||
"QUAD,ALTMAP", "LAZY,ALTMAP", "SINGLE,LAZY,ALTMAP",
|
||||
"DOUBLE,LAZY,ALTMAP", "QUAD,LAZY,ALTMAP", "SSE,ALTMAP",
|
||||
"SINGLE,SSE,ALTMAP", "DOUBLE,SSE,ALTMAP", "QUAD,SSE,ALTMAP",
|
||||
"LAZY,SSE,ALTMAP", "SINGLE,LAZY,SSE,ALTMAP",
|
||||
"DOUBLE,LAZY,SSE,ALTMAP", "QUAD,LAZY,SSE,ALTMAP", "NOSSE,ALTMAP",
|
||||
"SINGLE,NOSSE,ALTMAP", "DOUBLE,NOSSE,ALTMAP", "QUAD,NOSSE,ALTMAP",
|
||||
"LAZY,NOSSE,ALTMAP", "SINGLE,LAZY,NOSSE,ALTMAP",
|
||||
"DOUBLE,LAZY,NOSSE,ALTMAP", "QUAD,LAZY,NOSSE,ALTMAP", "CAUCHY",
|
||||
"SINGLE,CAUCHY", "DOUBLE,CAUCHY", "QUAD,CAUCHY", "LAZY,CAUCHY",
|
||||
"SINGLE,LAZY,CAUCHY", "DOUBLE,LAZY,CAUCHY", "QUAD,LAZY,CAUCHY",
|
||||
"SSE,CAUCHY", "SINGLE,SSE,CAUCHY", "DOUBLE,SSE,CAUCHY",
|
||||
"QUAD,SSE,CAUCHY", "LAZY,SSE,CAUCHY", "SINGLE,LAZY,SSE,CAUCHY",
|
||||
"DOUBLE,LAZY,SSE,CAUCHY", "QUAD,LAZY,SSE,CAUCHY", "NOSSE,CAUCHY",
|
||||
"SINGLE,NOSSE,CAUCHY", "DOUBLE,NOSSE,CAUCHY", "QUAD,NOSSE,CAUCHY",
|
||||
"LAZY,NOSSE,CAUCHY", "SINGLE,LAZY,NOSSE,CAUCHY",
|
||||
"DOUBLE,LAZY,NOSSE,CAUCHY", "QUAD,LAZY,NOSSE,CAUCHY" };
|
||||
#define NREGIONS (7)
|
||||
static char *regions[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SSE", "NOSSE",
|
||||
"ALTMAP", "CAUCHY" };
|
||||
|
||||
#define NDIVS (3)
|
||||
static char *divides[NDIVS] = { "-", "MATRIX", "EUCLID" };
|
||||
#define NDIVS (2)
|
||||
static char *divides[NDIVS] = { "MATRIX", "EUCLID" };
|
||||
|
||||
int main()
|
||||
int main()
|
||||
{
|
||||
int m, r, d, w, i, sa, j;
|
||||
char *argv[20];
|
||||
int m, r, d, w, i, sa, j, k, reset;
|
||||
char *argv[50];
|
||||
gf_t gf;
|
||||
char divs[200], ks[10], ls[10];
|
||||
|
||||
methods_to_stderr();
|
||||
|
||||
printf("\n");
|
||||
printf("Implemented Methods: \n\n");
|
||||
|
||||
for (i = 2; i < 8; i++) {
|
||||
w = (1 << i);
|
||||
|
@ -70,9 +38,14 @@ int main()
|
|||
if (create_gf_from_argv(&gf, w, 1, argv, 0) > 0) {
|
||||
printf("w=%d: -\n", w);
|
||||
gf_free(&gf, 1);
|
||||
} else if (_gf_errno == GF_E_DEFAULT) {
|
||||
fprintf(stderr, "Unlabeled failed method: w=%d: -\n", 2);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (m = 0; m < NMULTS; m++) {
|
||||
sa = 0;
|
||||
argv[sa++] = "-m";
|
||||
if (strcmp(mults[m], "GROUP44") == 0) {
|
||||
argv[sa++] = "GROUP";
|
||||
argv[sa++] = "4";
|
||||
|
@ -96,46 +69,66 @@ int main()
|
|||
sprintf(ls, "%d", w);
|
||||
argv[sa++] = ls;
|
||||
argv[sa++] = "8";
|
||||
} else if (strcmp(mults[m], "SPLIT16") == 0) {
|
||||
argv[sa++] = "SPLIT";
|
||||
sprintf(ls, "%d", w);
|
||||
argv[sa++] = ls;
|
||||
argv[sa++] = "16";
|
||||
} else if (strcmp(mults[m], "SPLIT88") == 0) {
|
||||
argv[sa++] = "SPLIT";
|
||||
argv[sa++] = "8";
|
||||
argv[sa++] = "8";
|
||||
} else if (strcmp(mults[m], "COMPOSITE-0") == 0) {
|
||||
} else if (strcmp(mults[m], "COMPOSITE") == 0) {
|
||||
argv[sa++] = "COMPOSITE";
|
||||
argv[sa++] = "2";
|
||||
argv[sa++] = "0";
|
||||
argv[sa++] = "-";
|
||||
} else if (strcmp(mults[m], "COMPOSITE-1") == 0) {
|
||||
argv[sa++] = "COMPOSITE";
|
||||
argv[sa++] = "2";
|
||||
argv[sa++] = "1";
|
||||
argv[sa++] = "-";
|
||||
} else {
|
||||
argv[sa++] = mults[m];
|
||||
}
|
||||
for (r = 0; r < NREGIONS; r++) {
|
||||
argv[sa++] = regions[r];
|
||||
strcpy(divs, "");
|
||||
for (d = 0; d < NDIVS; d++) {
|
||||
argv[sa++] = divides[d];
|
||||
/* printf("w=%d:", w);
|
||||
for (j = 0; j < sa; j++) printf(" %s", argv[j]);
|
||||
printf("\n"); */
|
||||
if (create_gf_from_argv(&gf, w, sa, argv, 0) > 0) {
|
||||
strcat(divs, "|");
|
||||
strcat(divs, divides[d]);
|
||||
gf_free(&gf, 1);
|
||||
}
|
||||
sa--;
|
||||
reset = sa;
|
||||
for (r = 0; r < (1 << NREGIONS); r++) {
|
||||
sa = reset;
|
||||
for (k = 0; k < NREGIONS; k++) {
|
||||
if (r & 1 << k) {
|
||||
argv[sa++] = "-r";
|
||||
argv[sa++] = regions[k];
|
||||
}
|
||||
}
|
||||
if (strlen(divs) > 0) {
|
||||
argv[sa++] = "-";
|
||||
if (create_gf_from_argv(&gf, w, sa, argv, 0) > 0) {
|
||||
printf("w=%d:", w);
|
||||
for (j = 0; j < sa; j++) printf(" %s", argv[j]);
|
||||
printf(" %s\n", divs+1);
|
||||
printf("\n");
|
||||
gf_free(&gf, 1);
|
||||
} else if (_gf_errno == GF_E_DEFAULT) {
|
||||
fprintf(stderr, "Unlabeled failed method: w=%d:", w);
|
||||
for (j = 0; j < sa; j++) fprintf(stderr, " %s", argv[j]);
|
||||
fprintf(stderr, "\n");
|
||||
exit(1);
|
||||
}
|
||||
sa--;
|
||||
for (d = 0; d < NDIVS; d++) {
|
||||
argv[sa++] = "-d";
|
||||
argv[sa++] = divides[d];
|
||||
/* printf("w=%d:", w);
|
||||
for (j = 0; j < sa; j++) printf(" %s", argv[j]);
|
||||
printf("\n"); */
|
||||
argv[sa++] = "-";
|
||||
if (create_gf_from_argv(&gf, w, sa, argv, 0) > 0) {
|
||||
printf("w=%d:", w);
|
||||
for (j = 0; j < sa; j++) printf(" %s", argv[j]);
|
||||
printf("\n");
|
||||
gf_free(&gf, 1);
|
||||
} else if (_gf_errno == GF_E_DEFAULT) {
|
||||
fprintf(stderr, "Unlabeled failed method: w=%d:", w);
|
||||
for (j = 0; j < sa; j++) fprintf(stderr, " %s", argv[j]);
|
||||
fprintf(stderr, "\n");
|
||||
exit(1);
|
||||
}
|
||||
sa-=3;
|
||||
}
|
||||
}
|
||||
sa--;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
118
gf_mult.c
118
gf_mult.c
|
@ -12,105 +12,53 @@
|
|||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_method.h"
|
||||
#include "gf_general.h"
|
||||
|
||||
void usage(char *s)
|
||||
void usage(int why)
|
||||
{
|
||||
fprintf(stderr, "usage: gf_mult a b w [method] - does multiplication of a and b in GF(2^w)\n");
|
||||
fprintf(stderr, " If w has an h on the end, treat a, b and the product as hexadecimal (no 0x required)\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " legal w are: 1-32, 64 and 128\n");
|
||||
fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " For method specification, type gf_methods\n");
|
||||
|
||||
if (s != NULL) fprintf(stderr, "%s", s);
|
||||
if (why == 'W') {
|
||||
fprintf(stderr, "Bad w.\n");
|
||||
fprintf(stderr, "Legal w are: 1 - 32, 64 and 128.\n");
|
||||
fprintf(stderr, "Append 'h' to w to treat a, b and the product as hexadecimal.\n");
|
||||
fprintf(stderr, "w=128 is hex only (i.e. '128' will be an error - do '128h')\n");
|
||||
}
|
||||
if (why == 'A') fprintf(stderr, "Bad a\n");
|
||||
if (why == 'B') fprintf(stderr, "Bad b\n");
|
||||
if (why == 'M') {
|
||||
fprintf(stderr, "Bad Method Specification: ");
|
||||
gf_error();
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
 * Parse a hexadecimal string of up to 32 digits into a 128-bit value.
 *
 *   s - NUL-terminated hex string; an optional "0x"/"0X" prefix is accepted.
 *   v - output: v[0] receives the high 64 bits, v[1] the low 64 bits.
 *
 * Returns 1 on success and 0 on failure (empty string, more than 32 digits,
 * or any non-hex character).  The input string is never modified.
 */
int read_128(char *s, uint64_t *v)
{
  static const char hexdigits[] = "0123456789abcdefABCDEF";
  char high[17];
  size_t len, split;
  unsigned long long word;

  /* sscanf's %llx tolerated a leading 0x, so keep accepting it. */
  if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) s += 2;

  len = strlen(s);
  if (len == 0 || len > 32) return 0;

  /* Reject signs, blanks and trailing garbage that sscanf would let through. */
  if (strspn(s, hexdigits) != len) return 0;

  if (len > 16) {
    /* Everything left of the last 16 digits is the high word. */
    split = len - 16;
    memcpy(high, s, split);
    high[split] = '\0';
    if (sscanf(high, "%llx", &word) != 1) return 0;
    v[0] = (uint64_t) word;
  } else {
    split = 0;
    v[0] = 0;
  }

  /* sscanf returns EOF (non-zero) on empty input, so compare against 1. */
  if (sscanf(s + split, "%llx", &word) != 1) return 0;
  v[1] = (uint64_t) word;

  return 1;
}
|
||||
|
||||
/*
 * Print a 128-bit value as hexadecimal on stdout, followed by a newline.
 * v[0] is the high 64 bits and v[1] the low 64 bits.  When the high word is
 * non-zero the low word is zero-padded to 16 digits; otherwise only the low
 * word is printed, without padding.
 */
void print_128(uint64_t *v)
{
  long long unsigned int high = (long long unsigned int) v[0];
  long long unsigned int low = (long long unsigned int) v[1];

  if (high == 0) {
    printf("%llx", low);
  } else {
    printf("%llx", high);
    printf("%016llx", low);
  }
  printf("\n");
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int hex, al, bl, w;
|
||||
uint32_t a, b, c, top;
|
||||
uint64_t a64, b64, c64;
|
||||
uint64_t a128[2], b128[2], c128[2];
|
||||
char *format;
|
||||
int hex, w;
|
||||
gf_t gf;
|
||||
gf_general_t a, b, c;
|
||||
char output[50];
|
||||
|
||||
if (argc < 4) usage(NULL);
|
||||
if (sscanf(argv[3], "%d", &w) == 0) usage("Bad w\n");
|
||||
if (argc < 4) usage(' ');
|
||||
|
||||
if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage("Bad w");
|
||||
if (sscanf(argv[3], "%d", &w) == 0) usage('W');
|
||||
if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage('W');
|
||||
|
||||
hex = (strchr(argv[3], 'h') != NULL);
|
||||
if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("\nBad Method\n");
|
||||
if (!hex && w == 128) usage('W');
|
||||
|
||||
if (!hex && w == 128) usage(NULL);
|
||||
|
||||
if (w <= 32) {
|
||||
format = (hex) ? "%x" : "%u";
|
||||
if (sscanf(argv[1], format, &a) == 0) usage("Bad a\n");
|
||||
if (sscanf(argv[2], format, &b) == 0) usage("Bad b\n");
|
||||
|
||||
if (w < 32) {
|
||||
top = (w == 31) ? 0x80000000 : (1 << w);
|
||||
if (w != 32 && a >= top) usage("a is too large\n");
|
||||
if (w != 32 && b >= top) usage("b is too large\n");
|
||||
}
|
||||
|
||||
c = gf.multiply.w32(&gf, a, b);
|
||||
printf(format, c);
|
||||
printf("\n");
|
||||
|
||||
} else if (w == 64) {
|
||||
format = (hex) ? "%llx" : "%llu";
|
||||
if (sscanf(argv[1], format, &a64) == 0) usage("Bad a\n");
|
||||
if (sscanf(argv[2], format, &b64) == 0) usage("Bad b\n");
|
||||
c64 = gf.multiply.w64(&gf, a64, b64);
|
||||
|
||||
printf(format, c64);
|
||||
printf("\n");
|
||||
|
||||
} else if (w == 128) {
|
||||
|
||||
if (read_128(argv[1], a128) == 0) usage("Bad a\n");
|
||||
if (read_128(argv[2], b128) == 0) usage("Bad b\n");
|
||||
gf.multiply.w128(&gf, a128, b128, c128);
|
||||
|
||||
print_128(c128);
|
||||
if (argc == 4) {
|
||||
if (gf_init_easy(&gf, w) == 0) usage('M');
|
||||
} else {
|
||||
if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage('M');
|
||||
}
|
||||
|
||||
if (!gf_general_s_to_val(&a, w, argv[1], hex)) usage('A');
|
||||
if (!gf_general_s_to_val(&b, w, argv[2], hex)) usage('B');
|
||||
|
||||
gf_general_multiply(&gf, &a, &b, &c);
|
||||
gf_general_val_to_s(&c, w, output, hex);
|
||||
|
||||
printf("%s\n", output);
|
||||
exit(0);
|
||||
}
|
||||
|
|
744
gf_poly.c
744
gf_poly.c
|
@ -1,560 +1,268 @@
|
|||
/*
|
||||
* gf_poly.c - program to help find primitive polynomials in composite fields
|
||||
gf_poly.c - program to help find irreducible polynomials in composite fields,
|
||||
using the Ben-Or algorithm.
|
||||
|
||||
James S. Plank
|
||||
|
||||
Please see the following paper for a
|
||||
description of the Ben-Or algorithm:
|
||||
|
||||
author S. Gao and D. Panario
|
||||
title Tests and Constructions of Irreducible Polynomials over Finite Fields
|
||||
booktitle Foundations of Computational Mathematics
|
||||
year 1997
|
||||
publisher Springer Verlag
|
||||
pages 346-361
|
||||
|
||||
The basic technique is this. You have a polynomial f(x) whose coefficients are
|
||||
in a base field GF(2^w). The polynomial is of degree n. You need to do the
|
||||
following for all i from 1 to n/2:
|
||||
|
||||
Construct x^(2^w)^i modulo f. That will be a polynomial of maximum degree n-1
|
||||
with coefficients in GF(2^w). You construct that polynomial by starting with x
|
||||
and squaring it w times, each time taking the result modulo f. Then you
|
||||
multiply that by itself i times, again each time taking the result modulo f.
|
||||
|
||||
When you're done, you need to "subtract" x -- since addition = subtraction =
|
||||
XOR, that means XOR x.
|
||||
|
||||
Now, find the GCD of that last polynomial and f, using Euclid's algorithm. If
|
||||
the GCD is not one, then f is reducible. If it is not reducible for each of
|
||||
those i, then it is irreducible.
|
||||
|
||||
In this code, I am using a gf_general_t to represent elements of GF(2^w). This
|
||||
is so that I can use base fields that are GF(2^64) or GF(2^128).
|
||||
|
||||
I have two main procedures. The first is x_to_q_to_i_minus_x, which calculates
|
||||
x^(2^w)^i - x, putting the result into a gf_general_t * called retval.
|
||||
|
||||
The second is gcd_one, which takes a polynomial of degree n and a second one
|
||||
of degree n-1, and uses Euclid's algorithm to decide if their GCD == 1.
|
||||
|
||||
These can be made faster (e.g. calculate x^(2^w) once and store it).
|
||||
*/
|
||||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_method.h"
|
||||
#include "gf_general.h"
|
||||
#include "gf_int.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define GF_POLY_COEF_MASK8 0xff
|
||||
#define GF_POLY_COEF_MASK16 0xffff
|
||||
#define GF_POLY_COEF_MASK32 0xffffffff
|
||||
#define GF_POLY_COEF_MASK64 0xffffffffffffffff
|
||||
char *BM = "Bad Method: ";
|
||||
|
||||
#define LLUI (long long unsigned int)
|
||||
|
||||
struct gf_poly_coef_s;
|
||||
|
||||
typedef struct gf_poly_coef_s {
|
||||
uint64_t coef;
|
||||
uint64_t power;
|
||||
struct gf_poly_coef_s *next;
|
||||
} gf_poly_coef_t;
|
||||
|
||||
typedef struct gf_poly_s {
|
||||
gf_poly_coef_t *leading_coef;
|
||||
uint64_t num_coefs;
|
||||
gf_t *coef_gf;
|
||||
int w;
|
||||
} gf_poly_t;
|
||||
|
||||
static uint64_t gf_add(int w, uint64_t a, uint64_t b)
|
||||
void usage(char *s)
|
||||
{
|
||||
if (w == 8) {
|
||||
return (a & GF_POLY_COEF_MASK8) ^ (b & GF_POLY_COEF_MASK8);
|
||||
} else if (w == 16) {
|
||||
return (a & GF_POLY_COEF_MASK16) ^ (b & GF_POLY_COEF_MASK16);
|
||||
} else if (w == 32) {
|
||||
return (a & GF_POLY_COEF_MASK32) ^ (b & GF_POLY_COEF_MASK32);
|
||||
} else if (w == 64) {
|
||||
return (a & GF_POLY_COEF_MASK64) ^ (b & GF_POLY_COEF_MASK64);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Multiply two coefficients using the field handle gf.
 *
 * Dispatches to gf->multiply.w32 when w <= 32 and to gf->multiply.w64 when
 * w == 64.  Any other width is unsupported here; 0 is returned instead of
 * falling off the end of the function, which left the result undefined.
 */
static uint64_t gf_mult(int w, gf_t* gf, uint64_t a, uint64_t b)
{
  if (w <= 32) {
    return gf->multiply.w32(gf, a, b);
  }
  if (w == 64) {
    return gf->multiply.w64(gf, a, b);
  }

  /* Unsupported word size: give callers a defined value. */
  return 0;
}
|
||||
|
||||
/*
 * Divide coefficient a by coefficient b using the field handle gf.
 *
 * Dispatches to gf->divide.w32 when w <= 32 and to gf->divide.w64 when
 * w == 64.  Any other width is unsupported here; 0 is returned instead of
 * falling off the end of the function, which left the result undefined.
 */
static uint64_t gf_divide(int w, gf_t* gf, uint64_t a, uint64_t b)
{
  if (w <= 32) {
    return gf->divide.w32(gf, a, b);
  }
  if (w == 64) {
    return gf->divide.w64(gf, a, b);
  }

  /* Unsupported word size: give callers a defined value. */
  return 0;
}
|
||||
|
||||
/*
 * Return the multiplicative inverse of coefficient a using the field handle gf.
 *
 * Dispatches to gf->inverse.w32 when w <= 32 and to gf->inverse.w64 when
 * w == 64.  Any other width is unsupported here; 0 is returned instead of
 * falling off the end of the function, which left the result undefined.
 */
static uint64_t gf_inverse(int w, gf_t* gf, uint64_t a)
{
  if (w <= 32) {
    return gf->inverse.w32(gf, a);
  }
  if (w == 64) {
    return gf->inverse.w64(gf, a);
  }

  /* Unsupported word size: give callers a defined value. */
  return 0;
}
|
||||
|
||||
/*
 * Allocate an empty polynomial whose coefficients use the field gf with
 * word size w.
 *
 * Returns the new polynomial, or NULL when gf is NULL or allocation fails.
 * The caller owns the result.
 */
gf_poly_t* gf_poly_init(int w, gf_t *gf)
{
  gf_poly_t *gf_poly;

  /* Validate before allocating so a NULL field cannot leak the struct. */
  if (gf == NULL) {
    return NULL;
  }

  gf_poly = (gf_poly_t*)malloc(sizeof(gf_poly_t));
  if (gf_poly == NULL) {
    return NULL;
  }

  gf_poly->leading_coef = NULL;
  gf_poly->num_coefs = 0;
  gf_poly->coef_gf = gf;
  gf_poly->w = w;

  return gf_poly;
}
|
||||
|
||||
/*
 * Print a polynomial on stdout as "c * x^p + c * x^p ...", in list order.
 *
 * When message is non-NULL, ": <message>" and a newline follow the terms;
 * otherwise no newline is written.  A NULL polynomial prints "0 * x^0" on
 * stderr instead.
 */
void gf_poly_print(gf_poly_t *gf_poly, char *message)
{
  gf_poly_coef_t *term;

  if (!gf_poly) {
    fprintf(stderr, "0 * x^0\n");
    return;
  }

  for (term = gf_poly->leading_coef; term != NULL; term = term->next) {
    printf("%llu * x^%llu", LLUI term->coef, LLUI term->power);
    /* Separator only between terms, never after the last one. */
    if (term->next != NULL) printf(" + ");
  }

  if (message) printf(": %s\n", message);
}
|
||||
|
||||
/*
 * Build a new polynomial with the same field, word size and terms as poly.
 * Terms are re-inserted one at a time with gf_poly_add_coef.
 *
 * Returns the copy, or NULL if the polynomial struct cannot be allocated.
 */
gf_poly_t* gf_poly_copy(gf_poly_t *poly)
{
  gf_poly_t *dup = (gf_poly_t*)malloc(sizeof(gf_poly_t));
  gf_poly_coef_t *src;

  if (!dup) return NULL;

  dup->w = poly->w;
  dup->coef_gf = poly->coef_gf;
  dup->num_coefs = 0;
  dup->leading_coef = NULL;

  for (src = poly->leading_coef; src != NULL; src = src->next) {
    gf_poly_add_coef(dup, src->coef, src->power);
  }

  return dup;
}
|
||||
|
||||
/*
 * Free every term of a, leaving it as an empty polynomial.  The polynomial
 * struct itself is not freed.
 */
void gf_poly_clear(gf_poly_t* a)
{
  gf_poly_coef_t *node, *rest;

  for (node = a->leading_coef; node != NULL; node = rest) {
    rest = node->next;
    a->leading_coef = rest;   /* keep the head valid while tearing down */
    free(node);
  }
}
|
||||
|
||||
/*
 * Release a polynomial: free its terms, free the struct, and set the
 * caller's pointer to NULL.
 */
void gf_poly_free(gf_poly_t **a)
{
  gf_poly_t *poly = *a;

  gf_poly_clear(poly);
  free(poly);
  *a = NULL;
}
|
||||
|
||||
gf_poly_coef_t* gf_poly_create_node(uint64_t coef, uint64_t power)
|
||||
{
|
||||
gf_poly_coef_t* node = (gf_poly_coef_t*)malloc(sizeof(gf_poly_coef_t));
|
||||
|
||||
if (node == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
node->coef = coef;
|
||||
node->power = power;
|
||||
node->next = NULL;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/*
 * Unlink and free the first term of gf_poly whose exponent equals power.
 *
 * Returns 0 when a term was removed and -1 when no term has that exponent.
 * An empty polynomial also yields -1; the earlier code dereferenced a NULL
 * head in that case.
 */
int gf_poly_remove_node(gf_poly_t *gf_poly, uint64_t power)
{
  /* link always points at the pointer that refers to the current node, so
     the head and interior nodes are unlinked by the same code. */
  gf_poly_coef_t **link = &gf_poly->leading_coef;

  while (*link != NULL) {
    if ((*link)->power == power) {
      gf_poly_coef_t *doomed = *link;

      *link = doomed->next;
      free(doomed);
      return 0;
    }
    link = &(*link)->next;
  }

  return -1;
}
|
||||
|
||||
/*
 * Add the term coef_val * x^power into gf_poly.
 *
 * The term list is kept in descending order of exponent.  If a term with
 * the same exponent already exists, the coefficients are combined with
 * gf_add and the term is dropped when the sum is zero.  Otherwise a new
 * term is inserted at its sorted position.
 *
 * Returns 0 on success and -1 if a new term cannot be allocated (the
 * earlier code dereferenced the NULL allocation).
 */
int gf_poly_add_coef(gf_poly_t *gf_poly, uint64_t coef_val, uint64_t power)
{
  gf_poly_coef_t **link = &gf_poly->leading_coef;
  gf_poly_coef_t *node;

  /* Walk past every term with a larger exponent. */
  while (*link != NULL && (*link)->power > power) {
    link = &(*link)->next;
  }

  /* Same exponent: fold the coefficient into the existing term. */
  if (*link != NULL && (*link)->power == power) {
    (*link)->coef = gf_add(gf_poly->w, (*link)->coef, coef_val);
    if ((*link)->coef == 0) {
      /* The term cancelled out; unlink it. */
      node = *link;
      *link = node->next;
      free(node);
    }
    return 0;
  }

  /* No such exponent yet: splice a new term in front of the first smaller
     one (or at the tail / head when there is none). */
  node = gf_poly_create_node(coef_val, power);
  if (node == NULL) {
    return -1;
  }
  node->next = *link;
  *link = node;

  return 0;
}
|
||||
|
||||
/*
|
||||
* Compute a+b and store in a
|
||||
*/
|
||||
int gf_poly_add(gf_poly_t* a, gf_poly_t* b)
|
||||
{
|
||||
gf_poly_coef_t* iter = b->leading_coef;
|
||||
|
||||
while (iter != NULL) {
|
||||
gf_poly_add_coef(a, iter->coef, iter->power);
|
||||
iter = iter->next;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute a*b and store in a
|
||||
*/
|
||||
int gf_poly_mult(gf_poly_t* a, gf_poly_t* b)
|
||||
{
|
||||
gf_poly_coef_t* a_iter = a->leading_coef;
|
||||
|
||||
/*
|
||||
* Remove one node at a time from 'a', starting with
|
||||
* highest power. Multiply the removed (coef,power)
|
||||
* by every entry of 'b,' adding each product into 'a.'
|
||||
*/
|
||||
while (a_iter != NULL) {
|
||||
gf_poly_coef_t* tmp = a_iter;
|
||||
gf_poly_coef_t* b_iter = b->leading_coef;
|
||||
|
||||
uint64_t a_power = a_iter->power;
|
||||
uint64_t a_coef = a_iter->coef;
|
||||
a_iter = a_iter->next;
|
||||
gf_poly_remove_node(a, tmp->power);
|
||||
|
||||
while (b_iter != NULL) {
|
||||
uint64_t new_power = b_iter->power + a_power;
|
||||
uint64_t new_coef = gf_mult(a->w, a->coef_gf, b_iter->coef, a_coef);
|
||||
|
||||
gf_poly_add_coef(a, new_coef, new_power);
|
||||
|
||||
b_iter = b_iter->next;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute a % b and store in a
|
||||
*/
|
||||
int gf_poly_reduce(gf_poly_t* a, gf_poly_t* b)
|
||||
{
|
||||
gf_poly_t* c = gf_poly_init(a->w, a->coef_gf);
|
||||
gf_poly_coef_t* a_iter = a->leading_coef;
|
||||
gf_poly_coef_t* b_iter = b->leading_coef;
|
||||
|
||||
/*
|
||||
* Reduce until the degree of 'a' is less than
|
||||
* the degree of 'b.' At that point 'a' will
|
||||
* contain the remainder of a / b.
|
||||
*/
|
||||
while (a_iter && (a_iter->power >= b_iter->power)) {
|
||||
|
||||
/*
|
||||
* Get the degree and leading coef of the current
|
||||
* 'b'.
|
||||
*/
|
||||
uint64_t reduce_power = a_iter->power - b_iter->power;
|
||||
uint64_t reduce_coef = gf_divide(a->w, a->coef_gf, a_iter->coef, b_iter->coef);
|
||||
|
||||
/*
|
||||
* Create a poly that will get rid of leading power
|
||||
* of 'b' when added: c*x^(n-m)*b(x), where c
|
||||
* is the leading coef of 'a', n is the deg of 'a'
|
||||
* and m is the degree of 'b'.
|
||||
*/
|
||||
gf_poly_add_coef(c, reduce_coef, reduce_power);
|
||||
gf_poly_mult(c, b);
|
||||
|
||||
/*
|
||||
* Add the newly created poly, which will reduce
|
||||
* a(x) by at least one term (leading term).
|
||||
*/
|
||||
gf_poly_add(a, c);
|
||||
|
||||
gf_poly_clear(c);
|
||||
|
||||
/*
|
||||
* Grab the new leading term of 'a'
|
||||
*/
|
||||
a_iter = a->leading_coef;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the GCD of a and b, return the result
|
||||
*/
|
||||
gf_poly_t* gf_poly_gcd(gf_poly_t* a, gf_poly_t* b)
|
||||
{
|
||||
gf_poly_t *r1, *r2;
|
||||
gf_poly_t* tmp_swp;
|
||||
|
||||
if (a->leading_coef == NULL || b->leading_coef == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (a->leading_coef->power > b->leading_coef->power) {
|
||||
r1 = a;
|
||||
r2 = b;
|
||||
} else {
|
||||
r1 = b;
|
||||
r2 = a;
|
||||
}
|
||||
|
||||
while ( 1 ) {
|
||||
if (r2->leading_coef == NULL) {
|
||||
break;
|
||||
}
|
||||
if (r2->leading_coef->power == 0 && r2->leading_coef->coef <= 1) {
|
||||
break;
|
||||
}
|
||||
|
||||
gf_poly_reduce(r1, r2);
|
||||
tmp_swp = r1;
|
||||
r1 = r2;
|
||||
r2 = tmp_swp;
|
||||
}
|
||||
|
||||
return r1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The Ben-Or algorithm for determining irreducibility
|
||||
*/
|
||||
int gf_poly_is_irred(gf_poly_t* poly)
|
||||
{
|
||||
gf_poly_t *gcd;
|
||||
gf_poly_t *prod_of_irred;
|
||||
uint64_t prod_of_irred_power = ((unsigned long long) 1) << poly->w;
|
||||
int n = poly->leading_coef->power / 2;
|
||||
int i;
|
||||
int ret = 0;
|
||||
gf_poly_t *a = gf_poly_copy(poly);
|
||||
|
||||
prod_of_irred = gf_poly_init(a->w, a->coef_gf);
|
||||
|
||||
|
||||
for (i = 1; i <= n; i++) {
|
||||
gf_poly_add_coef(prod_of_irred, 1, prod_of_irred_power);
|
||||
gf_poly_add_coef(prod_of_irred, 1, 1);
|
||||
|
||||
gf_poly_reduce(prod_of_irred, a);
|
||||
|
||||
gcd = gf_poly_gcd(a, prod_of_irred);
|
||||
|
||||
/*
|
||||
* It is irreducible if it is not the product of
|
||||
* non-trivial factors (non-constant). Therefore,
|
||||
* the GCD of the poly and prod_of_irred should be
|
||||
* a constant (0 or 0-degree polynomial).
|
||||
*/
|
||||
if (gcd == NULL) {
|
||||
ret = -1;
|
||||
break;
|
||||
} else if (gcd->leading_coef->power != 0) {
|
||||
ret = -1;
|
||||
break;
|
||||
} else if (gcd->leading_coef->power == 0) {
|
||||
ret = 0;
|
||||
break;
|
||||
fprintf(stderr, "usage: gf_poly w(base-field) method power:coef [ power:coef .. ]\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " use - for the default method.\n");
|
||||
fprintf(stderr, " use 0x in front of the coefficient if it's in hex\n");
|
||||
fprintf(stderr, " \n");
|
||||
fprintf(stderr, " For example, to test whether x^2 + 2x + 1 is irreducible\n");
|
||||
fprintf(stderr, " in GF(2^16), the call is:\n");
|
||||
fprintf(stderr, " \n");
|
||||
fprintf(stderr, " gf_poly 16 - 2:1 1:2 0:1\n");
|
||||
fprintf(stderr, " \n");
|
||||
fprintf(stderr, " See the user's manual for more information.\n");
|
||||
if (s != NULL) {
|
||||
fprintf(stderr, "\n");
|
||||
if (s == BM) {
|
||||
fprintf(stderr, "%s", s);
|
||||
gf_error();
|
||||
} else {
|
||||
ret = -1;
|
||||
break;
|
||||
fprintf(stderr, "%s\n", s);
|
||||
}
|
||||
|
||||
// Need if to avoid a overflow error
|
||||
if ((i + 1) <= n) {
|
||||
prod_of_irred_power *= prod_of_irred_power;
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
 * Decide whether gcd(poly, prod) is a non-zero constant, using Euclid's
 * algorithm on private copies of the inputs.
 *
 *   gf, w - field handle and word size for coefficient arithmetic
 *   n     - degree of poly
 *   poly  - n+1 coefficients, index i holding the coefficient of x^i
 *   prod  - n coefficients (degree at most n-1), same layout
 *
 * Returns 1 when a remainder is reduced to a non-zero constant, and 0 when
 * a remainder becomes zero.  Exits the program if memory cannot be
 * allocated.
 */
int gcd_one(gf_t *gf, int w, int n, gf_general_t *poly, gf_general_t *prod)
{
  gf_general_t *a, *b, zero, factor, p;
  int i, j, db, result;

  gf_general_set_zero(&zero, w);

  /* a needs n+1 slots; the unparenthesised "n+1" used to allocate only one
     extra byte, so writing a[n] overran the buffer. */
  a = (gf_general_t *) malloc(sizeof(gf_general_t) * (n+1));
  b = (gf_general_t *) malloc(sizeof(gf_general_t) * n);
  if (a == NULL || (n > 0 && b == NULL)) {
    fprintf(stderr, "gcd_one: out of memory\n");
    exit(1);
  }

  /* Adding zero copies an element. */
  for (i = 0; i <= n; i++) gf_general_add(gf, &zero, poly+i, a+i);
  for (i = 0; i < n; i++) gf_general_add(gf, &zero, prod+i, b+i);

  while (1) {
    /* db = degree of b, or -1 when b is the zero polynomial. */
    for (db = n-1; db >= 0 && gf_general_is_zero(b+db, w); db--) ;
    if (db <= 0) {
      result = (db == 0);
      break;
    }

    /* a = a mod b: cancel each term of degree >= db, highest first. */
    for (j = n; j >= db; j--) {
      if (!gf_general_is_zero(a+j, w)) {
        gf_general_divide(gf, a+j, b+db, &factor);
        for (i = 0; i <= db; i++) {
          gf_general_multiply(gf, b+i, &factor, &p);
          gf_general_add(gf, &p, a+(i+j-db), a+(i+j-db));
        }
      }
    }

    /* Swap the low n coefficients of a and b for the next round. */
    for (i = 0; i < n; i++) {
      gf_general_add(gf, a+i, &zero, &p);
      gf_general_add(gf, b+i, &zero, a+i);
      gf_general_add(gf, &p, &zero, b+i);
    }
  }

  free(a);
  free(b);
  return result;
}
|
||||
|
||||
int is_suitible_s(int w, gf_t *gf, uint64_t s)
|
||||
void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, int i, gf_general_t *retval)
|
||||
{
|
||||
uint64_t num_elems = ((unsigned long long) 1) << w;
|
||||
uint64_t i = 2;
|
||||
uint64_t i_inv;
|
||||
gf_general_t x;
|
||||
gf_general_t *x_to_q;
|
||||
gf_general_t *product;
|
||||
gf_general_t p, zero, factor;
|
||||
int j, k, lq;
|
||||
char buf[20];
|
||||
|
||||
for (; i < num_elems; i++) {
|
||||
i_inv = gf_inverse(w, gf, i);
|
||||
if ((i ^ i_inv) == s) {
|
||||
fprintf(stderr, "Bailed on %llu ^ %llu = %llu\n", LLUI i, LLUI i_inv, LLUI s);
|
||||
return -1;
|
||||
gf_general_set_zero(&zero, w);
|
||||
product = (gf_general_t *) malloc(sizeof(gf_general_t) * n*2);
|
||||
x_to_q = (gf_general_t *) malloc(sizeof(gf_general_t) * n);
|
||||
for (j = 0; j < n; j++) gf_general_set_zero(x_to_q+j, w);
|
||||
gf_general_set_one(x_to_q+1, w);
|
||||
|
||||
for (lq = 0; lq < logq; lq++) {
|
||||
for (j = 0; j < n*2; j++) gf_general_set_zero(product+j, w);
|
||||
for (j = 0; j < n; j++) {
|
||||
for (k = 0; k < n; k++) {
|
||||
gf_general_multiply(gf, x_to_q+j, x_to_q+k, &p);
|
||||
gf_general_add(gf, product+(j+k), &p, product+(j+k));
|
||||
}
|
||||
}
|
||||
if (i % 1000000000 == 0) fprintf(stderr, "Processed %llu\n", LLUI i);
|
||||
for (j = n*2-1; j >= n; j--) {
|
||||
if (!gf_general_is_zero(product+j, w)) {
|
||||
gf_general_add(gf, product+j, &zero, &factor);
|
||||
for (k = 0; k <= n; k++) {
|
||||
gf_general_multiply(gf, poly+k, &factor, &p);
|
||||
gf_general_add(gf, product+(j-n+k), &p, product+(j-n+k));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (j = 0; j < n; j++) gf_general_add(gf, product+j, &zero, x_to_q+j);
|
||||
}
|
||||
for (j = 0; j < n; j++) gf_general_set_zero(retval+j, w);
|
||||
gf_general_set_one(retval, w);
|
||||
|
||||
while (i > 0) {
|
||||
for (j = 0; j < n*2; j++) gf_general_set_zero(product+j, w);
|
||||
for (j = 0; j < n; j++) {
|
||||
for (k = 0; k < n; k++) {
|
||||
gf_general_multiply(gf, x_to_q+j, retval+k, &p);
|
||||
gf_general_add(gf, product+(j+k), &p, product+(j+k));
|
||||
}
|
||||
}
|
||||
for (j = n*2-1; j >= n; j--) {
|
||||
if (!gf_general_is_zero(product+j, w)) {
|
||||
gf_general_add(gf, product+j, &zero, &factor);
|
||||
for (k = 0; k <= n; k++) {
|
||||
gf_general_multiply(gf, poly+k, &factor, &p);
|
||||
gf_general_add(gf, product+(j-n+k), &p, product+(j-n+k));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (j = 0; j < n; j++) gf_general_add(gf, product+j, &zero, retval+j);
|
||||
i--;
|
||||
}
|
||||
|
||||
return 0;
|
||||
gf_general_set_one(&x, w);
|
||||
gf_general_add(gf, &x, retval+1, retval+1);
|
||||
|
||||
free(product);
|
||||
free(x_to_q);
|
||||
}
|
||||
|
||||
/*
 * Write the command-line help to stderr.  cmd is the program name
 * substituted into the two synopsis lines.
 */
static void
usage(char *cmd)
{
  /* Form 1: trinomial given by its middle coefficient. */
  fprintf(stderr, "%s w <GF args> S <s value>\n", cmd);
  fputs("\t will build a trinomial x^2+S*x+1\n", stderr);

  fputs("OR\n", stderr);

  /* Form 2: general polynomial given as coef,power pairs. */
  fprintf(stderr, "%s w <GF args> G coef1,power1 <coef2,power2> ... <coefn,powern>\n", cmd);
  fputs("\t will build a polynomial coef1^(power1) + ... + coefn^(powern)\n", stderr);

  fputs("Example: ./gf_poly 8 - - - G 1,2 2,1 1,0\n", stderr);
  fputs("\t will build a polynomial x^2+2*x+1 with coefs from GF(2^8)\n", stderr);
}
|
||||
|
||||
/*
|
||||
* Find irred poly of form x^2+sx+1
|
||||
* a_n*x^n + a_(n-1)*x^(n-1) + ...
|
||||
*
|
||||
* Terms are specified as: a_i,i a_j,j, ... where
|
||||
* i is the degree of the term and a_i is the coef
|
||||
*
|
||||
*/
|
||||
int main(int argc, char **argv)
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int w, i, power, n, ap, success, j;
|
||||
gf_t gf;
|
||||
int ret;
|
||||
int w;
|
||||
int i;
|
||||
uint64_t irred_coef_s;
|
||||
gf_poly_t *irred_poly;
|
||||
char *term;
|
||||
gf_general_t *poly, *prod;
|
||||
char *string, *ptr;
|
||||
char buf[100];
|
||||
|
||||
bzero(&gf, sizeof(gf_t));
|
||||
if (argc < 4) usage(NULL);
|
||||
|
||||
if (argc < 4) {
|
||||
usage(argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
w = atoi(argv[1]);
|
||||
|
||||
ret = create_gf_from_argv(&gf, w, argc, argv, 3);
|
||||
if (sscanf(argv[1], "%d", &w) != 1 || w <= 0) usage("Bad w.");
|
||||
ap = create_gf_from_argv(&gf, w, argc, argv, 2);
|
||||
|
||||
if (ret <= 0) {
|
||||
fprintf(stderr, "Could not create a GF\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
irred_poly = gf_poly_init(w, &gf);
|
||||
if (ap == 0) usage(BM);
|
||||
|
||||
i = ret + 1;
|
||||
if (ap == argc) usage("No powers/coefficients given.");
|
||||
|
||||
if (strlen(argv[i]) > 1) {
|
||||
usage(argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (argv[i][0] == 'S') {
|
||||
i++;
|
||||
irred_coef_s = (uint64_t)strtoull(argv[i], NULL, 10);
|
||||
|
||||
/*
|
||||
* If this is a trinomial of the form x^2+s*x+1, then
|
||||
* we can do a quick pre-check to see if this may be
|
||||
* an irreducible polynomial.
|
||||
*/
|
||||
if (is_suitible_s(w, &gf, irred_coef_s) < 0) {
|
||||
fprintf(stderr, "%llu is not a suitable coeffient!\n", LLUI irred_coef_s);
|
||||
return -1;
|
||||
} else {
|
||||
fprintf(stderr, "%llu IS A suitable coeffient!\n", LLUI irred_coef_s);
|
||||
n = -1;
|
||||
for (i = ap; i < argc; i++) {
|
||||
if (strchr(argv[i], ':') == NULL || sscanf(argv[i], "%d:", &power) != 1) {
|
||||
string = (char *) malloc(sizeof(char)*(strlen(argv[i]+100)));
|
||||
sprintf(string, "Argument '%s' not in proper format of power:coefficient\n", argv[i]);
|
||||
usage(string);
|
||||
}
|
||||
if (power < 0) usage("Can't have negative powers\n");
|
||||
if (power > n) n = power;
|
||||
}
|
||||
|
||||
poly = (gf_general_t *) malloc(sizeof(gf_general_t)*(n+1));
|
||||
for (i = 0; i <= n; i++) gf_general_set_zero(poly+i, w);
|
||||
prod = (gf_general_t *) malloc(sizeof(gf_general_t)*n);
|
||||
|
||||
gf_poly_add_coef(irred_poly, 1, 2);
|
||||
gf_poly_add_coef(irred_poly, irred_coef_s, 1);
|
||||
gf_poly_add_coef(irred_poly, 1, 0);
|
||||
for (i = ap; i < argc; i++) {
|
||||
sscanf(argv[i], "%d:", &power);
|
||||
ptr = strchr(argv[i], ':');
|
||||
ptr++;
|
||||
if (strncmp(ptr, "0x", 2) == 0) {
|
||||
success = gf_general_s_to_val(poly+power, w, ptr+2, 1);
|
||||
} else {
|
||||
success = gf_general_s_to_val(poly+power, w, ptr, 0);
|
||||
}
|
||||
if (success == 0) {
|
||||
string = (char *) malloc(sizeof(char)*(strlen(argv[i]+100)));
|
||||
sprintf(string, "Argument '%s' not in proper format of power:coefficient\n", argv[i]);
|
||||
usage(string);
|
||||
}
|
||||
}
|
||||
|
||||
} else if (argv[i][0] == 'G') {
|
||||
term = argv[++i];
|
||||
|
||||
|
||||
while (term != NULL) {
|
||||
uint64_t coef = strtoull(strtok(term, ","), NULL, 10);
|
||||
uint64_t power = strtoull(strtok(NULL, ","), NULL, 10);
|
||||
|
||||
gf_poly_add_coef(irred_poly, coef, power);
|
||||
|
||||
if (i < argc) {
|
||||
term = argv[++i];
|
||||
printf("Poly:");
|
||||
for (power = n; power >= 0; power--) {
|
||||
if (!gf_general_is_zero(poly+power, w)) {
|
||||
printf("%s", (power == n) ? " " : " + ");
|
||||
if (!gf_general_is_one(poly+power, w)) {
|
||||
gf_general_val_to_s(poly+power, w, buf, 1);
|
||||
if (n > 0) {
|
||||
printf("(0x%s)", buf);
|
||||
} else {
|
||||
printf("0x%s", buf);
|
||||
}
|
||||
}
|
||||
if (power == 0) {
|
||||
if (gf_general_is_one(poly+power, w)) printf("1");
|
||||
} else if (power == 1) {
|
||||
printf("x");
|
||||
} else {
|
||||
break;
|
||||
printf("x^%d", power);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
usage(argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
if (!gf_general_is_one(poly+n, w)) {
|
||||
printf("\n");
|
||||
printf("Can't do Ben-Or, because the polynomial is not monic.\n");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
for (i = 1; i <= n/2; i++) {
|
||||
x_to_q_to_i_minus_x(&gf, w, n, poly, w, i, prod);
|
||||
if (!gcd_one(&gf, w, n, poly, prod)) {
|
||||
printf("Reducible.\n");
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
gf_poly_print(irred_poly, " specified via the command line\n");
|
||||
|
||||
ret = gf_poly_is_irred(irred_poly);
|
||||
|
||||
if (ret < 0) {
|
||||
gf_poly_print(irred_poly, " IS NOT irreducible\n");
|
||||
} else {
|
||||
gf_poly_print(irred_poly, " IS irreducible\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
printf("Irreducible.\n");
|
||||
exit(0);
|
||||
}
|
||||
|
|
25
gf_time.c
25
gf_time.c
|
@ -9,7 +9,7 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_method.h"
|
||||
|
@ -43,10 +43,14 @@ void problem(char *s)
|
|||
exit(1);
|
||||
}
|
||||
|
||||
char *BM = "Bad Method: ";
|
||||
|
||||
void usage(char *s)
|
||||
{
|
||||
fprintf(stderr, "usage: gf_time w tests seed size(bytes) iterations [method [params]] - does timing\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "does unit testing in GF(2^w)\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Legal w are: 1 - 32, 64 and 128\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Tests may be any combination of:\n");
|
||||
|
@ -63,9 +67,12 @@ void usage(char *s)
|
|||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Use -1 for time(0) as a seed.\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "For method specification, type gf_methods\n");
|
||||
fprintf(stderr, "\n");
|
||||
if (s != NULL) fprintf(stderr, "%s\n", s);
|
||||
if (s == BM) {
|
||||
fprintf(stderr, "%s", BM);
|
||||
gf_error();
|
||||
} else if (s != NULL) {
|
||||
fprintf(stderr, "%s\n", s);
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
@ -84,9 +91,15 @@ int main(int argc, char **argv)
|
|||
time_t t0;
|
||||
uint8_t *ra, *rb;
|
||||
gf_general_t a;
|
||||
|
||||
|
||||
if (argc < 6) usage(NULL);
|
||||
if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n");
|
||||
|
||||
if (sscanf(argv[1], "%d", &w) == 0){
|
||||
usage("Bad w[-pp]\n");
|
||||
}
|
||||
|
||||
|
||||
if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n");
|
||||
if (sscanf(argv[4], "%d", &size) == 0) usage("Bad size\n");
|
||||
if (sscanf(argv[5], "%d", &iterations) == 0) usage("Bad iterations\n");
|
||||
|
@ -99,7 +112,7 @@ int main(int argc, char **argv)
|
|||
if ((w > 32 && w != 64 && w != 128) || w < 0) usage("Bad w");
|
||||
if ((size * 8) % w != 0) usage ("Bad size -- must be a multiple of w*8\n");
|
||||
|
||||
if (!create_gf_from_argv(&gf, w, argc, argv, 6)) usage("Bad Method");
|
||||
if (!create_gf_from_argv(&gf, w, argc, argv, 6)) usage(BM);
|
||||
|
||||
strcpy(tests, "");
|
||||
for (i = 0; i < argv[2][i] != '\0'; i++) {
|
||||
|
|
243
gf_unit.c
243
gf_unit.c
|
@ -10,6 +10,7 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <signal.h>
|
||||
|
||||
#include "gf_complete.h"
|
||||
#include "gf_int.h"
|
||||
|
@ -18,6 +19,8 @@
|
|||
#include "gf_general.h"
|
||||
|
||||
#define REGION_SIZE (16384)
|
||||
#define RMASK (0x00000000ffffffffLL)
|
||||
#define LMASK (0xffffffff00000000LL)
|
||||
|
||||
void problem(char *s)
|
||||
{
|
||||
|
@ -26,11 +29,14 @@ void problem(char *s)
|
|||
exit(1);
|
||||
}
|
||||
|
||||
char *BM = "Bad Method: ";
|
||||
|
||||
void usage(char *s)
|
||||
{
|
||||
fprintf(stderr, "usage: gf_unit w tests seed [method] - does unit testing in GF(2^w)\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Legal w are: 1 - 32, 64 and 128\n");
|
||||
fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Tests may be any combination of:\n");
|
||||
fprintf(stderr, " A: All\n");
|
||||
|
@ -40,16 +46,28 @@ void usage(char *s)
|
|||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Use -1 for time(0) as a seed.\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "For method specification, type gf_methods\n");
|
||||
fprintf(stderr, "\n");
|
||||
if (s != NULL) fprintf(stderr, "%s\n", s);
|
||||
if (s == BM) {
|
||||
fprintf(stderr, "%s", BM);
|
||||
gf_error();
|
||||
} else if (s != NULL) {
|
||||
fprintf(stderr, "%s\n", s);
|
||||
}
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
 * Signal handler that main() registers for SIGSEGV.
 *
 * Reports the fault on stderr, flushes whatever is still buffered on
 * stdout, and terminates the process with exit status 2.
 *
 * v: the signal number delivered to the handler; not used.
 *
 * NOTE(review): stdio calls and exit() are not async-signal-safe, so this
 * is a best-effort diagnostic rather than a guaranteed clean shutdown.
 */
void SigHandler(int v)
{
  (void) v;

  fputs("Problem: SegFault!\n", stderr);
  fflush(stdout);
  exit(2);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
signal(SIGSEGV, SigHandler);
|
||||
|
||||
int w, i, verbose, single, region, tested, top;
|
||||
int start, end, xor;
|
||||
int s_start, d_start, bytes, xor, alignment_test;
|
||||
gf_t gf, gf_def;
|
||||
time_t t0;
|
||||
gf_internal_t *h;
|
||||
|
@ -61,15 +79,21 @@ int main(int argc, char **argv)
|
|||
char *ra, *rb, *rc, *rd, *target;
|
||||
int align;
|
||||
|
||||
|
||||
if (argc < 4) usage(NULL);
|
||||
if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n");
|
||||
|
||||
if (sscanf(argv[1], "%d", &w) == 0){
|
||||
usage("Bad w\n");
|
||||
}
|
||||
|
||||
if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n");
|
||||
if (t0 == -1) t0 = time(0);
|
||||
MOA_Seed(t0);
|
||||
|
||||
if (w > 32 && w != 64 && w != 128) usage("Bad w");
|
||||
|
||||
if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("Bad Method");
|
||||
if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage(BM);
|
||||
printf("Size (bytes): %d\n", gf_size(&gf));
|
||||
|
||||
for (i = 0; i < strlen(argv[2]); i++) {
|
||||
if (strchr("ASRV", argv[2][i]) == NULL) usage("Bad test\n");
|
||||
|
@ -83,10 +107,18 @@ int main(int argc, char **argv)
|
|||
ai = (gf_general_t *) malloc(sizeof(gf_general_t));
|
||||
bi = (gf_general_t *) malloc(sizeof(gf_general_t));
|
||||
|
||||
ra = (char *) malloc(sizeof(char)*REGION_SIZE);
|
||||
rb = (char *) malloc(sizeof(char)*REGION_SIZE);
|
||||
rc = (char *) malloc(sizeof(char)*REGION_SIZE);
|
||||
rd = (char *) malloc(sizeof(char)*REGION_SIZE);
|
||||
//15 bytes extra to make sure it's 16byte aligned
|
||||
ra = (char *) malloc(sizeof(char)*REGION_SIZE+15);
|
||||
rb = (char *) malloc(sizeof(char)*REGION_SIZE+15);
|
||||
rc = (char *) malloc(sizeof(char)*REGION_SIZE+15);
|
||||
rd = (char *) malloc(sizeof(char)*REGION_SIZE+15);
|
||||
|
||||
//this still assumes 8 byte aligned pointer from malloc
|
||||
//(which is usual on 32-bit machines)
|
||||
ra += (uint64_t)ra & 0xf;
|
||||
rb += (uint64_t)rb & 0xf;
|
||||
rc += (uint64_t)rc & 0xf;
|
||||
rd += (uint64_t)rd & 0xf;
|
||||
|
||||
if (w <= 32) {
|
||||
mask = 0;
|
||||
|
@ -97,8 +129,9 @@ int main(int argc, char **argv)
|
|||
single = (strchr(argv[2], 'S') != NULL || strchr(argv[2], 'A') != NULL);
|
||||
region = (strchr(argv[2], 'R') != NULL || strchr(argv[2], 'A') != NULL);
|
||||
|
||||
if (!gf_init_easy(&gf_def, w)) problem("No default for this value of w");
|
||||
|
||||
if (!gf_init_hard(&gf_def, w, GF_MULT_DEFAULT, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
|
||||
(h->mult_type != GF_MULT_COMPOSITE) ? h->prim_poly : 0, 0, 0, NULL, NULL))
|
||||
problem("No default for this value of w");
|
||||
if (w == 4) {
|
||||
mult4 = gf_w4_get_mult_table(&gf);
|
||||
div4 = gf_w4_get_div_table(&gf);
|
||||
|
@ -129,21 +162,71 @@ int main(int argc, char **argv)
|
|||
if (w <= 10) {
|
||||
a->w32 = i % (1 << w);
|
||||
b->w32 = (i >> w);
|
||||
} else if (i < 10) {
|
||||
gf_general_set_zero(a, w);
|
||||
gf_general_set_random(b, w, 1);
|
||||
} else if (i < 20) {
|
||||
gf_general_set_random(a, w, 1);
|
||||
gf_general_set_zero(b, w);
|
||||
} else if (i < 30) {
|
||||
gf_general_set_one(a, w);
|
||||
gf_general_set_random(b, w, 1);
|
||||
} else if (i < 40) {
|
||||
gf_general_set_random(a, w, 1);
|
||||
gf_general_set_one(b, w);
|
||||
|
||||
//Allen: the following conditions were being run 10 times each. That didn't seem like nearly enough to
|
||||
//me for these special cases, so I converted to doing this mod stuff to easily make the number of times
|
||||
//run both larger and proportional to the total size of the run.
|
||||
} else {
|
||||
gf_general_set_random(a, w, 1);
|
||||
gf_general_set_random(b, w, 1);
|
||||
switch (i % 32)
|
||||
{
|
||||
case 0:
|
||||
gf_general_set_zero(a, w);
|
||||
gf_general_set_random(b, w, 1);
|
||||
break;
|
||||
case 1:
|
||||
gf_general_set_random(a, w, 1);
|
||||
gf_general_set_zero(b, w);
|
||||
break;
|
||||
case 2:
|
||||
gf_general_set_one(a, w);
|
||||
gf_general_set_random(b, w, 1);
|
||||
break;
|
||||
case 3:
|
||||
gf_general_set_random(a, w, 1);
|
||||
gf_general_set_one(b, w);
|
||||
break;
|
||||
default:
|
||||
gf_general_set_random(a, w, 1);
|
||||
gf_general_set_random(b, w, 1);
|
||||
}
|
||||
}
|
||||
|
||||
//Allen: the following special cases for w=64 are based on the code below for w=128.
|
||||
//These w=64 cases are based on Dr. Plank's suggestion because some of the methods for w=64
|
||||
//involve splitting it in two. I think they're less likely to give errors than the 128-bit case
|
||||
//though, because the 128 bit case is always split in two.
|
||||
//As with w=128, I'm arbitrarily deciding to do this sort of thing with a quarter of the cases
|
||||
if (w == 64) {
|
||||
switch (i % 32)
|
||||
{
|
||||
case 0: if (!gf_general_is_one(a, w)) a->w64 &= RMASK; break;
|
||||
case 1: if (!gf_general_is_one(a, w)) a->w64 &= LMASK; break;
|
||||
case 2: if (!gf_general_is_one(a, w)) a->w64 &= RMASK; if (!gf_general_is_one(b, w)) b->w64 &= RMASK; break;
|
||||
case 3: if (!gf_general_is_one(a, w)) a->w64 &= RMASK; if (!gf_general_is_one(b, w)) b->w64 &= LMASK; break;
|
||||
case 4: if (!gf_general_is_one(a, w)) a->w64 &= LMASK; if (!gf_general_is_one(b, w)) b->w64 &= RMASK; break;
|
||||
case 5: if (!gf_general_is_one(a, w)) a->w64 &= LMASK; if (!gf_general_is_one(b, w)) b->w64 &= LMASK; break;
|
||||
case 6: if (!gf_general_is_one(b, w)) b->w64 &= RMASK; break;
|
||||
case 7: if (!gf_general_is_one(b, w)) b->w64 &= LMASK; break;
|
||||
}
|
||||
}
|
||||
|
||||
//Allen: for w=128, we have important special cases where one half or the other of the number is all
|
||||
//zeros. The probability of hitting such a number randomly is 2^-64, so if we don't force these cases
|
||||
//we'll probably never hit them. This could be implemented more efficiently by changing the set-random
|
||||
//function for w=128, but I think this is easier to follow.
|
||||
//I'm arbitrarily deciding to do this sort of thing with a quarter of the cases
|
||||
if (w == 128) {
|
||||
switch (i % 32)
|
||||
{
|
||||
case 0: if (!gf_general_is_one(a, w)) a->w128[0] = 0; break;
|
||||
case 1: if (!gf_general_is_one(a, w)) a->w128[1] = 0; break;
|
||||
case 2: if (!gf_general_is_one(a, w)) a->w128[0] = 0; if (!gf_general_is_one(b, w)) b->w128[0] = 0; break;
|
||||
case 3: if (!gf_general_is_one(a, w)) a->w128[0] = 0; if (!gf_general_is_one(b, w)) b->w128[1] = 0; break;
|
||||
case 4: if (!gf_general_is_one(a, w)) a->w128[1] = 0; if (!gf_general_is_one(b, w)) b->w128[0] = 0; break;
|
||||
case 5: if (!gf_general_is_one(a, w)) a->w128[1] = 0; if (!gf_general_is_one(b, w)) b->w128[1] = 0; break;
|
||||
case 6: if (!gf_general_is_one(b, w)) b->w128[0] = 0; break;
|
||||
case 7: if (!gf_general_is_one(b, w)) b->w128[1] = 0; break;
|
||||
}
|
||||
}
|
||||
|
||||
tested = 0;
|
||||
|
@ -195,10 +278,10 @@ int main(int argc, char **argv)
|
|||
gf_general_multiply(&gf_def, a, b, d);
|
||||
|
||||
if (!gf_general_are_equal(c, d, w)) {
|
||||
gf_general_val_to_s(a, w, as);
|
||||
gf_general_val_to_s(b, w, bs);
|
||||
gf_general_val_to_s(c, w, cs);
|
||||
gf_general_val_to_s(d, w, ds);
|
||||
gf_general_val_to_s(a, w, as, 1);
|
||||
gf_general_val_to_s(b, w, bs, 1);
|
||||
gf_general_val_to_s(c, w, cs, 1);
|
||||
gf_general_val_to_s(d, w, ds, 1);
|
||||
printf("Error in single multiplication (all numbers in hex):\n\n");
|
||||
printf(" gf.multiply(gf, %s, %s) = %s\n", as, bs, cs);
|
||||
printf(" The default gf multiplier returned %s\n", ds);
|
||||
|
@ -216,9 +299,9 @@ int main(int argc, char **argv)
|
|||
if (((gf_general_is_zero(a, w) || gf_general_is_zero(b, w)) && !gf_general_is_zero(c, w)) ||
|
||||
(gf_general_is_one(a, w) && !gf_general_are_equal(b, c, w)) ||
|
||||
(gf_general_is_one(b, w) && !gf_general_are_equal(a, c, w))) {
|
||||
gf_general_val_to_s(a, w, as);
|
||||
gf_general_val_to_s(b, w, bs);
|
||||
gf_general_val_to_s(c, w, cs);
|
||||
gf_general_val_to_s(a, w, as, 1);
|
||||
gf_general_val_to_s(b, w, bs, 1);
|
||||
gf_general_val_to_s(c, w, cs, 1);
|
||||
printf("Error in single multiplication (all numbers in hex):\n\n");
|
||||
printf(" gf.multiply(gf, %s, %s) = %s, which is clearly wrong.\n", as, bs, cs);
|
||||
;
|
||||
|
@ -229,9 +312,9 @@ int main(int argc, char **argv)
|
|||
/* Dumb check to make sure that it's not returning numbers that are too big: */
|
||||
|
||||
if (w < 32 && (c->w32 & mask) != c->w32) {
|
||||
gf_general_val_to_s(a, w, as);
|
||||
gf_general_val_to_s(b, w, bs);
|
||||
gf_general_val_to_s(c, w, cs);
|
||||
gf_general_val_to_s(a, w, as, 1);
|
||||
gf_general_val_to_s(b, w, bs, 1);
|
||||
gf_general_val_to_s(c, w, cs, 1);
|
||||
printf("Error in single multiplication (all numbers in hex):\n\n");
|
||||
printf(" gf.multiply.w32(gf, %s, %s) = %s, which is too big.\n", as, bs, cs);
|
||||
exit(1);
|
||||
|
@ -242,10 +325,10 @@ int main(int argc, char **argv)
|
|||
if (!gf_general_is_zero(a, w)) {
|
||||
gf_general_divide(&gf, c, a, d);
|
||||
if (!gf_general_are_equal(b, d, w)) {
|
||||
gf_general_val_to_s(a, w, as);
|
||||
gf_general_val_to_s(b, w, bs);
|
||||
gf_general_val_to_s(c, w, cs);
|
||||
gf_general_val_to_s(d, w, ds);
|
||||
gf_general_val_to_s(a, w, as, 1);
|
||||
gf_general_val_to_s(b, w, bs, 1);
|
||||
gf_general_val_to_s(c, w, cs, 1);
|
||||
gf_general_val_to_s(d, w, ds, 1);
|
||||
printf("Error in single multiplication/division (all numbers in hex):\n\n");
|
||||
printf(" gf.multiply(gf, %s, %s) = %s, but gf.divide(gf, %s, %s) = %s\n", as, bs, cs, cs, as, ds);
|
||||
exit(1);
|
||||
|
@ -257,40 +340,82 @@ int main(int argc, char **argv)
|
|||
|
||||
if (region) {
|
||||
if (verbose) { printf("Testing region multiplications\n"); fflush(stdout); }
|
||||
for (i = 0; i < 1000; i++) {
|
||||
if (i < 20) {
|
||||
gf_general_set_zero(a, w);
|
||||
} else if (i < 40) {
|
||||
gf_general_set_one(a, w);
|
||||
} else if (i < 60) {
|
||||
gf_general_set_two(a, w);
|
||||
} else {
|
||||
gf_general_set_random(a, w, 1);
|
||||
for (i = 0; i < 1024; i++) {
|
||||
//Allen: changing to a switch thing as with the single ops to make things proportional
|
||||
switch (i % 32)
|
||||
{
|
||||
case 0:
|
||||
gf_general_set_zero(a, w);
|
||||
break;
|
||||
case 1:
|
||||
gf_general_set_one(a, w);
|
||||
break;
|
||||
case 2:
|
||||
gf_general_set_two(a, w);
|
||||
break;
|
||||
default:
|
||||
gf_general_set_random(a, w, 1);
|
||||
}
|
||||
MOA_Fill_Random_Region(ra, REGION_SIZE);
|
||||
MOA_Fill_Random_Region(rb, REGION_SIZE);
|
||||
xor = i%2;
|
||||
xor = (i/32)%2;
|
||||
align = w/8;
|
||||
if (align == 0) align = 1;
|
||||
if (align > 16) align = 16;
|
||||
|
||||
/* JSP - Cauchy test. When w < 32 & it doesn't equal 4, 8 or 16, the default is
|
||||
equal to GF_REGION_CAUCHY, even if GF_REGION_CAUCHY is not set. We are testing
|
||||
three alignments here:
|
||||
|
||||
1. Anything goes -- no alignment guaranteed.
|
||||
2. Perfect alignment. Here src and dest must be aligned wrt each other,
|
||||
and bytes must be a multiple of 16*w.
|
||||
3. Imperfect alignment. Here we'll have src and dest be aligned wrt each
|
||||
other, but bytes is simply a multiple of w. That means some XOR's will
|
||||
be aligned, and some won't.
|
||||
*/
|
||||
|
||||
if ((h->region_type & GF_REGION_CAUCHY) || (w < 32 && w != 4 && w != 8 && w != 16)) {
|
||||
start = MOA_Random_W(5, 1);
|
||||
end = REGION_SIZE - MOA_Random_W(5, 1);
|
||||
alignment_test = (i%3);
|
||||
|
||||
s_start = MOA_Random_W(5, 1);
|
||||
if (alignment_test == 0) {
|
||||
d_start = MOA_Random_W(5, 1);
|
||||
} else {
|
||||
d_start = s_start;
|
||||
}
|
||||
|
||||
bytes = (d_start > s_start) ? REGION_SIZE - d_start : REGION_SIZE - s_start;
|
||||
bytes -= MOA_Random_W(5, 1);
|
||||
if (alignment_test == 1) {
|
||||
bytes -= (bytes % (w*16));
|
||||
} else {
|
||||
bytes -= (bytes % w);
|
||||
}
|
||||
|
||||
target = rb;
|
||||
while ((end-start)%w != 0) end--;
|
||||
|
||||
/* JSP - Otherwise, we're testing a non-cauchy test, and alignment
|
||||
must be more strict. We have to make sure that the regions are
|
||||
aligned wrt each other on 16-byte pointers. */
|
||||
|
||||
} else {
|
||||
start = MOA_Random_W(5, 1) * align;
|
||||
end = REGION_SIZE - (MOA_Random_W(5, 1) * align);
|
||||
s_start = MOA_Random_W(5, 1) * align;
|
||||
d_start = s_start;
|
||||
bytes = REGION_SIZE - s_start - MOA_Random_W(5, 1);
|
||||
bytes -= (bytes % align);
|
||||
|
||||
if (h->mult_type == GF_MULT_COMPOSITE && (h->region_type & GF_REGION_ALTMAP)) {
|
||||
target = rb ;
|
||||
} else {
|
||||
target = ((i%4)/2) ? rb : ra;
|
||||
target = (i/64)%2 ? rb : ra;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(rc, ra, REGION_SIZE);
|
||||
memcpy(rd, target, REGION_SIZE);
|
||||
gf_general_do_region_multiply(&gf, a, ra+start, target+start, end-start, xor);
|
||||
gf_general_do_region_check(&gf, a, rc+start, rd+start, target+start, end-start, xor);
|
||||
gf_general_do_region_multiply(&gf, a, ra+s_start, target+d_start, bytes, xor);
|
||||
gf_general_do_region_check(&gf, a, rc+s_start, rd+d_start, target+d_start, bytes, xor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
376
gf_w4.c
376
gf_w4.c
|
@ -100,7 +100,6 @@ gf_val_32_t gf_w4_euclid (gf_t *gf, gf_val_32_t b)
|
|||
y_im1 = 0;
|
||||
|
||||
while (e_i != 1) {
|
||||
|
||||
e_ip1 = e_im1;
|
||||
d_ip1 = d_im1;
|
||||
c_i = 0;
|
||||
|
@ -108,6 +107,7 @@ gf_val_32_t gf_w4_euclid (gf_t *gf, gf_val_32_t b)
|
|||
while (d_ip1 >= d_i) {
|
||||
c_i ^= (1 << (d_ip1 - d_i));
|
||||
e_ip1 ^= (e_i << (d_ip1 - d_i));
|
||||
if (e_ip1 == 0) return 0;
|
||||
while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--;
|
||||
}
|
||||
|
||||
|
@ -146,6 +146,110 @@ gf_val_32_t gf_w4_matrix (gf_t *gf, gf_val_32_t b)
|
|||
return gf_bitmatrix_inverse(b, 4, ((gf_internal_t *) (gf->scratch))->prim_poly);
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
inline
|
||||
gf_val_32_t
|
||||
gf_w4_shift_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
|
||||
{
|
||||
uint8_t product, i, pp;
|
||||
gf_internal_t *h;
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
pp = h->prim_poly;
|
||||
|
||||
product = 0;
|
||||
|
||||
for (i = 0; i < GF_FIELD_WIDTH; i++) {
|
||||
if (a & (1 << i)) product ^= (b << i);
|
||||
}
|
||||
for (i = (GF_FIELD_WIDTH*2-2); i >= GF_FIELD_WIDTH; i--) {
|
||||
if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH));
|
||||
}
|
||||
return product;
|
||||
}
|
||||
|
||||
/* Ben: This function works, but it is 33% slower than the normal shift mult */
|
||||
|
||||
static
|
||||
inline
|
||||
gf_val_32_t
|
||||
gf_w4_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4)
|
||||
{
|
||||
gf_val_32_t rv = 0;
|
||||
|
||||
#ifdef INTEL_SSE4_PCLMUL
|
||||
|
||||
__m128i a, b;
|
||||
__m128i result;
|
||||
__m128i prim_poly;
|
||||
__m128i w;
|
||||
gf_internal_t * h = gf->scratch;
|
||||
|
||||
a = _mm_insert_epi32 (_mm_setzero_si128(), a4, 0);
|
||||
b = _mm_insert_epi32 (a, b4, 0);
|
||||
|
||||
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1fULL));
|
||||
|
||||
/* Do the initial multiply */
|
||||
|
||||
result = _mm_clmulepi64_si128 (a, b, 0);
|
||||
|
||||
/* Ben/JSP: Do prim_poly reduction once. We are guaranteed that we will only
|
||||
have to do the reduction only once, because (w-2)/z == 1. Where
|
||||
z is equal to the number of zeros after the leading 1.
|
||||
|
||||
_mm_clmulepi64_si128 is the carryless multiply operation. Here
|
||||
_mm_srli_epi64 shifts the result to the right by 4 bits. This allows
|
||||
us to multiply the prim_poly by the leading bits of the result. We
|
||||
then xor the result of that operation back with the result. */
|
||||
|
||||
w = _mm_clmulepi64_si128 (prim_poly, _mm_srli_epi64 (result, 4), 0);
|
||||
result = _mm_xor_si128 (result, w);
|
||||
|
||||
/* Extracts 32 bit value from result. */
|
||||
|
||||
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
|
||||
#endif
|
||||
return rv;
|
||||
}
|
||||
|
||||
static
|
||||
void
|
||||
gf_w4_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int
|
||||
xor)
|
||||
{
|
||||
gf_region_data rd;
|
||||
uint8_t *s8;
|
||||
uint8_t *d8;
|
||||
|
||||
if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
|
||||
if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
|
||||
|
||||
gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 1);
|
||||
gf_do_initial_region_alignment(&rd);
|
||||
|
||||
s8 = (uint8_t *) rd.s_start;
|
||||
d8 = (uint8_t *) rd.d_start;
|
||||
|
||||
if (xor) {
|
||||
while (d8 < ((uint8_t *) rd.d_top)) {
|
||||
*d8 ^= (gf->multiply.w32(gf, val, (*s8 & 0xf)) |
|
||||
((gf->multiply.w32(gf, val, (*s8 >> 4))) << 4));
|
||||
d8++;
|
||||
s8++;
|
||||
}
|
||||
} else {
|
||||
while (d8 < ((uint8_t *) rd.d_top)) {
|
||||
*d8 = (gf->multiply.w32(gf, val, (*s8 & 0xf)) |
|
||||
((gf->multiply.w32(gf, val, (*s8 >> 4))) << 4));
|
||||
d8++;
|
||||
s8++;
|
||||
}
|
||||
}
|
||||
gf_do_final_region_alignment(&rd);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
IMPLEMENTATION: LOG_TABLE:
|
||||
|
||||
|
@ -220,18 +324,28 @@ int gf_w4_log_init(gf_t *gf)
|
|||
h = (gf_internal_t *) gf->scratch;
|
||||
ltd = h->private;
|
||||
|
||||
ltd->log_tbl[0] = 0;
|
||||
for (i = 0; i < GF_FIELD_SIZE; i++)
|
||||
ltd->log_tbl[i]=0;
|
||||
|
||||
ltd->antilog_tbl_div = ltd->antilog_tbl + (GF_FIELD_SIZE-1);
|
||||
b = 1;
|
||||
for (i = 0; i < GF_FIELD_SIZE-1; i++) {
|
||||
ltd->log_tbl[b] = i;
|
||||
ltd->antilog_tbl[i] = b;
|
||||
ltd->antilog_tbl[i+GF_FIELD_SIZE-1] = b;
|
||||
b <<= 1;
|
||||
if (b & GF_FIELD_SIZE) {
|
||||
b = b ^ h->prim_poly;
|
||||
}
|
||||
i = 0;
|
||||
do {
|
||||
if (ltd->log_tbl[b] != 0 && i != 0) {
|
||||
fprintf(stderr, "Cannot construct log table: Polynomial is not primitive.\n\n");
|
||||
return 0;
|
||||
}
|
||||
ltd->log_tbl[b] = i;
|
||||
ltd->antilog_tbl[i] = b;
|
||||
ltd->antilog_tbl[i+GF_FIELD_SIZE-1] = b;
|
||||
b <<= 1;
|
||||
i++;
|
||||
if (b & GF_FIELD_SIZE) b = b ^ h->prim_poly;
|
||||
} while (b != 1);
|
||||
|
||||
if (i != GF_FIELD_SIZE - 1) {
|
||||
_gf_errno = GF_E_LOGPOLY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
gf->inverse.w32 = gf_w4_inverse_from_divide;
|
||||
|
@ -300,7 +414,7 @@ static
|
|||
void
|
||||
gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSSE3
|
||||
gf_region_data rd;
|
||||
uint8_t *base, *sptr, *dptr, *top;
|
||||
__m128i tl, loset, h4, r, va, th;
|
||||
|
@ -351,37 +465,17 @@ int gf_w4_single_table_init(gf_t *gf)
|
|||
gf_internal_t *h;
|
||||
struct gf_single_table_data *std;
|
||||
int a, b, prod, loga, logb;
|
||||
uint8_t log_tbl[GF_FIELD_SIZE];
|
||||
uint8_t antilog_tbl[GF_FIELD_SIZE*2];
|
||||
int sse;
|
||||
|
||||
sse = 0;
|
||||
#ifdef INTEL_SSE4
|
||||
sse = 1;
|
||||
#endif
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
std = (struct gf_single_table_data *)h->private;
|
||||
|
||||
b = 1;
|
||||
for (a = 0; a < GF_MULT_GROUP_SIZE; a++) {
|
||||
log_tbl[b] = a;
|
||||
antilog_tbl[a] = b;
|
||||
antilog_tbl[a+GF_MULT_GROUP_SIZE] = b;
|
||||
b <<= 1;
|
||||
if (b & GF_FIELD_SIZE) {
|
||||
b = b ^ h->prim_poly;
|
||||
}
|
||||
}
|
||||
|
||||
bzero(std->mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
|
||||
bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
|
||||
|
||||
for (a = 1; a < GF_FIELD_SIZE; a++) {
|
||||
loga = log_tbl[a];
|
||||
for (b = 1; b < GF_FIELD_SIZE; b++) {
|
||||
logb = log_tbl[b];
|
||||
prod = antilog_tbl[loga+logb];
|
||||
prod = gf_w4_shift_multiply(gf, a, b);
|
||||
std->mult[a][b] = prod;
|
||||
std->div[prod][b] = a;
|
||||
}
|
||||
|
@ -390,11 +484,16 @@ int gf_w4_single_table_init(gf_t *gf)
|
|||
gf->inverse.w32 = NULL;
|
||||
gf->divide.w32 = gf_w4_single_table_divide;
|
||||
gf->multiply.w32 = gf_w4_single_table_multiply;
|
||||
if ((h->region_type & GF_REGION_SSE) || (h->mult_type == GF_MULT_DEFAULT && sse)) {
|
||||
gf->multiply_region.w32 = gf_w4_single_table_sse_multiply_region;
|
||||
} else {
|
||||
#ifdef INTEL_SSSE3
|
||||
if(h->region_type & (GF_REGION_NOSSE | GF_REGION_CAUCHY))
|
||||
gf->multiply_region.w32 = gf_w4_single_table_multiply_region;
|
||||
else
|
||||
gf->multiply_region.w32 = gf_w4_single_table_sse_multiply_region;
|
||||
#else
|
||||
gf->multiply_region.w32 = gf_w4_single_table_multiply_region;
|
||||
}
|
||||
if (h->region_type & GF_REGION_SSE) return 0;
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -458,32 +557,17 @@ int gf_w4_double_table_init(gf_t *gf)
|
|||
gf_internal_t *h;
|
||||
struct gf_double_table_data *std;
|
||||
int a, b, c, prod, loga, logb, ab;
|
||||
uint8_t log_tbl[GF_FIELD_SIZE];
|
||||
uint8_t antilog_tbl[GF_FIELD_SIZE*2];
|
||||
uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
std = (struct gf_double_table_data *)h->private;
|
||||
|
||||
b = 1;
|
||||
for (a = 0; a < GF_MULT_GROUP_SIZE; a++) {
|
||||
log_tbl[b] = a;
|
||||
antilog_tbl[a] = b;
|
||||
antilog_tbl[a+GF_MULT_GROUP_SIZE] = b;
|
||||
b <<= 1;
|
||||
if (b & GF_FIELD_SIZE) {
|
||||
b = b ^ h->prim_poly;
|
||||
}
|
||||
}
|
||||
|
||||
bzero(mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
|
||||
bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
|
||||
|
||||
for (a = 1; a < GF_FIELD_SIZE; a++) {
|
||||
loga = log_tbl[a];
|
||||
for (b = 1; b < GF_FIELD_SIZE; b++) {
|
||||
logb = log_tbl[b];
|
||||
prod = antilog_tbl[loga+logb];
|
||||
prod = gf_w4_shift_multiply(gf, a, b);
|
||||
mult[a][b] = prod;
|
||||
std->div[prod][b] = a;
|
||||
}
|
||||
|
@ -600,32 +684,17 @@ int gf_w4_quad_table_init(gf_t *gf)
|
|||
gf_internal_t *h;
|
||||
struct gf_quad_table_data *std;
|
||||
int prod, loga, logb, ab, val, a, b, c, d, va, vb, vc, vd;
|
||||
uint8_t log_tbl[GF_FIELD_SIZE];
|
||||
uint8_t antilog_tbl[GF_FIELD_SIZE*2];
|
||||
uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
std = (struct gf_quad_table_data *)h->private;
|
||||
|
||||
b = 1;
|
||||
for (a = 0; a < GF_MULT_GROUP_SIZE; a++) {
|
||||
log_tbl[b] = a;
|
||||
antilog_tbl[a] = b;
|
||||
antilog_tbl[a+GF_MULT_GROUP_SIZE] = b;
|
||||
b <<= 1;
|
||||
if (b & GF_FIELD_SIZE) {
|
||||
b = b ^ h->prim_poly;
|
||||
}
|
||||
}
|
||||
|
||||
bzero(mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
|
||||
bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
|
||||
|
||||
for (a = 1; a < GF_FIELD_SIZE; a++) {
|
||||
loga = log_tbl[a];
|
||||
for (b = 1; b < GF_FIELD_SIZE; b++) {
|
||||
logb = log_tbl[b];
|
||||
prod = antilog_tbl[loga+logb];
|
||||
prod = gf_w4_shift_multiply(gf, a, b);
|
||||
mult[a][b] = prod;
|
||||
std->div[prod][b] = a;
|
||||
}
|
||||
|
@ -702,13 +771,18 @@ int gf_w4_table_init(gf_t *gf)
|
|||
{
|
||||
int rt;
|
||||
gf_internal_t *h;
|
||||
int issse3 = 0;
|
||||
|
||||
#ifdef INTEL_SSSE3
|
||||
issse3 = 1;
|
||||
#endif
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
rt = (h->region_type);
|
||||
if (rt == 0 || rt == GF_REGION_CAUCHY) rt |= GF_REGION_SINGLE_TABLE;
|
||||
if (rt & GF_REGION_SINGLE_TABLE) {
|
||||
return gf_w4_single_table_init(gf);
|
||||
} else if (rt & GF_REGION_DOUBLE_TABLE) {
|
||||
|
||||
if (h->mult_type == GF_MULT_DEFAULT && !issse3) rt |= GF_REGION_DOUBLE_TABLE;
|
||||
|
||||
if (rt & GF_REGION_DOUBLE_TABLE) {
|
||||
return gf_w4_double_table_init(gf);
|
||||
} else if (rt & GF_REGION_QUAD_TABLE) {
|
||||
if (rt & GF_REGION_LAZY) {
|
||||
|
@ -717,7 +791,9 @@ int gf_w4_table_init(gf_t *gf)
|
|||
return gf_w4_quad_table_init(gf);
|
||||
}
|
||||
return gf_w4_double_table_init(gf);
|
||||
}
|
||||
} else {
|
||||
return gf_w4_single_table_init(gf);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -842,7 +918,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *s8, *d8;
|
||||
uint8_t vrev;
|
||||
|
@ -895,7 +971,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
struct gf_bytwo_data *btd;
|
||||
|
@ -960,7 +1036,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -986,7 +1062,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1014,7 +1090,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1041,7 +1117,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1071,7 +1147,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1099,7 +1175,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1127,7 +1203,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1156,7 +1232,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1185,7 +1261,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1215,7 +1291,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1245,7 +1321,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1274,7 +1350,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
int i;
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
|
@ -1303,7 +1379,7 @@ static
|
|||
void
|
||||
gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
|
||||
{
|
||||
#ifdef INTEL_SSE4
|
||||
#ifdef INTEL_SSE2
|
||||
uint8_t *d8, *s8, tb;
|
||||
__m128i pp, m1, m2, t1, t2, va, vb;
|
||||
struct gf_bytwo_data *btd;
|
||||
|
@ -1853,114 +1929,107 @@ int gf_w4_bytwo_init(gf_t *gf)
|
|||
|
||||
if (h->mult_type == GF_MULT_BYTWO_p) {
|
||||
gf->multiply.w32 = gf_w4_bytwo_p_multiply;
|
||||
if (h->region_type == GF_REGION_SSE) {
|
||||
gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region;
|
||||
} else {
|
||||
#ifdef INTEL_SSE2
|
||||
if (h->region_type & GF_REGION_NOSSE)
|
||||
gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region;
|
||||
else
|
||||
gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region;
|
||||
#else
|
||||
gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region;
|
||||
}
|
||||
if (h->region_type & GF_REGION_SSE)
|
||||
return 0;
|
||||
#endif
|
||||
} else {
|
||||
gf->multiply.w32 = gf_w4_bytwo_b_multiply;
|
||||
if (h->region_type == GF_REGION_SSE) {
|
||||
gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region;
|
||||
} else {
|
||||
#ifdef INTEL_SSE2
|
||||
if (h->region_type & GF_REGION_NOSSE)
|
||||
gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region;
|
||||
else
|
||||
gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region;
|
||||
#else
|
||||
gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region;
|
||||
}
|
||||
if (h->region_type & GF_REGION_SSE)
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
gf->inverse.w32 = gf_w4_euclid;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/* ------------------------------------------------------------
|
||||
JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only
|
||||
include it for completeness. It does have the feature that it requires no
|
||||
extra memory.
|
||||
*/
|
||||
|
||||
static
|
||||
inline
|
||||
gf_val_32_t
|
||||
gf_w4_shift_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
|
||||
static
|
||||
int gf_w4_cfm_init(gf_t *gf)
|
||||
{
|
||||
uint8_t product, i, pp;
|
||||
gf_internal_t *h;
|
||||
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
pp = h->prim_poly;
|
||||
|
||||
product = 0;
|
||||
|
||||
for (i = 0; i < GF_FIELD_WIDTH; i++) {
|
||||
if (a & (1 << i)) product ^= (b << i);
|
||||
}
|
||||
for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) {
|
||||
if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH));
|
||||
}
|
||||
return product;
|
||||
#ifdef INTEL_SSE4_PCLMUL
|
||||
gf->multiply.w32 = gf_w4_clm_multiply;
|
||||
return 1;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
int gf_w4_shift_init(gf_t *gf)
|
||||
{
|
||||
gf->multiply.w32 = gf_w4_shift_multiply;
|
||||
gf->inverse.w32 = gf_w4_euclid;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* JSP: I'm putting all error-checking into gf_error_check(), so you don't
|
||||
have to do error checking in scratch_size or in init */
|
||||
|
||||
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
|
||||
{
|
||||
int region_tbl_size;
|
||||
int sss;
|
||||
int ss;
|
||||
int issse3 = 0;
|
||||
|
||||
sss = (GF_REGION_SINGLE_TABLE | GF_REGION_SSE | GF_REGION_NOSSE);
|
||||
ss = (GF_REGION_SSE | GF_REGION_NOSSE);
|
||||
#ifdef INTEL_SSSE3
|
||||
issse3 = 1;
|
||||
#endif
|
||||
|
||||
switch(mult_type)
|
||||
{
|
||||
case GF_MULT_BYTWO_p:
|
||||
case GF_MULT_BYTWO_b:
|
||||
if (arg1 != 0 || arg2 != 0) return -1;
|
||||
if (region_type != GF_REGION_CAUCHY) {
|
||||
if ((region_type | ss) != ss || (region_type & ss) == ss) return -1;
|
||||
}
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_bytwo_data);
|
||||
break;
|
||||
case GF_MULT_DEFAULT:
|
||||
case GF_MULT_TABLE:
|
||||
if (arg1 != 0 || arg2 != 0) return -1;
|
||||
if (region_type == GF_REGION_CAUCHY || region_type == (GF_REGION_CAUCHY | GF_REGION_SINGLE_TABLE)) {
|
||||
if (region_type == GF_REGION_CAUCHY) {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64;
|
||||
}
|
||||
if (mult_type == GF_MULT_DEFAULT || region_type == 0) region_type = GF_REGION_SINGLE_TABLE;
|
||||
if (region_type & GF_REGION_SINGLE_TABLE) {
|
||||
if ((region_type | sss) != sss) return -1;
|
||||
if ((region_type & sss) == sss) return -1;
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64;
|
||||
} else if (region_type & GF_REGION_DOUBLE_TABLE) {
|
||||
if (region_type != GF_REGION_DOUBLE_TABLE) return -1;
|
||||
|
||||
if (mult_type == GF_MULT_DEFAULT && !issse3) region_type = GF_REGION_DOUBLE_TABLE;
|
||||
|
||||
if (region_type & GF_REGION_DOUBLE_TABLE) {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_double_table_data) + 64;
|
||||
} else if (region_type & GF_REGION_QUAD_TABLE) {
|
||||
if ((region_type | GF_REGION_LAZY) != (GF_REGION_QUAD_TABLE | GF_REGION_LAZY)) return -1;
|
||||
if ((region_type & GF_REGION_LAZY) == 0) {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_quad_table_data) + 64;
|
||||
} else {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_quad_table_lazy_data) + 64;
|
||||
}
|
||||
} else {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64;
|
||||
}
|
||||
return -1;
|
||||
break;
|
||||
|
||||
case GF_MULT_LOG_TABLE:
|
||||
if (arg1 != 0 || arg2 != 0 || (region_type != 0 && region_type != GF_REGION_CAUCHY)) return -1;
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
|
||||
break;
|
||||
case GF_MULT_CARRY_FREE:
|
||||
return sizeof(gf_internal_t);
|
||||
break;
|
||||
case GF_MULT_SHIFT:
|
||||
if (arg1 != 0 || arg2 != 0 || (region_type != 0 && region_type != GF_REGION_CAUCHY)) return -1;
|
||||
return sizeof(gf_internal_t);
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
|
@ -1970,7 +2039,7 @@ gf_w4_init (gf_t *gf)
|
|||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
if (h->prim_poly == 0) h->prim_poly = 0x13;
|
||||
|
||||
h->prim_poly |= 0x10;
|
||||
gf->multiply.w32 = NULL;
|
||||
gf->divide.w32 = NULL;
|
||||
gf->inverse.w32 = NULL;
|
||||
|
@ -1978,13 +2047,13 @@ gf_w4_init (gf_t *gf)
|
|||
gf->extract_word.w32 = gf_w4_extract_word;
|
||||
|
||||
switch(h->mult_type) {
|
||||
case GF_MULT_SHIFT: if (gf_w4_shift_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_CARRY_FREE: if (gf_w4_cfm_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_SHIFT: if (gf_w4_shift_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_BYTWO_p:
|
||||
case GF_MULT_BYTWO_b:
|
||||
if (gf_w4_bytwo_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_BYTWO_b: if (gf_w4_bytwo_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_DEFAULT:
|
||||
case GF_MULT_TABLE: if (gf_w4_table_init(gf) == 0) return 0; break;
|
||||
case GF_MULT_TABLE: if (gf_w4_table_init(gf) == 0) return 0; break;
|
||||
default: return 0;
|
||||
}
|
||||
|
||||
|
@ -1996,17 +2065,22 @@ gf_w4_init (gf_t *gf)
|
|||
gf->inverse.w32 = gf_w4_matrix;
|
||||
}
|
||||
|
||||
if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) {
|
||||
if (gf->divide.w32 == NULL) {
|
||||
gf->divide.w32 = gf_w4_divide_from_inverse;
|
||||
if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w4_euclid;
|
||||
}
|
||||
if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) {
|
||||
gf->inverse.w32 = gf_w4_inverse_from_divide;
|
||||
}
|
||||
|
||||
if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w4_inverse_from_divide;
|
||||
|
||||
if (h->region_type == GF_REGION_CAUCHY) {
|
||||
gf->multiply_region.w32 = gf_wgen_cauchy_region;
|
||||
gf->extract_word.w32 = gf_wgen_extract_word;
|
||||
}
|
||||
|
||||
if (gf->multiply_region.w32 == NULL) {
|
||||
gf->multiply_region.w32 = gf_w4_multiply_region_from_single;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
147
gf_wgen.c
147
gf_wgen.c
|
@ -93,6 +93,7 @@ gf_val_32_t gf_wgen_euclid (gf_t *gf, gf_val_32_t b)
|
|||
while (d_ip1 >= d_i) {
|
||||
c_i ^= (1 << (d_ip1 - d_i));
|
||||
e_ip1 ^= (e_i << (d_ip1 - d_i));
|
||||
if (e_ip1 == 0) return 0;
|
||||
while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--;
|
||||
}
|
||||
|
||||
|
@ -223,7 +224,7 @@ gf_wgen_bytwo_p_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
|
|||
pp = h->prim_poly;
|
||||
|
||||
prod = 0;
|
||||
pmask = (1 << (h->w)-1);
|
||||
pmask = (1 << ((h->w)-1)); /*Ben: Had an operator precedence warning here*/
|
||||
amask = pmask;
|
||||
|
||||
while (amask != 0) {
|
||||
|
@ -508,16 +509,11 @@ int gf_wgen_table_8_init(gf_t *gf)
|
|||
}
|
||||
|
||||
for (a = 1; a < (1 << w); a++) {
|
||||
b = 1;
|
||||
p = a;
|
||||
do {
|
||||
for (b = 1; b < (1 << w); b++) {
|
||||
p = gf_wgen_shift_multiply(gf, a, b);
|
||||
std->mult[(a<<w)|b] = p;
|
||||
std->div[(p<<w)|b] = a;
|
||||
b = (b & (1 << (w-1))) ? (b << 1) ^ h->prim_poly : (b << 1);
|
||||
b &= ((1 << w)-1);
|
||||
p = (p & (1 << (w-1))) ? (p << 1) ^ h->prim_poly : (p << 1);
|
||||
p &= ((1 << w)-1);
|
||||
} while (b != 1);
|
||||
std->div[(p<<w)|a] = b;
|
||||
}
|
||||
}
|
||||
|
||||
gf->multiply.w32 = gf_wgen_table_8_multiply;
|
||||
|
@ -572,18 +568,13 @@ int gf_wgen_table_16_init(gf_t *gf)
|
|||
std->div[a] = 0;
|
||||
std->div[a<<w] = 0;
|
||||
}
|
||||
|
||||
|
||||
for (a = 1; a < (1 << w); a++) {
|
||||
b = 1;
|
||||
p = a;
|
||||
do {
|
||||
for (b = 1; b < (1 << w); b++) {
|
||||
p = gf_wgen_shift_multiply(gf, a, b);
|
||||
std->mult[(a<<w)|b] = p;
|
||||
std->div[(p<<w)|b] = a;
|
||||
b = (b & (1 << (w-1))) ? (b << 1) ^ h->prim_poly : (b << 1);
|
||||
b &= ((1 << w)-1);
|
||||
p = (p & (1 << (w-1))) ? (p << 1) ^ h->prim_poly : (p << 1);
|
||||
p &= ((1 << w)-1);
|
||||
} while (b != 1);
|
||||
std->div[(p<<w)|a] = b;
|
||||
}
|
||||
}
|
||||
|
||||
gf->multiply.w32 = gf_wgen_table_16_multiply;
|
||||
|
@ -599,6 +590,11 @@ int gf_wgen_table_init(gf_t *gf)
|
|||
h = (gf_internal_t *) gf->scratch;
|
||||
if (h->w <= 8) return gf_wgen_table_8_init(gf);
|
||||
if (h->w <= 14) return gf_wgen_table_16_init(gf);
|
||||
|
||||
/* Returning zero to make the compiler happy, but this won't get
|
||||
executed, because it is tested in _scratch_space. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
|
@ -640,6 +636,7 @@ int gf_wgen_log_8_init(gf_t *gf)
|
|||
struct gf_wgen_log_w8_data *std;
|
||||
int w;
|
||||
uint32_t a, i;
|
||||
int check = 0;
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
w = h->w;
|
||||
|
@ -649,17 +646,27 @@ int gf_wgen_log_8_init(gf_t *gf)
|
|||
std->anti = std->log + (1<<h->w);
|
||||
std->danti = std->anti + (1<<h->w)-1;
|
||||
|
||||
i = 0;
|
||||
for (i = 0; i < (1 << w); i++)
|
||||
std->log[i] = 0;
|
||||
|
||||
a = 1;
|
||||
do {
|
||||
for(i=0; i < (1<<w)-1; i++)
|
||||
{
|
||||
if (std->log[a] != 0) check = 1;
|
||||
std->log[a] = i;
|
||||
std->anti[i] = a;
|
||||
std->danti[i] = a;
|
||||
i++;
|
||||
a = (a & (1 << (w-1))) ? (a << 1) ^ h->prim_poly : (a << 1);
|
||||
a &= ((1 << w)-1);
|
||||
} while (a != 1);
|
||||
|
||||
a <<= 1;
|
||||
if(a & (1<<w))
|
||||
a ^= h->prim_poly;
|
||||
//a &= ((1 << w)-1);
|
||||
}
|
||||
|
||||
if (check != 0) {
|
||||
_gf_errno = GF_E_LOGPOLY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
gf->multiply.w32 = gf_wgen_log_8_multiply;
|
||||
gf->divide.w32 = gf_wgen_log_8_divide;
|
||||
return 1;
|
||||
|
@ -704,6 +711,7 @@ int gf_wgen_log_16_init(gf_t *gf)
|
|||
struct gf_wgen_log_w16_data *std;
|
||||
int w;
|
||||
uint32_t a, i;
|
||||
int check = 0;
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
w = h->w;
|
||||
|
@ -712,17 +720,28 @@ int gf_wgen_log_16_init(gf_t *gf)
|
|||
std->log = &(std->base);
|
||||
std->anti = std->log + (1<<h->w);
|
||||
std->danti = std->anti + (1<<h->w)-1;
|
||||
|
||||
i = 0;
|
||||
|
||||
for (i = 0; i < (1 << w); i++)
|
||||
std->log[i] = 0;
|
||||
|
||||
a = 1;
|
||||
do {
|
||||
for(i=0; i < (1<<w)-1; i++)
|
||||
{
|
||||
if (std->log[a] != 0) check = 1;
|
||||
std->log[a] = i;
|
||||
std->anti[i] = a;
|
||||
std->danti[i] = a;
|
||||
i++;
|
||||
a = (a & (1 << (w-1))) ? (a << 1) ^ h->prim_poly : (a << 1);
|
||||
a &= ((1 << w)-1);
|
||||
} while (a != 1);
|
||||
a <<= 1;
|
||||
if(a & (1<<w))
|
||||
a ^= h->prim_poly;
|
||||
//a &= ((1 << w)-1);
|
||||
}
|
||||
|
||||
if (check) {
|
||||
if (h->mult_type != GF_MULT_LOG_TABLE) return gf_wgen_shift_init(gf);
|
||||
_gf_errno = GF_E_LOGPOLY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
gf->multiply.w32 = gf_wgen_log_16_multiply;
|
||||
gf->divide.w32 = gf_wgen_log_16_divide;
|
||||
|
@ -768,7 +787,8 @@ int gf_wgen_log_32_init(gf_t *gf)
|
|||
struct gf_wgen_log_w32_data *std;
|
||||
int w;
|
||||
uint32_t a, i;
|
||||
|
||||
int check = 0;
|
||||
|
||||
h = (gf_internal_t *) gf->scratch;
|
||||
w = h->w;
|
||||
std = (struct gf_wgen_log_w32_data *) h->private;
|
||||
|
@ -777,17 +797,27 @@ int gf_wgen_log_32_init(gf_t *gf)
|
|||
std->anti = std->log + (1<<h->w);
|
||||
std->danti = std->anti + (1<<h->w)-1;
|
||||
|
||||
i = 0;
|
||||
for (i = 0; i < (1 << w); i++)
|
||||
std->log[i] = 0;
|
||||
|
||||
a = 1;
|
||||
do {
|
||||
for(i=0; i < (1<<w)-1; i++)
|
||||
{
|
||||
if (std->log[a] != 0) check = 1;
|
||||
std->log[a] = i;
|
||||
std->anti[i] = a;
|
||||
std->danti[i] = a;
|
||||
i++;
|
||||
a = (a & (1 << (w-1))) ? (a << 1) ^ h->prim_poly : (a << 1);
|
||||
a &= ((1 << w)-1);
|
||||
} while (a != 1);
|
||||
|
||||
a <<= 1;
|
||||
if(a & (1<<w))
|
||||
a ^= h->prim_poly;
|
||||
//a &= ((1 << w)-1);
|
||||
}
|
||||
|
||||
if (check != 0) {
|
||||
_gf_errno = GF_E_LOGPOLY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
gf->multiply.w32 = gf_wgen_log_32_multiply;
|
||||
gf->divide.w32 = gf_wgen_log_32_divide;
|
||||
return 1;
|
||||
|
@ -802,15 +832,16 @@ int gf_wgen_log_init(gf_t *gf)
|
|||
if (h->w <= 8) return gf_wgen_log_8_init(gf);
|
||||
if (h->w <= 16) return gf_wgen_log_16_init(gf);
|
||||
if (h->w <= 32) return gf_wgen_log_32_init(gf);
|
||||
|
||||
/* Returning zero to make the compiler happy, but this won't get
|
||||
executed, because it is tested in _scratch_space. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type, int arg1, int arg2)
|
||||
{
|
||||
|
||||
if (w > 32 || w < 0) return -1;
|
||||
|
||||
if ((region_type | GF_REGION_CAUCHY) != GF_REGION_CAUCHY) return -1;
|
||||
|
||||
switch(mult_type)
|
||||
{
|
||||
case GF_MULT_DEFAULT:
|
||||
|
@ -828,40 +859,37 @@ int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type,
|
|||
case GF_MULT_SHIFT:
|
||||
case GF_MULT_BYTWO_b:
|
||||
case GF_MULT_BYTWO_p:
|
||||
if (arg1 != 0 || arg2 != 0) return -1;
|
||||
return sizeof(gf_internal_t);
|
||||
break;
|
||||
case GF_MULT_GROUP:
|
||||
if (arg1 <= 0 || arg2 <= 0) return -1;
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_group_data) +
|
||||
sizeof(uint32_t) * (1 << arg1) +
|
||||
sizeof(uint32_t) * (1 << arg2) + 64;
|
||||
break;
|
||||
|
||||
case GF_MULT_TABLE:
|
||||
if (arg1 != 0 || arg2 != 0) return -1;
|
||||
if (w <= 8) {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_table_w8_data) +
|
||||
sizeof(uint8_t)*(1 << w)*(1<<w)*2 + 64;
|
||||
} else if (w < 15) {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_table_w16_data) +
|
||||
sizeof(uint16_t)*(1 << w)*(1<<w)*2 + 64;
|
||||
} else return -1;
|
||||
}
|
||||
return 0;
|
||||
case GF_MULT_LOG_TABLE:
|
||||
if (arg1 != 0 || arg2 != 0) return -1;
|
||||
if (w <= 8) {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_log_w8_data) +
|
||||
sizeof(uint8_t)*(1 << w)*3;
|
||||
} else if (w <= 16) {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_log_w16_data) +
|
||||
sizeof(uint16_t)*(1 << w)*3;
|
||||
} else if (w <= 29) {
|
||||
} else if (w <= 27) {
|
||||
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_log_w32_data) +
|
||||
sizeof(uint32_t)*(1 << w)*3;
|
||||
} else return -1;
|
||||
|
||||
} else
|
||||
return 0;
|
||||
default:
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -935,6 +963,13 @@ int gf_wgen_init(gf_t *gf)
|
|||
case 32: h->prim_poly = 00020000007; break;
|
||||
default: fprintf(stderr, "gf_wgen_init: w not defined yet\n"); exit(1);
|
||||
}
|
||||
} else {
|
||||
if (h->w == 32) {
|
||||
h->prim_poly &= 0xffffffff;
|
||||
} else {
|
||||
h->prim_poly |= (1 << h->w);
|
||||
if (h->prim_poly & ~((1ULL<<(h->w+1))-1)) return 0;
|
||||
}
|
||||
}
|
||||
|
||||
gf->multiply.w32 = NULL;
|
||||
|
@ -950,7 +985,7 @@ int gf_wgen_init(gf_t *gf)
|
|||
} else if (h->w <= 16) {
|
||||
if (gf_wgen_log_init(gf) == 0) return 0;
|
||||
} else {
|
||||
if (gf_wgen_group_init(gf) == 0) return 0;
|
||||
if (gf_wgen_bytwo_p_init(gf) == 0) return 0;
|
||||
}
|
||||
break;
|
||||
case GF_MULT_SHIFT: if (gf_wgen_shift_init(gf) == 0) return 0; break;
|
||||
|
|
|
@ -1,31 +0,0 @@
|
|||
License.txt
|
||||
README.txt
|
||||
GNUmakefile
|
||||
gf.c
|
||||
gf_add.c
|
||||
gf_complete.h
|
||||
gf_div.c
|
||||
gf_example_1.c
|
||||
gf_example_2.c
|
||||
gf_example_3.c
|
||||
gf_example_4.c
|
||||
gf_general.c
|
||||
gf_general.h
|
||||
gf_int.h
|
||||
gf_method.c
|
||||
gf_method.h
|
||||
gf_methods.c
|
||||
gf_mult.c
|
||||
gf_poly.c
|
||||
gf_rand.c
|
||||
gf_rand.h
|
||||
gf_time.c
|
||||
gf_unit.c
|
||||
gf_w128.c
|
||||
gf_w16.c
|
||||
gf_w32.c
|
||||
gf_w4.c
|
||||
gf_w64.c
|
||||
gf_w8.c
|
||||
gf_wgen.c
|
||||
whats_my_sse.c
|
|
@ -1,14 +0,0 @@
|
|||
if [ $# -lt 4 ]; then
|
||||
echo 'usage: sh tmp-test.sh w gf_specs (e.g. LOG - -)' >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
w=$1
|
||||
shift
|
||||
i=1024
|
||||
while [ $i -le 134217728 ]; do
|
||||
iter=`echo $i | awk '{ print (134217728/$1)*1 }'`
|
||||
echo $i $iter $w $* `./gf_time $w G -1 $i $iter $* | head -n 3 | tail -n 2`
|
||||
i=`echo $i | awk '{ print $1*2 }'`
|
||||
done
|
||||
|
15
tmp.sh
15
tmp.sh
|
@ -1,15 +0,0 @@
|
|||
for i in 5 10 ; do
|
||||
sed 's/1 }/'$i' }/' tmp-time-test.sh > tmp2.sh
|
||||
sh tmp2.sh 4 LOG - - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 4 TABLE - - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 4 TABLE SINGLE,SSE - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 8 LOG - - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 8 TABLE - - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 8 SPLIT 8 4 SSE - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 16 LOG - - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 16 SPLIT 16 4 SSE,STDMAP - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 16 SPLIT 16 4 SSE,ALTMAP - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 32 SPLIT 8 8 - - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 32 SPLIT 32 4 SSE,STDMAP - >> tmp-$i-out.txt
|
||||
sh tmp2.sh 32 SPLIT 32 4 SSE,ALTMAP - >> tmp-$i-out.txt
|
||||
done
|
162
tmp.txt
162
tmp.txt
|
@ -1,162 +0,0 @@
|
|||
Tables[0] = 0000000000000000 3b60e7ccf8f4454e 76c1cf99f1e88a9c 4da12855091ccfd2 ed839f33e3d11538 d6e378ff1b255076 9b4250aa12399fa4 a022b766eacddaea db073e67c7a22a6b e067d9ab3f566f25 adc6f1fe364aa0f7 96a61632cebee5b9 3684a15424733f53 0de44698dc877a1d 40456ecdd59bb5cf 7b2589012d6ff081
|
||||
Tij 81 cf 1d 53 b9 f7 25 6b ea a4 76 38 d2 9c 4e 00
|
||||
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
|
||||
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
|
||||
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
|
||||
Tij 01 cd 98 54 32 fe ab 67 66 aa ff 33 55 99 cc 00
|
||||
Tij 89 6e 46 a1 16 f1 d9 3e b7 50 78 9f 28 cf e7 00
|
||||
Tij 25 45 e4 84 a6 c6 67 07 22 42 e3 83 a1 c1 60 00
|
||||
Tij 7b 40 0d 36 96 ad e0 db a0 9b d6 ed 4d 76 3b 00
|
||||
Tables[1] = 0000000000000000 b60e7ccf8f4454cd 6c1cf99f1e88a981 da12855091ccfd4c d839f33e3d115302 6e378ff1b25507cf b4250aa12399fa83 022b766eacddae4e b073e67c7a22a61f 067d9ab3f566f2d2 dc6f1fe364aa0f9e 6a61632cebee5b53 684a15424733f51d de44698dc877a1d0 0456ecdd59bb5c9c b2589012d6ff0851
|
||||
Tij 51 9c d0 1d 53 9e d2 1f 4e 83 cf 02 4c 81 cd 00
|
||||
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
|
||||
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
|
||||
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
|
||||
Tij 12 dd 8d 42 2c e3 b3 7c 6e a1 f1 3e 50 9f cf 00
|
||||
Tij 90 ec 69 15 63 1f 9a e6 76 0a 8f f3 85 f9 7c 00
|
||||
Tij 58 56 44 4a 61 6f 7d 73 2b 25 37 39 12 1c 0e 00
|
||||
Tij b2 04 de 68 6a dc 06 b0 02 b4 6e d8 da 6c b6 00
|
||||
Tables[2] = 0000000000000000 60e7ccf8f4454c25 c1cf99f1e88a984a a12855091ccfd46f 839f33e3d115308f e378ff1b25507caa 4250aa12399fa8c5 22b766eacddae4e0 073e67c7a22a6105 67d9ab3f566f2d20 c6f1fe364aa0f94f a61632cebee5b56a 84a15424733f518a e44698dc877a1daf 456ecdd59bb5c9c0 2589012d6ff085e5
|
||||
Tij e5 c0 af 8a 6a 4f 20 05 e0 c5 aa 8f 6f 4a 25 00
|
||||
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
|
||||
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
|
||||
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
|
||||
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
|
||||
Tij 01 cd 98 54 32 fe ab 67 66 aa ff 33 55 99 cc 00
|
||||
Tij 89 6e 46 a1 16 f1 d9 3e b7 50 78 9f 28 cf e7 00
|
||||
Tij 25 45 e4 84 a6 c6 67 07 22 42 e3 83 a1 c1 60 00
|
||||
Tables[3] = 0000000000000000 0e7ccf8f4454c20a 1cf99f1e88a98414 12855091ccfd461e 39f33e3d11530828 378ff1b25507ca22 250aa12399fa8c3c 2b766eacddae4e36 73e67c7a22a61050 7d9ab3f566f2d25a 6f1fe364aa0f9444 61632cebee5b564e 4a15424733f51878 44698dc877a1da72 56ecdd59bb5c9c6c 589012d6ff085e66
|
||||
Tij 66 6c 72 78 4e 44 5a 50 36 3c 22 28 1e 14 0a 00
|
||||
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
|
||||
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
|
||||
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
|
||||
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
|
||||
Tij 12 dd 8d 42 2c e3 b3 7c 6e a1 f1 3e 50 9f cf 00
|
||||
Tij 90 ec 69 15 63 1f 9a e6 76 0a 8f f3 85 f9 7c 00
|
||||
Tij 58 56 44 4a 61 6f 7d 73 2b 25 37 39 12 1c 0e 00
|
||||
Tables[4] = 0000000000000000 e7ccf8f4454c20a0 cf99f1e88a98415b 2855091ccfd461fb 9f33e3d1153082ad 78ff1b25507ca20d 50aa12399fa8c3f6 b766eacddae4e356 3e67c7a22a610541 d9ab3f566f2d25e1 f1fe364aa0f9441a 1632cebee5b564ba a15424733f5187ec 4698dc877a1da74c 6ecdd59bb5c9c6b7 89012d6ff085e617
|
||||
Tij 17 b7 4c ec ba 1a e1 41 56 f6 0d ad fb 5b a0 00
|
||||
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
|
||||
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
|
||||
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
|
||||
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
|
||||
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
|
||||
Tij 01 cd 98 54 32 fe ab 67 66 aa ff 33 55 99 cc 00
|
||||
Tij 89 6e 46 a1 16 f1 d9 3e b7 50 78 9f 28 cf e7 00
|
||||
Tables[5] = 0000000000000000 7ccf8f4454c20a82 f99f1e88a9841504 855091ccfd461f86 f33e3d1153082a13 8ff1b25507ca2091 0aa12399fa8c3f17 766eacddae4e3595 e67c7a22a610543d 9ab3f566f2d25ebf 1fe364aa0f944139 632cebee5b564bbb 15424733f5187e2e 698dc877a1da74ac ecdd59bb5c9c6b2a 9012d6ff085e61a8
|
||||
Tij a8 2a ac 2e bb 39 bf 3d 95 17 91 13 86 04 82 00
|
||||
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
|
||||
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
|
||||
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
|
||||
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
|
||||
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
|
||||
Tij 12 dd 8d 42 2c e3 b3 7c 6e a1 f1 3e 50 9f cf 00
|
||||
Tij 90 ec 69 15 63 1f 9a e6 76 0a 8f f3 85 f9 7c 00
|
||||
Tables[6] = 0000000000000000 ccf8f4454c20a861 99f1e88a984150d9 55091ccfd461f8b8 33e3d1153082a1a9 ff1b25507ca209c8 aa12399fa8c3f170 66eacddae4e35911 67c7a22a61054352 ab3f566f2d25eb33 fe364aa0f944138b 32cebee5b564bbea 5424733f5187e2fb 98dc877a1da74a9a cdd59bb5c9c6b222 012d6ff085e61a43
|
||||
Tij 43 22 9a fb ea 8b 33 52 11 70 c8 a9 b8 d9 61 00
|
||||
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
|
||||
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
|
||||
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
|
||||
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
|
||||
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
|
||||
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
|
||||
Tij 01 cd 98 54 32 fe ab 67 66 aa ff 33 55 99 cc 00
|
||||
Tables[7] = 0000000000000000 cf8f4454c20a86a4 9f1e88a984150d53 5091ccfd461f8bf7 3e3d1153082a1abd f1b25507ca209c19 a12399fa8c3f17ee 6eacddae4e35914a 7c7a22a61054357a b3f566f2d25eb3de e364aa0f94413829 2cebee5b564bbe8d 424733f5187e2fc7 8dc877a1da74a963 dd59bb5c9c6b2294 12d6ff085e61a430
|
||||
Tij 30 94 63 c7 8d 29 de 7a 4a ee 19 bd f7 53 a4 00
|
||||
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
|
||||
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
|
||||
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
|
||||
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
|
||||
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
|
||||
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
|
||||
Tij 12 dd 8d 42 2c e3 b3 7c 6e a1 f1 3e 50 9f cf 00
|
||||
Tables[8] = 0000000000000000 f8f4454c20a86af4 f1e88a984150d5f3 091ccfd461f8bf07 e3d1153082a1abfd 1b25507ca209c109 12399fa8c3f17e0e eacddae4e35914fa c7a22a61054357e1 3f566f2d25eb3d15 364aa0f944138212 cebee5b564bbe8e6 24733f5187e2fc1c dc877a1da74a96e8 d59bb5c9c6b229ef 2d6ff085e61a431b
|
||||
Tij 1b ef e8 1c e6 12 15 e1 fa 0e 09 fd 07 f3 f4 00
|
||||
Tij 43 29 96 fc e8 82 3d 57 14 7e c1 ab bf d5 6a 00
|
||||
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
|
||||
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
|
||||
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
|
||||
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
|
||||
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
|
||||
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
|
||||
Tables[9] = 0000000000000000 8f4454c20a86afd9 1e88a984150d5fa9 91ccfd461f8bf070 3d1153082a1abf52 b25507ca209c108b 2399fa8c3f17e0fb acddae4e35914f22 7a22a61054357ea4 f566f2d25eb3d17d 64aa0f944138210d ebee5b564bbe8ed4 4733f5187e2fc1f6 c877a1da74a96e2f 59bb5c9c6b229e5f d6ff085e61a43186
|
||||
Tij 86 5f 2f f6 d4 0d 7d a4 22 fb 8b 52 70 a9 d9 00
|
||||
Tij 31 9e 6e c1 8e 21 d1 7e 4f e0 10 bf f0 5f af 00
|
||||
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
|
||||
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
|
||||
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
|
||||
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
|
||||
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
|
||||
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
|
||||
Tables[10] = 0000000000000000 f4454c20a86afd48 e88a984150d5fa8b 1ccfd461f8bf07c3 d1153082a1abf50d 25507ca209c10845 399fa8c3f17e0f86 cddae4e35914f2ce a22a61054357ea01 566f2d25eb3d1749 4aa0f9441382108a bee5b564bbe8edc2 733f5187e2fc1f0c 877a1da74a96e244 9bb5c9c6b229e587 6ff085e61a4318cf
|
||||
Tij cf 87 44 0c c2 8a 49 01 ce 86 45 0d c3 8b 48 00
|
||||
Tij 18 e5 e2 1f ed 10 17 ea f2 0f 08 f5 07 fa fd 00
|
||||
Tij 43 29 96 fc e8 82 3d 57 14 7e c1 ab bf d5 6a 00
|
||||
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
|
||||
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
|
||||
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
|
||||
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
|
||||
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
|
||||
Tables[11] = 0000000000000000 4454c20a86afd419 88a984150d5fa832 ccfd461f8bf07c2b 1153082a1abf507f 5507ca209c108466 99fa8c3f17e0f84d ddae4e35914f2c54 22a61054357ea0fe 66f2d25eb3d174e7 aa0f9441382108cc ee5b564bbe8edcd5 33f5187e2fc1f081 77a1da74a96e2498 bb5c9c6b229e58b3 ff085e61a4318caa
|
||||
Tij aa b3 98 81 d5 cc e7 fe 54 4d 66 7f 2b 32 19 00
|
||||
Tij 8c 58 24 f0 dc 08 74 a0 2c f8 84 50 7c a8 d4 00
|
||||
Tij 31 9e 6e c1 8e 21 d1 7e 4f e0 10 bf f0 5f af 00
|
||||
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
|
||||
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
|
||||
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
|
||||
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
|
||||
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
|
||||
Tables[12] = 0000000000000000 454c20a86afd41fc 8a984150d5fa83f8 cfd461f8bf07c204 153082a1abf507eb 507ca209c1084617 9fa8c3f17e0f8413 dae4e35914f2c5ef 2a61054357ea0fd6 6f2d25eb3d174e2a a0f9441382108c2e e5b564bbe8edcdd2 3f5187e2fc1f083d 7a1da74a96e249c1 b5c9c6b229e58bc5 f085e61a4318ca39
|
||||
Tij 39 c5 c1 3d d2 2e 2a d6 ef 13 17 eb 04 f8 fc 00
|
||||
Tij ca 8b 49 08 cd 8c 4e 0f c5 84 46 07 c2 83 41 00
|
||||
Tij 18 e5 e2 1f ed 10 17 ea f2 0f 08 f5 07 fa fd 00
|
||||
Tij 43 29 96 fc e8 82 3d 57 14 7e c1 ab bf d5 6a 00
|
||||
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
|
||||
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
|
||||
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
|
||||
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
|
||||
Tables[13] = 0000000000000000 54c20a86afd41fac a984150d5fa83f58 fd461f8bf07c20f4 53082a1abf507eab 07ca209c10846107 fa8c3f17e0f841f3 ae4e35914f2c5e5f a61054357ea0fd56 f2d25eb3d174e2fa 0f9441382108c20e 5b564bbe8edcdda2 f5187e2fc1f083fd a1da74a96e249c51 5c9c6b229e58bca5 085e61a4318ca309
|
||||
Tij 09 a5 51 fd a2 0e fa 56 5f f3 07 ab f4 58 ac 00
|
||||
Tij a3 bc 9c 83 dd c2 e2 fd 5e 41 61 7e 20 3f 1f 00
|
||||
Tij 8c 58 24 f0 dc 08 74 a0 2c f8 84 50 7c a8 d4 00
|
||||
Tij 31 9e 6e c1 8e 21 d1 7e 4f e0 10 bf f0 5f af 00
|
||||
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
|
||||
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
|
||||
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
|
||||
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
|
||||
Tables[14] = 0000000000000000 4c20a86afd41fab7 984150d5fa83f56e d461f8bf07c20fd9 3082a1abf507eac7 7ca209c108461070 a8c3f17e0f841fa9 e4e35914f2c5e51e 61054357ea0fd58e 2d25eb3d174e2f39 f9441382108c20e0 b564bbe8edcdda57 5187e2fc1f083f49 1da74a96e249c5fe c9c6b229e58bca27 85e61a4318ca3090
|
||||
Tij 90 27 fe 49 57 e0 39 8e 1e a9 70 c7 d9 6e b7 00
|
||||
Tij 30 ca c5 3f da 20 2f d5 e5 1f 10 ea 0f f5 fa 00
|
||||
Tij ca 8b 49 08 cd 8c 4e 0f c5 84 46 07 c2 83 41 00
|
||||
Tij 18 e5 e2 1f ed 10 17 ea f2 0f 08 f5 07 fa fd 00
|
||||
Tij 43 29 96 fc e8 82 3d 57 14 7e c1 ab bf d5 6a 00
|
||||
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
|
||||
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
|
||||
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
|
||||
Tables[15] = 0000000000000000 c20a86afd41fab1c 84150d5fa83f5623 461f8bf07c20fd3f 082a1abf507eac5d ca209c1084610741 8c3f17e0f841fa7e 4e35914f2c5e5162 1054357ea0fd58ba d25eb3d174e2f3a6 9441382108c20e99 564bbe8edcdda585 187e2fc1f083f4e7 da74a96e249c5ffb 9c6b229e58bca2c4 5e61a4318ca309d8
|
||||
Tij d8 c4 fb e7 85 99 a6 ba 62 7e 41 5d 3f 23 1c 00
|
||||
Tij 09 a2 5f f4 a5 0e f3 58 51 fa 07 ac fd 56 ab 00
|
||||
Tij a3 bc 9c 83 dd c2 e2 fd 5e 41 61 7e 20 3f 1f 00
|
||||
Tij 8c 58 24 f0 dc 08 74 a0 2c f8 84 50 7c a8 d4 00
|
||||
Tij 31 9e 6e c1 8e 21 d1 7e 4f e0 10 bf f0 5f af 00
|
||||
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
|
||||
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
|
||||
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
|
||||
Val= 3b60e7ccf8f4454e
|
||||
v0 28 4f 14 e3 1b f7 ee 76 b9 31 47 0a ba 8b 70 fc
|
||||
v0 12 56 28 59 66 cd d2 d2 1c 91 30 26 a8 95 0a a9
|
||||
v0 ee 5d 14 e3 fb c8 45 23 a9 fd 8c f1 ff c9 2c 93
|
||||
v0 65 ce 82 f2 dc ec 6b e2 53 a3 9c fb 07 70 e7 ad
|
||||
v0 1b 87 3d 7b 4d 15 1d c2 d2 45 f3 03 4b e4 f4 9b
|
||||
v0 3b 01 2b c5 c5 d2 9d a9 68 7c a2 61 c9 5b 49 90
|
||||
v0 5d 13 7d ef eb f1 52 da a0 29 89 ef 08 f2 51 3b
|
||||
v0 17 05 b3 80 77 3a f2 5e 82 7a c9 39 84 df 8e bf
|
||||
|
||||
p0 11 fc 47 f4 6c 01 44 ba ba 62 e7 3f ba fb ba 85
|
||||
p0 a6 fc 67 16 5f c3 95 fc 58 51 f4 fd 58 5f 58 a5
|
||||
p0 12 fc 1f b3 50 1e 3f 9a fd 5e 83 20 fd 9c fd dd
|
||||
p0 d9 fc 1e ee 22 42 10 7f a0 2c f0 7c a0 24 a0 dc
|
||||
p0 a2 fc 4c 30 41 ce ad eb 7e 4f c1 f0 7e 6e 7e 8e
|
||||
p0 8b fc 7c 7b 9f b5 38 67 35 91 2f 8b 35 a9 35 be
|
||||
p0 07 fc 89 1a 3b 21 fd db 54 35 7e 1f 54 74 54 4b
|
||||
p0 cf fc 94 5e 40 78 c2 31 10 4e 18 46 10 da 10 56
|
13
tmp2.sh
13
tmp2.sh
|
@ -1,13 +0,0 @@
|
|||
if [ $# -lt 4 ]; then
|
||||
echo 'usage: sh tmp-test.sh w gf_specs (e.g. LOG - -)' >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
w=$1
|
||||
shift
|
||||
i=1024
|
||||
while [ $i -le 1073741824 ]; do
|
||||
iter=`echo $i | awk '{ print (1073741824/$1)*10 }'`
|
||||
echo $i $iter $w $* `gf_time $w R -1 $i $iter $*`
|
||||
i=`echo $i | awk '{ print $1*2 }'`
|
||||
done
|
Loading…
Reference in New Issue