GF-Complete Release 1.0.

Please see the user's manual for details.
master
Jim Plank 2013-10-09 10:36:37 -04:00
parent 79a46d18b6
commit 110523d6f3
50 changed files with 7050 additions and 5481 deletions

View File

@ -1,24 +1,23 @@
#
# GNUmakefile for Galois field library
#
#
# The default flags do *not* have the SSE instructions enabled.
# Please cd to flag_tester and run which_compile_flags.sh to see which SSE instructions
# your machine and compiler support, and which flags you should include below.
CFLAGS = -O3
LDFLAGS = -O3
SRCS = gf_w4.c gf_w8.c gf_w16.c gf_w32.c gf_w64.c gf_w128.c gf_wgen.c gf.c gf_unit.c \
gf_time.c gf_mult.c gf_method.c gf_methods.c gf_div.c gf_rand.c gf_general.c \
gf_poly.c gf_example_1.c gf_add.c gf_example_2.c gf_example_3.c gf_example_4.c \
gf_inline_time.c
gf_inline_time.c gf_example_5.c gf_example_6.c gf_example_7.c
HDRS = gf_complete.h gf_int.h
EXECUTABLES = gf_mult gf_div gf_add gf_unit gf_time gf_methods gf_poly \
gf_example_1 gf_example_2 gf_example_3 gf_example_4 gf_inline_time
CFLAGS = -O3 -msse4 -maes -mpclmul -DINTEL_SSE4 -DINTEL_PCLMUL
LDFLAGS = -O3 -msse4 -maes -mpclmul
# Use these if you don't have INTEL_PCLMUL
# CFLAGS = -O3 -msse4 -DINTEL_SSE4
# LDFLAGS = -O3 -msse4
gf_example_1 gf_example_2 gf_example_3 gf_example_4 gf_inline_time \
gf_example_5 gf_example_6 gf_example_7
RM = /bin/rm -f
@ -45,6 +44,9 @@ gf_example_1: gf_example_1.o gf_complete.a
gf_example_2: gf_example_2.o gf_complete.a
gf_example_3: gf_example_3.o gf_complete.a
gf_example_4: gf_example_4.o gf_complete.a
gf_example_5: gf_example_5.o gf_complete.a
gf_example_6: gf_example_6.o gf_complete.a
gf_example_7: gf_example_7.o gf_complete.a
gf_mult: gf_mult.o gf_complete.a
gf_div: gf_div.o gf_complete.a
gf_poly: gf_poly.o gf_complete.a
@ -54,7 +56,8 @@ clean:
$(RM) $(OBJS) gf_div.c
spotless: clean
$(RM) *~ $(EXECUTABLES)
$(RM) *~ $(EXECUTABLES) which_compile_flags
$(RM) gf_complete.a
gf_div.o: gf_complete.h gf_method.h
gf_methods.o: gf_complete.h gf_method.h
@ -71,8 +74,12 @@ gf_example_1.o: gf_complete.h gf_rand.h
gf_example_2.o: gf_complete.h gf_rand.h
gf_example_3.o: gf_complete.h gf_rand.h
gf_example_4.o: gf_complete.h gf_rand.h
gf_example_5.o: gf_complete.h gf_rand.h
gf_example_6.o: gf_complete.h gf_rand.h
gf_example_7.o: gf_complete.h gf_rand.h
gf_general.o: gf_complete.h gf_int.h gf_general.h gf_rand.h
gf_mult.o: gf_complete.h gf_method.h
gf.o: gf_complete.h gf_int.h
gf_method.o: gf_complete.h
gf_div.c: gf_mult.c

Binary file not shown.

BIN
Manual.pdf Normal file

Binary file not shown.

1
README
View File

@ -1 +0,0 @@
This is a README file.

View File

@ -1,5 +1,13 @@
This is GF-Complete, Revision 0.1.
This is GF-Complete, Revision 1.0.
The user's manual is in the file Manual.pdf.
There are two online homes for GF-Complete:
- https://bitbucket.org/jimplank/gf-complete
- http://www.cs.utk.edu/~plank/plank/papers/CS-13-716.html
When compiling this for the first time, cd to flag_tester, and
do "sh which_compile_flags.sh xxx", where xxx is the compiler
that you will use in the GNUmakefile.
Please see http://www.cs.utk.edu/~plank/plank/papers/CS-13-703.html for the user's
manual and other important documentation about this library, including more
recent revisions.

View File

@ -1,777 +0,0 @@
<h3>Code structure as of 7/20/2012</h3>
written by Jim.
<p>
Ok -- once again, I have messed with the structure. My goal is flexible and efficient.
It's similar to the stuff before, but better because it makes things like Euclid's
method much cleaner.
<p>
I think we're ready to hack.
<p>
<p>
<hr>
<h3>Files</h3>
<UL>
<LI> <a href=GNUmakefile><b>GNUmakefile</b></a>: Makefile
<LI> <a href=README><b>README</b></a>: Empty readme
<LI> <a href=explanation.html><b>explanation.html</b></a>: This file.
<LI> <a href=gf.c><b>gf.c</b></a>: Main gf routines
<LI> <a href=gf.h><b>gf.h</b></a>: Main gf prototypes and typedefs
<LI> <a href=gf_int.h><b>gf_int.h</b></a>: Prototypes and typedefs for common routines for the
internal gf implementations.
<LI> <a href=gf_method.c><b>gf_method.c</b></a>: Code to help parse argc/argv to define the method.
This way, various programs can be consistent with how they handle the command line.
<LI> <a href=gf_method.h><b>gf_method.h</b></a>: Prototypes for ibid.
<LI> <a href=gf_methods.c><b>gf_methods.c</b></a>: This program prints out how to define
the various methods on the command line. My idea is to beef this up so that you can
give it a method spec on the command line, and it will tell you whether it's valid, or
why it's invalid. I haven't written that part yet.
<LI> <a href=gf_mult.c><b>gf_mult.c</b></a>: Program to do single multiplication.
<LI> <a href=gf_div.c><b>gf_div.c</b></a>: Program to do single divisions -- it's created
in the makefile with a sed script on gf_mult.c.
<LI> <a href=gf_time.c><b>gf_time.c</b></a>: Time tester
<LI> <a href=gf_unit.c><b>gf_unit.c</b></a>: Unit tester
<LI> <a href=gf_54.c><b>gf_54.c</b></a>: A simple example program that multiplies
5 and 4 in GF(2^4).
<LI> <a href=gf_w4.c><b>gf_w4.c</b></a>: Implementation of code for <i>w</i> = 4.
(For now, only SHIFT and LOG, plus EUCLID & MATRIX).
<LI> <a href=gf_w8.c><b>gf_w8.c</b></a>: Implementation of code for <i>w</i> = 8.
(For now, only SHIFT plus EUCLID & MATRIX).
<LI> <a href=gf_w16.c><b>gf_w16.c</b></a>: Implementation of code for <i>w</i> = 16.
(For now, only SHIFT plus EUCLID & MATRIX).
<LI> <a href=gf_w32.c><b>gf_w32.c</b></a>: Implementation of code for <i>w</i> = 32.
(For now, only SHIFT plus EUCLID & MATRIX).
<LI> <a href=gf_w64.c><b>gf_w64.c</b></a>: Implementation of code for <i>w</i> = 64.
(For now, only SHIFT and EUCLID).
<LI> I don't have gf_w128.c or gf_gen.c yet.
</UL>
<hr>
<h3>Prototypes and typedefs in gf.h</h3>
The main structure that users will see is in <b>gf.h</b>, and it is of type
<b>gf_t</b>:
<p><center><table border=3 cellpadding=3><td><pre>
typedef struct gf {
gf_func_a_b multiply;
gf_func_a_b divide;
gf_func_a inverse;
gf_region multiply_region;
void *scratch;
} gf_t;
</pre></td></table></center><p>
We can beef it up later with buf-buf or buf-acc. The problem is that the paper is
already bloated, so right now, I want to keep it lean.
<p>
The types of the procedures are big unions, so that they work with the following
types of arguments:
<p><center><table border=3 cellpadding=3><td><pre>
typedef uint8_t gf_val_4_t;
typedef uint8_t gf_val_8_t;
typedef uint16_t gf_val_16_t;
typedef uint32_t gf_val_32_t;
typedef uint64_t gf_val_64_t;
typedef uint64_t *gf_val_128_t;
typedef uint32_t gf_val_gen_t; /* The intent here is for general values <= 32 */
</pre></td></table></center><p>
To use one of these, you need to create one with <b>gf_init_easy()</b> or
<b>gf_init_hard()</b>. Let's concentrate on the former:
<p><center><table border=3 cellpadding=3><td><pre>
extern int gf_init_easy(gf_t *gf, int w, int mult_type);
</pre></td></table></center><p>
You pass it memory for a <b>gf_t</b>, a value of <b>w</b> and
a variable that says how to do multiplication. The valid values of <b>mult_type</b>
are enumerated in <b>gf.h</b>:
<p><center><table border=3 cellpadding=3><td><pre>
typedef enum {GF_MULT_DEFAULT,
GF_MULT_SHIFT,
GF_MULT_GROUP,
GF_MULT_BYTWO_p,
GF_MULT_BYTWO_b,
GF_MULT_TABLE,
GF_MULT_LOG_TABLE,
GF_MULT_SPLIT_TABLE,
GF_MULT_COMPOSITE } gf_mult_type_t;
</pre></td></table></center><p>
After creating the <b>gf_t</b>, you use its <b>multiply</b> method
to multiply, using the union's fields to work with the various types.
It looks easier than my explanation. For example, suppose you wanted to multiply 5 and 4 in <i>GF(2<sup>4</sup>)</i>.
You can do it as in
<b><a href=gf_54.c>gf_54.c</a></b>
<p><center><table border=3 cellpadding=3><td><pre>
#include "gf.h"
main()
{
gf_t gf;
gf_init_easy(&gf, 4, GF_MULT_DEFAULT);
printf("%d\n", gf.multiply.w4(&gf, 5, 4));
exit(0);
}
</pre></td></table></center><p>
If you wanted to multiply in <i>GF(2<sup>8</sup>)</i>, then you'd have to use 8 as a parameter
to <b>gf_init_easy</b>, and call the multiplier as <b>gf.multiply.w8()</b>.
<p>
When you're done with your <b>gf_t</b>, you should call <b>gf_free()</b> on it so
that it can free memory that it has allocated. We'll talk more about memory later, but if you
create your <b>gf_t</b> with <b>gf_init_easy</b>, then it calls <b>malloc()</b>, and
if you care about freeing memory, you'll have to call <b>gf_free()</b>.
<p>
<hr>
<h3>Memory allocation</h3>
Each implementation of a multiplication technique keeps around its
own data. For example, <b>GF_MULT_TABLE</b> keeps around
multiplication and division tables, and <b>GF_MULT_LOG_TABLE</b> maintains log and
antilog tables. This data is stored in the pointer <b>scratch</b>. My intent
is that the memory that is there is all that's required. In other
words, the <b>multiply()</b>, <b>divide()</b>, <b>inverse()</b> and
<b>multiply_region()</b> calls don't do any memory allocation.
Moreover, <b>gf_init_easy()</b> only allocates one chunk of memory --
the one in <b>scratch</b>.
<p>
If you don't want to have the initialization call allocate memory, you can use <b>gf_init_hard()</b>:
<p><center><table border=3 cellpadding=3><td><pre>
extern int gf_init_hard(gf_t *gf,
int w,
int mult_type,
int region_type,
int divide_type,
uint64_t prim_poly,
int arg1,
int arg2,
gf_t *base_gf,
void *scratch_memory);
</pre></td></table></center><p>
The first three parameters are the same as <b>gf_init_easy()</b>.
You can add additional arguments for performing <b>multiply_region</b>, and
for performing division in the <b>region_type</b> and <b>divide_type</b>
arguments. Their values are also defined in <b>gf.h</b>. You can
mix the <b>region_type</b> values (e.g. "DOUBLE" and "SSE"):
<p><center><table border=3 cellpadding=3><td><pre>
#define GF_REGION_DEFAULT (0x0)
#define GF_REGION_SINGLE_TABLE (0x1)
#define GF_REGION_DOUBLE_TABLE (0x2)
#define GF_REGION_QUAD_TABLE (0x4)
#define GF_REGION_LAZY (0x8)
#define GF_REGION_SSE (0x10)
#define GF_REGION_NOSSE (0x20)
#define GF_REGION_STDMAP (0x40)
#define GF_REGION_ALTMAP (0x80)
#define GF_REGION_CAUCHY (0x100)
typedef uint32_t gf_region_type_t;
typedef enum { GF_DIVIDE_DEFAULT,
GF_DIVIDE_MATRIX,
GF_DIVIDE_EUCLID } gf_division_type_t;
</pre></td></table></center><p>
You can change
the primitive polynomial with <b>prim_poly</b>, give additional arguments with
<b>arg1</b> and <b>arg2</b> and give a base Galois Field for composite fields.
Finally, you can pass it a pointer to memory in <b>scratch_memory</b>. That
way, you can avoid having <b>gf_init_hard()</b> call <b>malloc()</b>.
<p>
There is a procedure called <b>gf_scratch_size()</b> that lets you know the minimum
size for <b>scratch_memory</b>, depending on <i>w</i>, the multiplication type
and the arguments:
<p><center><table border=3 cellpadding=3><td><pre>
extern int gf_scratch_size(int w,
int mult_type,
int region_type,
int divide_type,
int arg1,
int arg2);
</pre></td></table></center><p>
You can specify default arguments in <b>gf_init_hard()</b>:
<UL>
<LI> <b>region_type</b> = <b>GF_REGION_DEFAULT</b>
<LI> <b>divide_type</b> = <b>GF_DIVIDE_DEFAULT</b>
<LI> <b>prim_poly</b> = 0
<LI> <b>arg1</b> = 0
<LI> <b>arg2</b> = 0
<LI> <b>base_gf</b> = <b>NULL</b>
<LI> <b>scratch_memory</b> = <b>NULL</b>
</UL>
If any argument is equal to its default, then default actions are taken (e.g. a
standard primitive polynomial is used, or memory is allocated for <b>scratch_memory</b>).
In fact, <b>gf_init_easy()</b> simply calls <b>gf_init_hard()</b> with the default
parameters.
<p>
<b>gf_free()</b> frees memory that was allocated with <b>gf_init_easy()</b>
or <b>gf_init_hard()</b>. The <b>recursive</b> parameter is in case you
use composite fields, and want to recursively free the base fields.
If you pass <b>scratch_memory</b> to <b>gf_init_hard()</b>, then you typically
don't need to call <b>gf_free()</b>. It won't hurt to call it, though.
<hr>
<h3>gf_mult and gf_div</h3>
For the moment, I have few things completely implemented, but that's because I want
to be able to explain the structure, and how to specify methods. In particular, for
<i>w=4</i>, I have implemented <b>SHIFT</b> and <b>LOG</b>. For <i>w=8, 16, 32, 64</i>
I have implemented <b>SHIFT</b>. For all <i>w &le; 32</i>, I have implemented both
Euclid's algorithm for inversion, and the matrix method for inversion. For
<i>w=64</i>, it's just Euclid. You can
test these all with <b>gf_mult</b> and <b>gf_div</b>. Here are a few calls:
<pre>
UNIX> <font color=darkred><b>gf_mult 7 11 4</b></font> - Default
4
UNIX> <font color=darkred><b>gf_mult 7 11 4 SHIFT - -</b></font> - Use shift
4
UNIX> <font color=darkred><b>gf_mult 7 11 4 LOG - -</b></font> - Use logs
4
UNIX> <font color=darkred><b>gf_div 4 7 4</b></font> - Default
11
UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - -</b></font> - Use logs
11
UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - EUCLID</b></font> - Use Euclid instead of logs
11
UNIX> <font color=darkred><b>gf_div 4 7 4 LOG - MATRIX</b></font> - Use Matrix inversion instead of logs
11
UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - -</b></font> - Default
11
UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - EUCLID</b></font> - Use Euclid (which is the default)
11
UNIX> <font color=darkred><b>gf_div 4 7 4 SHIFT - MATRIX</b></font> - Use Matrix inversion instead of logs
11
UNIX> <font color=darkred><b>gf_mult 200 211 8</b></font> - The remainder are shift/Euclid
201
UNIX> <font color=darkred><b>gf_div 201 211 8</b></font>
200
UNIX> <font color=darkred><b>gf_mult 60000 65111 16</b></font>
63515
UNIX> <font color=darkred><b>gf_div 63515 65111 16</b></font>
60000
UNIX> <font color=darkred><b>gf_mult abcd0001 9afbf788 32h</b></font>
b0359681
UNIX> <font color=darkred><b>gf_div b0359681 9afbf788 32h</b></font>
abcd0001
UNIX> <font color=darkred><b>gf_mult abcd00018c8b8c8a 9afbf7887f6d8e5b 64h</b></font>
3a7def35185bd571
UNIX> <font color=darkred><b>gf_mult abcd00018c8b8c8a 9afbf7887f6d8e5b 64h</b></font>
3a7def35185bd571
UNIX> <font color=darkred><b>gf_div 3a7def35185bd571 9afbf7887f6d8e5b 64h</b></font>
abcd00018c8b8c8a
UNIX> <font color=darkred><b></b></font>
</pre>
You can see all the methods with <b>gf_methods</b>. We have a lot of implementing to do:
<pre>
UNIX> <font color=darkred><b>gf_methods</b></font>
To specify the methods, do one of the following:
- leave empty to use defaults
- use a single dash to use defaults
- specify MULTIPLY REGION DIVIDE
Legal values of MULTIPLY:
SHIFT: shift
GROUP g_mult g_reduce: the Group technique - see the paper
BYTWO_p: BYTWO doubling the product.
BYTWO_b: BYTWO doubling b (more efficient thatn BYTWO_p)
TABLE: Full multiplication table
LOG: Discrete logs
LOG_ZERO: Discrete logs with a large table for zeros
SPLIT g_a g_b: Split tables defined by g_a and g_b
COMPOSITE k l [METHOD]: Composite field, recursively specify the
method of the base field in GF(2^l)
Legal values of REGION: Specify multiples with commas e.g. 'DOUBLE,LAZY'
-: Use defaults
SINGLE/DOUBLE/QUAD: Expand tables
LAZY: Lazily create table (only applies to TABLE and SPLIT)
SSE/NOSSE: Use 128-bit SSE instructions if you can
CAUCHY/ALTMAP/STDMAP: Use different memory mappings
Legal values of DIVIDE:
-: Use defaults
MATRIX: Use matrix inversion
EUCLID: Use the extended Euclidian algorithm.
See the user's manual for more information.
There are many restrictions, so it is better to simply use defaults in most cases.
UNIX> <font color=darkred><b></b></font>
</pre>
<hr>
<h3>gf_unit and gf_time</h3>
<b><a href=gf_unit.c>gf_unit.c</a></b> is a unit tester, and
<b><a href=gf_time.c>gf_time.c</a></b> is a time tester.
They are called as follows:
<p><center><table border=3 cellpadding=3><td><pre>
UNIX> <font color=darkred><b>gf_unit w tests seed [METHOD] </b></font>
UNIX> <font color=darkred><b>gf_time w tests seed size(bytes) iterations [METHOD] </b></font>
</pre></td></table></center><p>
The <b>tests</b> parameter is one or more of the following characters:
<UL>
<LI> A: Do all tests
<LI> S: Test only single operations (multiplication/division)
<LI> R: Test only region operations
<LI> V: Verbose Output
</UL>
<b>seed</b> is a seed for <b>srand48()</b> -- using -1 defaults to the current time.
<p>
For example, testing the defaults with w=4:
<pre>
UNIX> <font color=darkred><b>gf_unit 4 AV 1 LOG - -</b></font>
Seed: 1
Testing single multiplications/divisions.
Testing Inversions.
Testing buffer-constant, src != dest, xor = 0
Testing buffer-constant, src != dest, xor = 1
Testing buffer-constant, src == dest, xor = 0
Testing buffer-constant, src == dest, xor = 1
UNIX> <font color=darkred><b>gf_unit 4 AV 1 SHIFT - -</b></font>
Seed: 1
Testing single multiplications/divisions.
Testing Inversions.
No multiply_region.
UNIX> <font color=darkred><b></b></font>
</pre>
There is no <b>multiply_region()</b> method defined for <b>SHIFT</b>.
Thus, the procedures are <b>NULL</b> and the unit tester ignores them.
<p>
At the moment, I only have the unit tester working for w=4.
<p>
<b>gf_time</b> takes the size of an array (in bytes) and a number of iterations, and
tests the speed of both single and region operations. The tests are:
<UL>
<LI> A: All
<LI> S: All Single Operations
<LI> R: All Region Operations
<LI> M: Single: Multiplications
<LI> D: Single: Divisions
<LI> I: Single: Inverses
<LI> B: Region: Multiply_Region
</UL>
Here are some examples with <b>SHIFT</b> and <b>LOG</b> on my mac.
<pre>
UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - -</b></font>
Seed: 1
Multiply: 0.538126 s 185.830 Mega-ops/s
Divide: 0.520825 s 192.003 Mega-ops/s
Inverse: 0.631198 s 158.429 Mega-ops/s
Buffer-Const,s!=d,xor=0: 0.478395 s 209.032 MB/s
Buffer-Const,s!=d,xor=1: 0.524245 s 190.751 MB/s
Buffer-Const,s==d,xor=0: 0.471851 s 211.931 MB/s
Buffer-Const,s==d,xor=1: 0.528275 s 189.295 MB/s
UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - EUCLID</b></font>
Seed: 1
Multiply: 0.555512 s 180.014 Mega-ops/s
Divide: 5.359434 s 18.659 Mega-ops/s
Inverse: 4.911719 s 20.359 Mega-ops/s
Buffer-Const,s!=d,xor=0: 0.496097 s 201.573 MB/s
Buffer-Const,s!=d,xor=1: 0.538536 s 185.689 MB/s
Buffer-Const,s==d,xor=0: 0.485564 s 205.946 MB/s
Buffer-Const,s==d,xor=1: 0.540227 s 185.107 MB/s
UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 LOG - MATRIX</b></font>
Seed: 1
Multiply: 0.544005 s 183.822 Mega-ops/s
Divide: 7.602822 s 13.153 Mega-ops/s
Inverse: 7.000564 s 14.285 Mega-ops/s
Buffer-Const,s!=d,xor=0: 0.474868 s 210.585 MB/s
Buffer-Const,s!=d,xor=1: 0.527588 s 189.542 MB/s
Buffer-Const,s==d,xor=0: 0.473130 s 211.358 MB/s
Buffer-Const,s==d,xor=1: 0.529877 s 188.723 MB/s
UNIX> <font color=darkred><b>gf_time 4 A 1 102400 1024 SHIFT - -</b></font>
Seed: 1
Multiply: 2.708842 s 36.916 Mega-ops/s
Divide: 8.756882 s 11.420 Mega-ops/s
Inverse: 5.695511 s 17.558 Mega-ops/s
UNIX> <font color=darkred><b></b></font>
</pre>
At the moment, I only have the timer working for w=4.
<hr>
<h3>Walking you through <b>LOG</b></h3>
To see how <b>scratch</b> is used to store data, let's look at what happens when
you call <b>gf_init_easy(&gf, 4, GF_MULT_LOG_TABLE);</b>
First, <b>gf_init_easy()</b> calls <b>gf_init_hard()</b> with default parameters.
This is in <b><a href=gf.c>gf.c</a></b>.
<p>
<b>gf_init_hard()</b>'s first job is to set up the scratch.
The scratch's type is <b>gf_internal_t</b>, defined in
<b><a href=gf_int.h>gf_int.h</a></b>:
<p><center><table border=3 cellpadding=3><td><pre>
typedef struct {
int mult_type;
int region_type;
int divide_type;
int w;
uint64_t prim_poly;
int free_me;
int arg1;
int arg2;
gf_t *base_gf;
void *private;
} gf_internal_t;
</pre></td></table></center><p>
All the fields are straightforward, with the exception of <b>private</b>. That is
a <b>(void *)</b> which points to the implementation's private data.
<p>
Here's the code for
<b>gf_init_hard()</b>:
<p><center><table border=3 cellpadding=3><td><pre>
int gf_init_hard(gf_t *gf, int w, int mult_type,
int region_type,
int divide_type,
uint64_t prim_poly,
int arg1, int arg2,
gf_t *base_gf,
void *scratch_memory)
{
int sz;
gf_internal_t *h;
if (scratch_memory == NULL) {
sz = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
if (sz &lt;= 0) return 0;
h = (gf_internal_t *) malloc(sz);
h-&gt;free_me = 1;
} else {
h = scratch_memory;
h-&gt;free_me = 0;
}
gf-&gt;scratch = (void *) h;
h-&gt;mult_type = mult_type;
h-&gt;region_type = region_type;
h-&gt;divide_type = divide_type;
h-&gt;w = w;
h-&gt;prim_poly = prim_poly;
h-&gt;arg1 = arg1;
h-&gt;arg2 = arg2;
h-&gt;base_gf = base_gf;
h-&gt;private = (void *) gf-&gt;scratch;
h-&gt;private += (sizeof(gf_internal_t));
switch(w) {
case 4: return gf_w4_init(gf);
case 8: return gf_w8_init(gf);
case 16: return gf_w16_init(gf);
case 32: return gf_w32_init(gf);
case 64: return gf_w64_init(gf);
case 128: return gf_dummy_init(gf);
default: return 0;
}
}
</pre></td></table></center><p>
The first thing it does is determine if it has to allocate space for <b>scratch</b>.
If it must, it uses <b>gf_scratch_size()</b> to figure out how big the space must be.
It then sets <b>gf->scratch</b> to this space, and sets all of the fields of the
scratch to the arguments in <b>gf_init_hard()</b>. The <b>private</b> pointer is
set to the space just after the <b>gf_internal_t</b> structure at the start of <b>gf->scratch</b>. Again, it is up to
<b>gf_scratch_size()</b> to make sure there is enough space for the scratch, and
for all of the private data needed by the implementation.
<p>
Once the scratch is set up, <b>gf_init_hard()</b> calls <b>gf_w4_init()</b>. This is
in <b><a href=gf_w4.c>gf_w4.c</a></b>, and it is a
simple dispatcher to the various initialization routines, plus it
sets <b>EUCLID</b> and <b>MATRIX</b> if need be:
<p><center><table border=3 cellpadding=3><td><pre>
int gf_w4_init(gf_t *gf)
{
gf_internal_t *h;
h = (gf_internal_t *) gf-&gt;scratch;
if (h-&gt;prim_poly == 0) h-&gt;prim_poly = 0x13;
gf-&gt;multiply.w4 = NULL;
gf-&gt;divide.w4 = NULL;
gf-&gt;inverse.w4 = NULL;
gf-&gt;multiply_region.w4 = NULL;
switch(h-&gt;mult_type) {
case GF_MULT_SHIFT: if (gf_w4_shift_init(gf) == 0) return 0; break;
case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break;
case GF_MULT_DEFAULT: if (gf_w4_log_init(gf) == 0) return 0; break;
default: return 0;
}
if (h-&gt;divide_type == GF_DIVIDE_EUCLID) {
gf-&gt;divide.w4 = gf_w4_divide_from_inverse;
gf-&gt;inverse.w4 = gf_w4_euclid;
} else if (h-&gt;divide_type == GF_DIVIDE_MATRIX) {
gf-&gt;divide.w4 = gf_w4_divide_from_inverse;
gf-&gt;inverse.w4 = gf_w4_matrix;
}
if (gf-&gt;inverse.w4 != NULL && gf-&gt;divide.w4 == NULL) {
gf-&gt;divide.w4 = gf_w4_divide_from_inverse;
}
if (gf-&gt;inverse.w4 == NULL && gf-&gt;divide.w4 != NULL) {
gf-&gt;inverse.w4 = gf_w4_inverse_from_divide;
}
return 1;
}
</pre></td></table></center><p>
The code in <b>gf_w4_log_init()</b> sets up the log and antilog tables, and sets
the <b>multiply.w4</b>, <b>divide.w4</b> etc routines to be the ones for logs. The
tables are put into <b>gf->scratch->private</b>, which is typecast to a <b>struct
gf_logtable_data *</b>:
<p><center><table border=3 cellpadding=3><td><pre>
struct gf_logtable_data {
gf_val_4_t log_tbl[GF_FIELD_SIZE];
gf_val_4_t antilog_tbl[GF_FIELD_SIZE * 2];
gf_val_4_t *antilog_tbl_div;
};
.......
static
int gf_w4_log_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_logtable_data *ltd;
int i, b;
h = (gf_internal_t *) gf-&gt;scratch;
ltd = h-&gt;private;
ltd-&gt;log_tbl[0] = 0;
ltd-&gt;antilog_tbl_div = ltd-&gt;antilog_tbl + (GF_FIELD_SIZE-1);
b = 1;
for (i = 0; i &lt; GF_FIELD_SIZE-1; i++) {
ltd-&gt;log_tbl[b] = (gf_val_8_t)i;
ltd-&gt;antilog_tbl[i] = (gf_val_8_t)b;
ltd-&gt;antilog_tbl[i+GF_FIELD_SIZE-1] = (gf_val_8_t)b;
b &lt;&lt;= 1;
if (b & GF_FIELD_SIZE) {
b = b ^ h-&gt;prim_poly;
}
}
gf-&gt;inverse.w4 = gf_w4_inverse_from_divide;
gf-&gt;divide.w4 = gf_w4_log_divide;
gf-&gt;multiply.w4 = gf_w4_log_multiply;
gf-&gt;multiply_region.w4 = gf_w4_log_multiply_region;
return 1;
}
</pre></td></table></center><p>
And of course the individual routines use <b>h->private</b> to access the tables:
<p><center><table border=3 cellpadding=3><td><pre>
static
inline
gf_val_8_t gf_w4_log_multiply (gf_t *gf, gf_val_8_t a, gf_val_8_t b)
{
struct gf_logtable_data *ltd;
ltd = (struct gf_logtable_data *) ((gf_internal_t *) (gf-&gt;scratch))-&gt;private;
return (a == 0 || b == 0) ? 0 : ltd-&gt;antilog_tbl[(unsigned)(ltd-&gt;log_tbl[a] + ltd-&gt;log_tbl[b])];
}
</pre></td></table></center><p>
Finally, it's important that the proper sizes are put into
<b>gf_w4_scratch_size()</b> for each implementation:
<p><center><table border=3 cellpadding=3><td><pre>
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
int region_tbl_size;
switch(mult_type)
{
case GF_MULT_DEFAULT:
case GF_MULT_LOG_TABLE:
return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
break;
case GF_MULT_SHIFT:
return sizeof(gf_internal_t);
break;
default:
return -1;
}
}
</pre></td></table></center><p>
I hope that's enough explanation for y'all to start implementing. Let me know if you have
problems -- thanks -- Jim
<hr>
The initial structure has been set for w=4, 8, 16, 32 and 64, with implementations of SHIFT and EUCLID, and for w <= 32, MATRIX. There are some weird caveats:
<UL>
<LI> For w=32 and w=64, the primitive polynomial does not have the leading one.
<LI> I'd like for naming to be:
<p>
<UL>
<b>gf_w</b><i>w</i><b>_</b><i>technique</i><b>_</b><i>functionality</i><b>()</b>.
</UL>
<p>
For example, the log techniques for w=4 are:
<pre>
gf_w4_log_multiply()
gf_w4_log_divide()
gf_w4_log_multiply_region()
gf_w4_log_init()
</pre>
<p>
<LI> I'd also like a header block on implementations that says who wrote it.
</UL>
<hr>
<h3>Things we need to Implement: <i>w=4</i></h3>
<p><table border=3 cellpadding=2>
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
<tr> <td> Single TABLE </td> <td> Done - Jim </td> </tr>
<tr> <td> Double TABLE </td> <td> Done - Jim </td> </tr>
<tr> <td> Double TABLE, SSE </td> <td> Done - Jim </td> </tr>
<tr> <td> Quad TABLE </td> <td>Done - Jim</td> </tr>
<tr> <td> Lazy Quad TABLE </td> <td>Done - Jim</td> </tr>
<tr> <td> LOG </td> <td> Done - Jim </td> </tr>
</table><p>
<hr>
<h3>Things we need to Implement: <i>w=8</i></h3>
<p><table border=3 cellpadding=2>
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
<tr> <td> BYTWO_p </td> <td>Done - Jim </td> </tr>
<tr> <td> BYTWO_b </td> <td>Done - Jim </td> </tr>
<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim </td> </tr>
<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim </td> </tr>
<tr> <td> Single TABLE </td> <td> Done - Kevin </td> </tr>
<tr> <td> Double TABLE </td> <td> Done - Jim </td> </tr>
<tr> <td> Lazy Double TABLE </td> <td> Done - Jim </td> </tr>
<tr> <td> Split 2 1 (Half) SSE </td> <td>Done - Jim</td> </tr>
<tr> <td> Composite, k=2 </td> <td> Done - Kevin (alt mapping not passing unit test) </td> </tr>
<tr> <td> LOG </td> <td> Done - Kevin </td> </tr>
<tr> <td> LOG ZERO</td> <td> Done - Jim</td> </tr>
</table><p>
<hr>
<h3>Things we need to Implement: <i>w=16</i></h3>
<p><table border=3 cellpadding=2>
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
<tr> <td> Lazy TABLE </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 4 16 No-SSE, lazy </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 4 16 SSE, lazy </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 4 16 SSE, lazy, alternate mapping </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 8 16, lazy </td> <td>Done - Jim</td> </tr>
<tr> <td> Composite, k=2, stdmap recursive </td> <td> Done - Kevin</td> </tr>
<tr> <td> Composite, k=2, altmap recursive </td> <td> Done - Kevin</td> </tr>
<tr> <td> Composite, k=2, stdmap inline </td> <td> Done - Kevin</td> </tr>
<tr> <td> LOG </td> <td> Done - Kevin </td> </tr>
<tr> <td> LOG ZERO</td> <td> Done - Kevin </td> </tr>
<tr> <td> Group 4 4 </td> <td>Done - Jim: I don't see a reason to implement others, although 4-8 will be faster, and 8 8 will have faster region ops. They'll never beat SPLIT.</td> </tr>
</table><p>
<hr>
<h3>Things we need to Implement: <i>w=32</i></h3>
<p><table border=3 cellpadding=2>
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_p, SSE </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_b, SSE </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 2 32,lazy </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 2 32, SSE, lazy </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 4 32, lazy </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 4 32, SSE,ALTMAP lazy </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 4 32, SSE, lazy </td> <td>Done - Jim</td> </tr>
<tr> <td> Split 8 8 </td> <td>Done - Jim </td> </tr>
<tr> <td> Group, g_s == g_r </td> <td>Done - Jim</td></tr>
<tr> <td> Group, any g_s and g_r</td> <td>Done - Jim</td></tr>
<tr> <td> Composite, k=2, stdmap recursive </td> <td> Done - Kevin</td> </tr>
<tr> <td> Composite, k=2, altmap recursive </td> <td> Done - Kevin</td> </tr>
<tr> <td> Composite, k=2, stdmap inline </td> <td> Done - Kevin</td> </tr>
</table><p>
<hr>
<h3>Things we need to Implement: <i>w=64</i></h3>
<p><table border=3 cellpadding=2>
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
<tr> <td> BYTWO_p </td> <td> - </td> </tr>
<tr> <td> BYTWO_b </td> <td> - </td> </tr>
<tr> <td> BYTWO_p, SSE </td> <td> - </td> </tr>
<tr> <td> BYTWO_b, SSE </td> <td> - </td> </tr>
<tr> <td> Split 16 1 SSE, maybe lazy </td> <td> - </td> </tr>
<tr> <td> Split 8 1 lazy </td> <td> - </td> </tr>
<tr> <td> Split 8 8 </td> <td> - </td> </tr>
<tr> <td> Split 8 8 lazy </td> <td> - </td> </tr>
<tr> <td> Group </td> <td> - </td> </tr>
<tr> <td> Composite, k=2, alternate mapping </td> <td> - </td> </tr>
</table><p>
<hr>
<h3>Things we need to Implement: <i>w=128</i></h3>
<p><table border=3 cellpadding=2>
<tr> <td> SHIFT </td> <td> Done - Will </td> </tr>
<tr> <td> BYTWO_p </td> <td> - </td> </tr>
<tr> <td> BYTWO_b </td> <td> - </td> </tr>
<tr> <td> BYTWO_p, SSE </td> <td> - </td> </tr>
<tr> <td> BYTWO_b, SSE </td> <td> - </td> </tr>
<tr> <td> Split 32 1 SSE, maybe lazy </td> <td> - </td> </tr>
<tr> <td> Split 16 1 lazy </td> <td> - </td> </tr>
<tr> <td> Split 16 16 - Maybe that's insanity</td> <td> - </td> </tr>
<tr> <td> Split 16 16 lazy </td> <td> - </td> </tr>
<tr> <td> Group (SSE) </td> <td> - </td> </tr>
<tr> <td> Composite, k=?, alternate mapping </td> <td> - </td> </tr>
</table><p>
<hr>
<h3>Things we need to Implement: <i>w=general between 1 & 32</i></h3>
<p><table border=3 cellpadding=2>
<tr> <td> CAUCHY Region (SSE XOR)</td> <td> Done - Jim </td> </tr>
<tr> <td> SHIFT </td> <td> Done - Jim </td> </tr>
<tr> <td> TABLE </td> <td> Done - Jim </td> </tr>
<tr> <td> LOG </td> <td> Done - Jim </td> </tr>
<tr> <td> BYTWO_p </td> <td>Done - Jim</td> </tr>
<tr> <td> BYTWO_b </td> <td>Done - Jim</td> </tr>
<tr> <td> Group, g_s == g_r </td> <td>Done - Jim</td></tr>
<tr> <td> Group, any g_s and g_r</td> <td>Done - Jim</td></tr>
<tr> <td> Split - do we need it?</td> <td>Done - Jim</td></tr>
<tr> <td> Composite - do we need it?</td> <td> - </td></tr>
<tr> <td> Split - do we need it?</td> <td> - </td></tr>
<tr> <td> Logzero?</td> <td> - </td></tr>
</table><p>

10
flag_tester/README.txt Normal file
View File

@ -0,0 +1,10 @@
Run which_compile_flags.sh and it will print out the compile flags to use in
GNUmakefile. By default, this script uses "cc" as its compiler but you can
pass in the name of your compiler as an argument.
EXAMPLE: "./which_compile_flags.sh clang"
This script will run "clang" in the above example so be warned that if you type
something like "rm" for that argument, you get what you asked for. Also, make
sure that the compiler that you pass to which_compile_flags.sh is the same as
the compiler in GNUmakefile.

120
flag_tester/flag_test.c Normal file
View File

@ -0,0 +1,120 @@
/*
* flag_test.c - copied from whats_my_sse.c to output proper compile
* flags for the GNUmakefile
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "intel_cpu_capabilities.h"
/* Write the command-line synopsis to stderr and terminate with a failure status. */
void usage()
{
  fputs("usage: flag_test <compiler name>\n", stderr);
  exit(EXIT_FAILURE);
}
/*
 * Build `src` with `compiler` and `build_flags` into the executable `exe`,
 * run it, and diff what it prints against the reference file `expected`.
 *
 * Returns 1 when the executable was produced and diff.txt came out empty;
 * returns 0 when the command line does not fit the buffer, the executable is
 * missing after the build, or diff.txt cannot be opened or is non-empty.
 * temp.txt and diff.txt in the current directory are used as scratch files.
 */
static int flag_works(const char *compiler, const char *src, const char *exe,
                      const char *build_flags, const char *expected)
{
  char buf[1000];
  FILE *file;
  int n, ok;

  /* The compiler name comes from the command line, so bound the write. */
  n = snprintf(buf, sizeof(buf), "%s %s -o %s %s 2> /dev/null",
               compiler, src, exe, build_flags);
  if (n < 0 || (size_t) n >= sizeof(buf)) return 0;
  system(buf);

  /* No executable means the compiler rejected the flags or the source. */
  file = fopen(exe, "r");
  if (file == NULL) return 0;
  fclose(file);

  //run program and compare to the included output
  snprintf(buf, sizeof(buf), "./%s > temp.txt 2> /dev/null", exe);
  system(buf);
  snprintf(buf, sizeof(buf), "diff %s temp.txt > diff.txt 2> /dev/null", expected);
  system(buf);

  /* An empty diff.txt means the output matched the reference. */
  file = fopen("diff.txt", "r");
  if (file == NULL) return 0;
  ok = (fgetc(file) == EOF);
  fclose(file);
  return ok;
}

/*
 * Print the CFLAGS and LDFLAGS lines for the compiler named in argv[1].
 *
 * The widest SSE level that the CPU reports and whose test program builds and
 * produces the reference output is selected (SSE4, then SSSE3, then SSE2).
 * PCLMUL flags are added only when some SSE level was selected.
 */
int main(int argc, char **argv)
{
  //make sure to extend these buffers if more flags are added to this program
  char cflags[1000], ldflags[1000];
  int sse_found = 0;

  if(argc != 2)
    usage();

  strcpy(cflags, "CFLAGS = -O3");
  strcpy(ldflags, "LDFLAGS = -O3");

  if(cpu_has_feature(CPU_CAP_SSE42))
  {
    if(flag_works(argv[1], "sse_test.c", "sse4", "-msse4 -DSSE4", "sse4_test.txt"))
    {
      strcat(cflags, " -msse4 -DINTEL_SSE4");
      strcat(ldflags, " -msse4");
      sse_found = 1;
    }
  }

  if(cpu_has_feature(CPU_CAP_SSSE3) && !sse_found)
  {
    if(flag_works(argv[1], "sse_test.c", "ssse3", "-mssse3 -DSSSE3", "ssse3_test.txt"))
    {
      strcat(cflags, " -mssse3 -DINTEL_SSSE3");
      strcat(ldflags, " -mssse3");
      sse_found = 1;
    }
  }

  if(cpu_has_feature(CPU_CAP_SSE2) && !sse_found)
  {
    if(flag_works(argv[1], "sse_test.c", "sse2", "-msse2 -DSSE2", "sse2_test.txt"))
    {
      strcat(cflags, " -msse2 -DINTEL_SSE2");
      strcat(ldflags, " -msse2");
      sse_found = 1;
    }
  }

  if(cpu_has_feature(CPU_CAP_PCLMULQDQ) && sse_found)
  {
    if(flag_works(argv[1], "pclmul_test.c", "pclmul", "-maes -mpclmul", "pclmul_test.txt"))
    {
      strcat(cflags, " -maes -mpclmul -DINTEL_PCLMUL");
      strcat(ldflags, " -maes -mpclmul");
    }
  }

  printf("%s\n%s\n", cflags, ldflags);
  return 0;
}

View File

@ -16,7 +16,7 @@
/* Feature identifiers: every CPU_CAP_* value is CPU_CPSSE OR-ed with a small
   number that selects the feature.
   NOTE(review): the small numbers look like bit positions in the ECX value
   returned by CPUID function 1 -- confirm against cpu_has_feature(). */
#define CPU_CPSSE 0x2000
#define CPU_CAP_SSE3 (CPU_CPSSE | 0)
#define CPU_CAP_PCLMULQDQ (CPU_CPSSE | 1)
/* NOTE(review): CPU_CAP_SSSE3 is listed twice (10, then 9). This hunk appears
   to show the line before and after the change, with 9 being the new value;
   only one definition should remain in the header -- confirm. */
#define CPU_CAP_SSSE3 (CPU_CPSSE | 10)
#define CPU_CAP_SSSE3 (CPU_CPSSE | 9)
#define CPU_CAP_SSE41 (CPU_CPSSE | 19)
#define CPU_CAP_SSE42 (CPU_CPSSE | 20)
#define CPU_CAP_AVX (CPU_CPSSE | 28)
@ -25,7 +25,6 @@
__asm__ __volatile__ ("cpuid":\
"=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) : "a" (func));
inline
int
cpu_has_feature (unsigned which)
{

40
flag_tester/pclmul_test.c Normal file
View File

@ -0,0 +1,40 @@
#include <wmmintrin.h>
#include <stdint.h>
#include <stdio.h>
/* MM_PRINT8(s, r): print the label s left-justified in a 20-character field,
   then the 16 bytes of the 128-bit value r as two-digit hex, byte 15 of the
   stored value first and byte 0 last, then a newline.
   NOTE(review): both branches of the separator ternary are the same string as
   shown here; whitespace may have been collapsed in this rendering -- confirm
   against the original file before relying on the exact spacing. */
#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-20s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); }
/*
 * Exercise the carry-less multiply intrinsic and print every intermediate
 * value with MM_PRINT8, so the output can be compared against a reference
 * file. Takes no arguments and always returns 0.
 */
int main()
{
  uint32_t pp;
  __m128i a, b, c;

  a = _mm_set1_epi8(0x0D);
  b = _mm_set_epi32(0,0,0,0x0A);
  pp = 0x13;
  MM_PRINT8("a", a);
  MM_PRINT8("b", b);

  /* Carry-less product of the low 64-bit halves of a and b. */
  c = _mm_clmulepi64_si128(a, b, 0);
  MM_PRINT8("a clm b", c);

  /* Keep only the high nibble of each product byte and move it down 4 bits. */
  a = _mm_set1_epi8(0xf0);
  MM_PRINT8("a", a);
  b = _mm_and_si128(a, c);
  b = _mm_srli_epi64(b, 4);
  MM_PRINT8("shifted", b);

  /* Multiply the shifted bits by pp and XOR the result into the product. */
  a = _mm_set_epi32(0,0,0,pp);
  MM_PRINT8("PP", a);
  b = _mm_clmulepi64_si128(a, b, 0);
  MM_PRINT8("PP clm over", b);
  c = _mm_xor_si128(c,b);
  MM_PRINT8("Answer", c);

  return 0;
}

View File

@ -0,0 +1,8 @@
a 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d 0d
b 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0a
a clm b 00 00 00 00 00 00 00 00 72 72 72 72 72 72 72 72
a f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
shifted 00 00 00 00 00 00 00 00 07 07 07 07 07 07 07 07
PP 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 13
PP clm over 00 00 00 00 00 00 00 00 79 79 79 79 79 79 79 79
Answer 00 00 00 00 00 00 00 00 0b 0b 0b 0b 0b 0b 0b 0b

30
flag_tester/sse2_test.txt Normal file
View File

@ -0,0 +1,30 @@
a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05

35
flag_tester/sse4_test.txt Normal file
View File

@ -0,0 +1,35 @@
a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
d insert32 @ 2 00 00 00 00 ab cd 12 34 00 00 00 00 00 00 00 00
extract_epi32 @ 2: abcd1234
d insert64 @ 0 00 00 00 00 ab cd 12 34 fe dc ba 12 91 82 73 64
extract_epi64 @ 0: fedcba1291827364
c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05
a shuffle(b, c) 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02

142
flag_tester/sse_test.c Normal file
View File

@ -0,0 +1,142 @@
#ifdef SSE4
#define SSSE3
#include <nmmintrin.h>
#endif
#ifdef SSSE3
#define SSE2
#include <tmmintrin.h>
#endif
#ifdef SSE2
#include <emmintrin.h>
#endif
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
/* MM_PRINT8(s, r): print the label s left-justified in a 20-character field,
   then the 16 bytes of the 128-bit value r as two-digit hex, byte 15 of the
   stored value first and byte 0 last, then a newline.
   NOTE(review): both branches of the separator ternary are the same string as
   shown here; whitespace may have been collapsed in this rendering -- confirm
   against the original file before relying on the exact spacing. */
#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-20s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); }
/*
 * Run a fixed sequence of SSE integer intrinsics and print each result with
 * MM_PRINT8 so the output can be compared against a reference file.
 *
 * The SSE2 macro enables the common part; SSSE3 additionally enables the
 * byte shuffle and SSE4 the 32/64-bit insert/extract section.
 * Takes no arguments and always returns 0.
 */
int main()
{
  uint32_t u32;
  uint64_t u64;
  /* Scratch bytes for the loads and stores below. The __m128i member gives the
     union (and therefore bytes[0]) the 16-byte alignment that
     _mm_load_si128 / _mm_store_si128 require; heap memory from malloc does not
     promise that, and this also removes the unchecked, never-freed allocation. */
  union { __m128i align; uint8_t bytes[32]; } mem;
  uint8_t *ui8 = mem.bytes, i;
  __m128i a, b, c, d;

  for(i=0; i < 20; i++)
    ui8[i] = i;

  /* One aligned load plus three unaligned loads at offsets 1, 2 and 3. */
  a = _mm_load_si128( (__m128i *) ui8 );
  b = _mm_loadu_si128( (__m128i *) (ui8+1));
  c = _mm_loadu_si128( (__m128i *) (ui8+2));
  d = _mm_loadu_si128( (__m128i *) (ui8+3));
  MM_PRINT8("a", a);
  MM_PRINT8("b", b);
  MM_PRINT8("c", c);
  MM_PRINT8("d", d);

  /* Left shifts at each lane width. */
  a = _mm_slli_epi16(a, 2);
  b = _mm_slli_epi32(b, 2);
  c = _mm_slli_epi64(c, 2);
  d = _mm_slli_si128(d, 2);
  MM_PRINT8("a sl16", a);
  MM_PRINT8("b sl32", b);
  MM_PRINT8("c sl64", c);
  MM_PRINT8("d sl128", d);

  /* Matching right shifts. */
  a = _mm_srli_epi16(a, 2);
  b = _mm_srli_epi32(b, 2);
  c = _mm_srli_epi64(c, 2);
  d = _mm_srli_si128(d, 2);
  MM_PRINT8("a sr16", a);
  MM_PRINT8("b sr32", b);
  MM_PRINT8("c sr64", c);
  MM_PRINT8("d sr128", d);

  /* XOR and subtraction at each lane width. */
  d = _mm_xor_si128(a, b);
  MM_PRINT8("d = a^b", d);
  d = _mm_sub_epi8(a, b);
  MM_PRINT8("d = a-b epi8", d);
  d = _mm_sub_epi16(a, b);
  MM_PRINT8("d = a-b epi16", d);
  d = _mm_sub_epi32(a, b);
  MM_PRINT8("d = a-b epi32", d);
  d = _mm_sub_epi64(a, b);
  MM_PRINT8("d = a-b epi64", d);

  /* Constant constructors. */
  d = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
  MM_PRINT8("d set_epi8", d);
  d = _mm_set_epi32(0x12345678, 0x9abcdef0, 0x12345678, 0x9abcdef0);
  MM_PRINT8("d set_epi32", d);
  d = _mm_set1_epi64x(0xF0F0F0F0F0F0F0F0ULL);
  MM_PRINT8("d set1_epi64", d);
  d = _mm_set1_epi32(0xe2e2e2e2);
  MM_PRINT8("d set1_epi32", d);
  d = _mm_set1_epi16(0xaff3);
  MM_PRINT8("d set1_epi16", d);
  d = _mm_set1_epi8(0xc5);
  MM_PRINT8("d set1_epi8", d);

  /* Pack, unpack and mask. */
  d = _mm_packus_epi16(d, d);
  MM_PRINT8("d packus_epi16(d,d)", d);
  c = _mm_unpackhi_epi8(a, d);
  MM_PRINT8("c unpackhi(a,d)", c);
  b = _mm_unpacklo_epi8(c, a);
  MM_PRINT8("b unpacklo(c,a)", b);
  d = _mm_and_si128(d, b);
  MM_PRINT8("d and(d,b)", d);

  /* Aligned store back to the scratch bytes, printed in decimal. */
  _mm_store_si128( (__m128i *) ui8, a);
  printf("a stored to mem: ");
  for(i=0; i < 16; i++)
    printf("%u ", ui8[i]);
  printf("\n");

  d = _mm_setzero_si128();
  MM_PRINT8("d setzero", d);

  u32 = 0xABCD1234;
  u64 = 0xFEDCBA1291827364ULL;
#ifdef SSE4
  d = _mm_insert_epi32(d, u32, 2);
  MM_PRINT8("d insert32 @ 2", d);
  /* Clear first so the printed value must come from the extract. */
  u32 = 0;
  u32 = _mm_extract_epi32(d, 2);
  printf("extract_epi32 @ 2: %x\n", u32);
  d = _mm_insert_epi64(d, u64, 0);
  MM_PRINT8("d insert64 @ 0", d);
  u64 = 0;
  u64 = _mm_extract_epi64(d, 0);
  printf("extract_epi64 @ 0: %" PRIx64 "\n", u64);
#endif

  c = _mm_set1_epi8(5);
  MM_PRINT8("c", c);
#ifdef SSSE3
  a = _mm_shuffle_epi8(b, c);
  MM_PRINT8("a shuffle(b, c)", a);
#endif

  return 0;
}

View File

@ -0,0 +1,31 @@
a 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
b 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
c 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
d 12 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
a sl16 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04 00
b sl32 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08 04
c sl64 44 40 3c 38 34 30 2c 28 24 20 1c 18 14 10 0c 08
d sl128 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 00 00
a sr16 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
b sr32 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
c sr64 11 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02
d sr128 00 00 10 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03
d = a^b 1f 01 03 01 07 01 03 01 0f 01 03 01 07 01 03 01
d = a-b epi8 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
d = a-b epi16 fe ff fe ff fe ff fe ff fe ff fe ff fe ff fe ff
d = a-b epi32 fe fe fe ff fe fe fe ff fe fe fe ff fe fe fe ff
d = a-b epi64 fe fe fe fe fe fe fe ff fe fe fe fe fe fe fe ff
d set_epi8 0f 0e 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
d set_epi32 12 34 56 78 9a bc de f0 12 34 56 78 9a bc de f0
d set1_epi64 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0 f0
d set1_epi32 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2 e2
d set1_epi16 af f3 af f3 af f3 af f3 af f3 af f3 af f3 af f3
d set1_epi8 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5 c5
d packus_epi16(d,d) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c unpackhi(a,d) 00 0f 00 0e 00 0d 00 0c 00 0b 00 0a 00 09 00 08
b unpacklo(c,a) 07 00 06 0b 05 00 04 0a 03 00 02 09 01 00 00 08
d and(d,b) 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
a stored to mem: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
d setzero 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
c 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05 05
a shuffle(b, c) 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02 02

View File

@ -0,0 +1,19 @@
#!/bin/sh
# Print the CFLAGS and LDFLAGS lines to use in GNUmakefile.
# usage: which_compile_flags.sh [compiler]    (the compiler defaults to "cc")
# Exit status: 0 when flags were printed, 1 when flag_test.c did not compile.

if [ -n "$1" ]; then
  CC=$1
else
  CC=cc
fi

status=0

# $CC is intentionally unquoted here: it is the command to run.
$CC flag_test.c -o flag_test 2> /dev/null
if [ -e "flag_test" ]; then
  # Pass the compiler as a single argument; flag_test expects exactly one.
  OUTPUT=$(./flag_test "$CC" 2> /dev/null)
  if [ -n "$OUTPUT" ]; then
    echo "$OUTPUT"
  else
    # flag_test printed nothing, so fall back to the plain flags.
    printf "CFLAGS = -O3\nLDFLAGS = -O3\n"
  fi
else
  # Keep $CC out of the format string so '%' or '\' in it is printed literally.
  printf '%s failed to compile flag_test.c\n' "$CC"
  status=1
fi

# Remove the probe binaries and scratch files; most of them usually do not exist.
rm -f sse4 sse2 ssse3 pclmul diff.txt flag_test temp.txt 2> /dev/null
exit $status

660
gf.c
View File

@ -8,6 +8,405 @@
#include <stdio.h>
#include <stdlib.h>
int _gf_errno = GF_E_DEFAULT;
/*
 * Print to stderr the message that corresponds to the current value of
 * _gf_errno, followed by a newline. Values without a case print
 * "Undefined error.".
 *
 * Three messages are worded to match the checks in gf_error_check():
 *  - GF_E_GR_ARGX is raised when arg1 or arg2 is <= 0, so both must be > 0.
 *  - GF_E_SP128_S is raised for -r SSE|NOSSE and GF_E_SP128_A for -r ALTMAP,
 *    following the _S / _A naming used for w = 16, 32 and 64.
 */
void gf_error()
{
  char *s;

  switch(_gf_errno) {
    case GF_E_DEFAULT: s = "No Error."; break;
    case GF_E_TWOMULT: s = "Cannot specify two -m's."; break;
    case GF_E_TWO_DIV: s = "Cannot specify two -d's."; break;
    case GF_E_POLYSPC: s = "-p needs to be followed by a number in hex (0x optional)."; break;
    case GF_E_GROUPAR: s = "Ran out of arguments in -m GROUP."; break;
    case GF_E_GROUPNU: s = "In -m GROUP g_s g_r -- g_s and g_r need to be numbers."; break;
    case GF_E_SPLITAR: s = "Ran out of arguments in -m SPLIT."; break;
    case GF_E_SPLITNU: s = "In -m SPLIT w_a w_b -- w_a and w_b need to be numbers."; break;
    case GF_E_FEWARGS: s = "Not enough arguments (Perhaps end with '-'?)"; break;
    case GF_E_CFM___W: s = "-m CARRY_FREE, w must be 4, 8, 16, 32, 64 or 128."; break;
    case GF_E_COMPXPP: s = "-m COMPOSITE, No poly specified, and we don't have a default for the given sub-field."; break;
    case GF_E_BASE__W: s = "-m COMPOSITE and the base field is not for w/2."; break;
    case GF_E_CFM4POL: s = "-m CARRY_FREE, w=4. (Prim-poly & 0xc) must equal 0."; break;
    case GF_E_CFM8POL: s = "-m CARRY_FREE, w=8. (Prim-poly & 0x80) must equal 0."; break;
    case GF_E_CF16POL: s = "-m CARRY_FREE, w=16. (Prim-poly & 0xe000) must equal 0."; break;
    case GF_E_CF32POL: s = "-m CARRY_FREE, w=32. (Prim-poly & 0xfe000000) must equal 0."; break;
    case GF_E_CF64POL: s = "-m CARRY_FREE, w=64. (Prim-poly & 0xfffe000000000000ULL) must equal 0."; break;
    case GF_E_MDEFDIV: s = "If multiplication method == default, can't change division."; break;
    case GF_E_MDEFREG: s = "If multiplication method == default, can't change region."; break;
    case GF_E_MDEFARG: s = "If multiplication method == default, can't use arg1/arg2."; break;
    case GF_E_DIVCOMP: s = "Cannot change the division technique with -m COMPOSITE."; break;
    case GF_E_DOUQUAD: s = "Cannot specify -r DOUBLE and -r QUAD."; break;
    case GF_E_SSE__NO: s = "Cannot specify -r SSE and -r NOSSE."; break;
    case GF_E_CAUCHYB: s = "Cannot specify -r CAUCHY and any other -r."; break;
    case GF_E_CAUCOMP: s = "Cannot specify -m COMPOSITE and -r CAUCHY."; break;
    case GF_E_CAUGT32: s = "Cannot specify -r CAUCHY with w > 32."; break;
    case GF_E_ARG1SET: s = "Only use arg1 with SPLIT, GROUP or COMPOSITE."; break;
    case GF_E_ARG2SET: s = "Only use arg2 with SPLIT or GROUP."; break;
    case GF_E_MATRIXW: s = "Cannot specify -d MATRIX with w > 32."; break;
    case GF_E_BAD___W: s = "W must be 1-32, 64 or 128."; break;
    case GF_E_DOUBLET: s = "Can only specify -r DOUBLE with -m TABLE."; break;
    case GF_E_DOUBLEW: s = "Can only specify -r DOUBLE w = 4 or w = 8."; break;
    case GF_E_DOUBLEJ: s = "Cannot specify -r DOUBLE with -r ALTMAP|SSE|NOSSE."; break;
    case GF_E_DOUBLEL: s = "Can only specify -r DOUBLE -r LAZY with w = 8"; break;
    case GF_E_QUAD__T: s = "Can only specify -r QUAD with -m TABLE."; break;
    case GF_E_QUAD__W: s = "Can only specify -r QUAD w = 4."; break;
    case GF_E_QUAD__J: s = "Cannot specify -r QUAD with -r ALTMAP|SSE|NOSSE."; break;
    case GF_E_BADPOLY: s = "Bad primitive polynomial (high bits set)."; break;
    case GF_E_COMP_PP: s = "Bad primitive polynomial -- bigger than sub-field."; break;
    case GF_E_LAZY__X: s = "If -r LAZY, then -r must be DOUBLE or QUAD."; break;
    case GF_E_ALTSHIF: s = "Cannot specify -m SHIFT and -r ALTMAP."; break;
    case GF_E_SSESHIF: s = "Cannot specify -m SHIFT and -r SSE|NOSSE."; break;
    case GF_E_ALT_CFM: s = "Cannot specify -m CARRY_FREE and -r ALTMAP."; break;
    case GF_E_SSE_CFM: s = "Cannot specify -m CARRY_FREE and -r SSE|NOSSE."; break;
    case GF_E_PCLMULX: s = "Specified -m CARRY_FREE, but PCLMUL is not supported."; break;
    case GF_E_ALT_BY2: s = "Cannot specify -m BYTWO_x and -r ALTMAP."; break;
    case GF_E_BY2_SSE: s = "Specified -m BYTWO_x -r SSE, but SSE2 is not supported."; break;
    case GF_E_LOGBADW: s = "With Log Tables, w must be <= 27."; break;
    case GF_E_LOG___J: s = "Cannot use Log tables with -r ALTMAP|SSE|NOSSE."; break;
    case GF_E_LOGPOLY: s = "Cannot use Log tables because the polynomial is not primitive."; break;
    case GF_E_ZERBADW: s = "With -m LOG_ZERO, w must be 8 or 16."; break;
    case GF_E_ZEXBADW: s = "With -m LOG_ZERO_EXT, w must be 8."; break;
    case GF_E_GR_ARGX: s = "With -m GROUP, arg1 and arg2 must be > 0."; break;
    case GF_E_GR_W_48: s = "With -m GROUP, w cannot be 4 or 8."; break;
    case GF_E_GR_W_16: s = "With -m GROUP, w == 16, arg1 and arg2 must be 4."; break;
    case GF_E_GR_128A: s = "With -m GROUP, w == 128, arg1 must be 4, and arg2 in { 4,8,16 }."; break;
    case GF_E_GR_SSE4: s = "With -m GROUP, w == 128, you need SSE4."; break;
    case GF_E_GR_A_27: s = "With -m GROUP, arg1 and arg2 must be <= 27."; break;
    case GF_E_GR_AR_W: s = "With -m GROUP, arg1 and arg2 must be <= w."; break;
    case GF_E_GR____J: s = "Cannot use GROUP with -r ALTMAP|SSE|NOSSE."; break;
    case GF_E_TABLE_W: s = "With -m TABLE, w must be < 15, or == 16."; break;
    case GF_E_TAB_SSE: s = "With -m TABLE, SSE|NOSSE only applies to w=4."; break;
    case GF_E_TABSSE3: s = "With -m TABLE, -r SSE, you need SSSE3 supported."; break;
    case GF_E_TAB_ALT: s = "With -m TABLE, you cannot use ALTMAP."; break;
    case GF_E_SP128AR: s = "With -m SPLIT, w=128, bad arg1/arg2."; break;
    case GF_E_SP128AL: s = "With -m SPLIT, w=128, -r SSE requires -r ALTMAP."; break;
    case GF_E_SP128AS: s = "With -m SPLIT, w=128, ALTMAP needs SSSE3 supported."; break;
    case GF_E_SP128_A: s = "With -m SPLIT, w=128, -r ALTMAP only with arg1/arg2 = 4/128."; break;
    case GF_E_SP128_S: s = "With -m SPLIT, w=128, -r SSE|NOSSE only with arg1/arg2 = 4/128."; break;
    case GF_E_SPLIT_W: s = "With -m SPLIT, w must be in {8, 16, 32, 64, 128}."; break;
    case GF_E_SP_16AR: s = "With -m SPLIT, w=16, Bad arg1/arg2."; break;
    case GF_E_SP_16_A: s = "With -m SPLIT, w=16, -r ALTMAP only with arg1/arg2 = 4/16."; break;
    case GF_E_SP_16_S: s = "With -m SPLIT, w=16, -r SSE|NOSSE only with arg1/arg2 = 4/16."; break;
    case GF_E_SP_32AR: s = "With -m SPLIT, w=32, Bad arg1/arg2."; break;
    case GF_E_SP_32AS: s = "With -m SPLIT, w=32, -r ALTMAP needs SSSE3 supported."; break;
    case GF_E_SP_32_A: s = "With -m SPLIT, w=32, -r ALTMAP only with arg1/arg2 = 4/32."; break;
    case GF_E_SP_32_S: s = "With -m SPLIT, w=32, -r SSE|NOSSE only with arg1/arg2 = 4/32."; break;
    case GF_E_SP_64AR: s = "With -m SPLIT, w=64, Bad arg1/arg2."; break;
    case GF_E_SP_64AS: s = "With -m SPLIT, w=64, -r ALTMAP needs SSSE3 supported."; break;
    case GF_E_SP_64_A: s = "With -m SPLIT, w=64, -r ALTMAP only with arg1/arg2 = 4/64."; break;
    case GF_E_SP_64_S: s = "With -m SPLIT, w=64, -r SSE|NOSSE only with arg1/arg2 = 4/64."; break;
    case GF_E_SP_8_AR: s = "With -m SPLIT, w=8, Bad arg1/arg2."; break;
    case GF_E_SP_8__A: s = "With -m SPLIT, w=8, Can't have -r ALTMAP."; break;
    case GF_E_SP_SSE3: s = "With -m SPLIT, Need SSSE3 support for SSE."; break;
    case GF_E_COMP_A2: s = "With -m COMPOSITE, arg1 must equal 2."; break;
    case GF_E_COMP_SS: s = "With -m COMPOSITE, -r SSE and -r NOSSE do not apply."; break;
    case GF_E_COMP__W: s = "With -m COMPOSITE, w must be 8, 16, 32, 64 or 128."; break;
    case GF_E_UNKFLAG: s = "Unknown method flag - should be -m, -d, -r or -p."; break;
    case GF_E_UNKNOWN: s = "Unknown multiplication type."; break;
    case GF_E_UNK_REG: s = "Unknown region type."; break;
    case GF_E_UNK_DIV: s = "Unknown division type."; break;
    default: s = "Undefined error.";
  }

  fprintf(stderr, "%s\n", s);
}
/*
 * Look up the default polynomial for a composite field built on top of `base`.
 *
 * The answer depends on the base field's word size, on whether the base is
 * itself a composite field, and on the base field's own polynomial. When the
 * base is composite, its polynomial must equal the default for its own base
 * field, otherwise no default is offered.
 *
 * Returns the default polynomial, or 0 when there is none for this base field.
 */
uint64_t gf_composite_get_default_poly(gf_t *base)
{
  gf_internal_t *info = (gf_internal_t *) base->scratch;
  int base_is_composite = (info->mult_type == GF_MULT_COMPOSITE);
  int sub_poly;

  switch (info->w) {

    case 4:
      if (base_is_composite) return 0;
      return (info->prim_poly == 0x13) ? 2 : 0;

    case 8:
      if (base_is_composite) return 0;
      return (info->prim_poly == 0x11d) ? 3 : 0;

    case 16:
      if (!base_is_composite) {
        if (info->prim_poly == 0x1100b) return 2;
        if (info->prim_poly == 0x1002d) return 7;
        return 0;
      }
      sub_poly = gf_composite_get_default_poly(info->base_gf);
      if (sub_poly != info->prim_poly) return 0;
      return (sub_poly == 3) ? 0x105 : 0;

    case 32:
      if (!base_is_composite) {
        if (info->prim_poly == 0x400007) return 2;
        if (info->prim_poly == 0xc5) return 3;
        return 0;
      }
      sub_poly = gf_composite_get_default_poly(info->base_gf);
      if (sub_poly != info->prim_poly) return 0;
      if (sub_poly == 2) return 0x10005;
      if (sub_poly == 7) return 0x10008;
      if (sub_poly == 0x105) return 0x10002;
      return 0;

    case 64:
      if (!base_is_composite) {
        if (info->prim_poly == 0x1bULL) return 2;
        return 0;
      }
      sub_poly = gf_composite_get_default_poly(info->base_gf);
      if (sub_poly != info->prim_poly) return 0;
      if (sub_poly == 3) return 0x100000009ULL;
      if (sub_poly == 2) return 0x100000004ULL;
      if (sub_poly == 0x10005) return 0x100000003ULL;
      if (sub_poly == 0x10002) return 0x100000005ULL;
      /* JSP: 0x100000003 works here too, but a distinct value keeps
         the cases distinguishable. */
      if (sub_poly == 0x10008) return 0x100000006ULL;
      return 0;

    default:
      return 0;
  }
}
/*
 * Validate a field specification before any memory is sized or initialized.
 *
 *   w            word size; must be 1-32, 64 or 128
 *   mult_type    one of the GF_MULT_* multiplication techniques
 *   region_type  bitwise OR of GF_REGION_* flags
 *   divide_type  GF_DIVIDE_DEFAULT, GF_DIVIDE_MATRIX or GF_DIVIDE_EUCLID
 *   arg1, arg2   technique-specific arguments (SPLIT, GROUP, COMPOSITE)
 *   poly         polynomial, or 0
 *   base         base field for GF_MULT_COMPOSITE; may be NULL
 *
 * Returns 1 when the combination is accepted. Returns 0 and stores the reason
 * in _gf_errno otherwise; _gf_errno is not modified on success.
 *
 * SSE2 / SSSE3 / SSE4 / PCLMUL availability is decided at compile time from
 * the INTEL_SSE2, INTEL_SSSE3, INTEL_SSE4 and INTEL_PCLMUL macros.
 */
int gf_error_check(int w, int mult_type, int region_type, int divide_type,
                   int arg1, int arg2, uint64_t poly, gf_t *base)
{
  int sse4 = 0;
  int sse3 = 0;
  int sse2 = 0;
  int pclmul = 0;
  int rdouble, rquad, rlazy, rsse, rnosse, raltmap, rcauchy, tmp;
  gf_internal_t *sub;

  rdouble = (region_type & GF_REGION_DOUBLE_TABLE);
  rquad = (region_type & GF_REGION_QUAD_TABLE);
  rlazy = (region_type & GF_REGION_LAZY);
  rsse = (region_type & GF_REGION_SSE);
  rnosse = (region_type & GF_REGION_NOSSE);
  raltmap = (region_type & GF_REGION_ALTMAP);
  rcauchy = (region_type & GF_REGION_CAUCHY);

  if (divide_type != GF_DIVIDE_DEFAULT &&
      divide_type != GF_DIVIDE_MATRIX &&
      divide_type != GF_DIVIDE_EUCLID) {
    _gf_errno = GF_E_UNK_DIV;
    return 0;
  }

  /* Any bit outside the known region flags is an unknown region type. */
  tmp = ( GF_REGION_DOUBLE_TABLE | GF_REGION_QUAD_TABLE | GF_REGION_LAZY |
          GF_REGION_SSE | GF_REGION_NOSSE | GF_REGION_ALTMAP | GF_REGION_CAUCHY );
  if (region_type & (~tmp)) { _gf_errno = GF_E_UNK_REG; return 0; }

#ifdef INTEL_SSE2
  sse2 = 1;
#endif
#ifdef INTEL_SSSE3
  sse3 = 1;
#endif
#ifdef INTEL_SSE4
  sse4 = 1;
#endif
#ifdef INTEL_PCLMUL
  pclmul = 1;
#endif

  if (w < 1 || (w > 32 && w != 64 && w != 128)) { _gf_errno = GF_E_BAD___W; return 0; }

  /* For w < 64 the polynomial may use at most bits 0..w. */
  if (mult_type != GF_MULT_COMPOSITE && w < 64) {
    if ((poly >> (w+1)) != 0) { _gf_errno = GF_E_BADPOLY; return 0; }
  }

  if (mult_type == GF_MULT_DEFAULT) {
    if (divide_type != GF_DIVIDE_DEFAULT) { _gf_errno = GF_E_MDEFDIV; return 0; }
    if (region_type != GF_REGION_DEFAULT) { _gf_errno = GF_E_MDEFREG; return 0; }
    if (arg1 != 0 || arg2 != 0) { _gf_errno = GF_E_MDEFARG; return 0; }
    return 1;
  }

  if (rsse && rnosse) { _gf_errno = GF_E_SSE__NO; return 0; }
  if (rcauchy && w > 32) { _gf_errno = GF_E_CAUGT32; return 0; }
  if (rcauchy && region_type != GF_REGION_CAUCHY) { _gf_errno = GF_E_CAUCHYB; return 0; }
  if (rcauchy && mult_type == GF_MULT_COMPOSITE) { _gf_errno = GF_E_CAUCOMP; return 0; }

  if (arg1 != 0 && mult_type != GF_MULT_COMPOSITE &&
      mult_type != GF_MULT_SPLIT_TABLE && mult_type != GF_MULT_GROUP) {
    _gf_errno = GF_E_ARG1SET;
    return 0;
  }

  if (arg2 != 0 && mult_type != GF_MULT_SPLIT_TABLE && mult_type != GF_MULT_GROUP) {
    _gf_errno = GF_E_ARG2SET;
    return 0;
  }

  if (divide_type == GF_DIVIDE_MATRIX && w > 32) { _gf_errno = GF_E_MATRIXW; return 0; }

  if (rdouble) {
    if (rquad) { _gf_errno = GF_E_DOUQUAD; return 0; }
    if (mult_type != GF_MULT_TABLE) { _gf_errno = GF_E_DOUBLET; return 0; }
    if (w != 4 && w != 8) { _gf_errno = GF_E_DOUBLEW; return 0; }
    if (rsse || rnosse || raltmap) { _gf_errno = GF_E_DOUBLEJ; return 0; }
    if (rlazy && w == 4) { _gf_errno = GF_E_DOUBLEL; return 0; }
    return 1;
  }

  if (rquad) {
    if (mult_type != GF_MULT_TABLE) { _gf_errno = GF_E_QUAD__T; return 0; }
    if (w != 4) { _gf_errno = GF_E_QUAD__W; return 0; }
    if (rsse || rnosse || raltmap) { _gf_errno = GF_E_QUAD__J; return 0; }
    return 1;
  }

  /* LAZY is only meaningful together with DOUBLE or QUAD, handled above. */
  if (rlazy) { _gf_errno = GF_E_LAZY__X; return 0; }

  if (mult_type == GF_MULT_SHIFT) {
    if (raltmap) { _gf_errno = GF_E_ALTSHIF; return 0; }
    if (rsse || rnosse) { _gf_errno = GF_E_SSESHIF; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_CARRY_FREE) {
    if (w != 4 && w != 8 && w != 16 &&
        w != 32 && w != 64 && w != 128) { _gf_errno = GF_E_CFM___W; return 0; }
    if (w == 4 && (poly & 0xc)) { _gf_errno = GF_E_CFM4POL; return 0; }
    if (w == 8 && (poly & 0x80)) { _gf_errno = GF_E_CFM8POL; return 0; }
    if (w == 16 && (poly & 0xe000)) { _gf_errno = GF_E_CF16POL; return 0; }
    if (w == 32 && (poly & 0xfe000000)) { _gf_errno = GF_E_CF32POL; return 0; }
    if (w == 64 && (poly & 0xfffe000000000000ULL)) { _gf_errno = GF_E_CF64POL; return 0; }
    if (raltmap) { _gf_errno = GF_E_ALT_CFM; return 0; }
    if (rsse || rnosse) { _gf_errno = GF_E_SSE_CFM; return 0; }
    if (!pclmul) { _gf_errno = GF_E_PCLMULX; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_BYTWO_p || mult_type == GF_MULT_BYTWO_b) {
    if (raltmap) { _gf_errno = GF_E_ALT_BY2; return 0; }
    if (rsse && !sse2) { _gf_errno = GF_E_BY2_SSE; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_LOG_TABLE || mult_type == GF_MULT_LOG_ZERO
      || mult_type == GF_MULT_LOG_ZERO_EXT ) {
    if (w > 27) { _gf_errno = GF_E_LOGBADW; return 0; }
    if (raltmap || rsse || rnosse) { _gf_errno = GF_E_LOG___J; return 0; }

    if (mult_type == GF_MULT_LOG_TABLE) return 1;

    if (w != 8 && w != 16) { _gf_errno = GF_E_ZERBADW; return 0; }

    if (mult_type == GF_MULT_LOG_ZERO) return 1;

    if (w != 8) { _gf_errno = GF_E_ZEXBADW; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_GROUP) {
    if (arg1 <= 0 || arg2 <= 0) { _gf_errno = GF_E_GR_ARGX; return 0; }
    if (w == 4 || w == 8) { _gf_errno = GF_E_GR_W_48; return 0; }
    if (w == 16 && (arg1 != 4 || arg2 != 4)) { _gf_errno = GF_E_GR_W_16; return 0; }
    if (w == 128 && (arg1 != 4 ||
        (arg2 != 4 && arg2 != 8 && arg2 != 16))) { _gf_errno = GF_E_GR_128A; return 0; }
    if (w == 128 && !sse4) { _gf_errno = GF_E_GR_SSE4; return 0; }
    if (arg1 > 27 || arg2 > 27) { _gf_errno = GF_E_GR_A_27; return 0; }
    if (arg1 > w || arg2 > w) { _gf_errno = GF_E_GR_AR_W; return 0; }
    if (raltmap || rsse || rnosse) { _gf_errno = GF_E_GR____J; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_TABLE) {
    if (w != 16 && w >= 15) { _gf_errno = GF_E_TABLE_W; return 0; }
    if (w != 4 && (rsse || rnosse)) { _gf_errno = GF_E_TAB_SSE; return 0; }
    if (rsse && !sse3) { _gf_errno = GF_E_TABSSE3; return 0; }
    if (raltmap) { _gf_errno = GF_E_TAB_ALT; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_SPLIT_TABLE) {
    /* Order the pair so that arg1 <= arg2; the checks below assume it. */
    if (arg1 > arg2) {
      tmp = arg1;
      arg1 = arg2;
      arg2 = tmp;
    }
    if (w == 8) {
      if (arg1 != 4 || arg2 != 8) { _gf_errno = GF_E_SP_8_AR; return 0; }
      if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
      if (raltmap) { _gf_errno = GF_E_SP_8__A; return 0; }
    } else if (w == 16) {
      if (arg1 == 4 && arg2 == 16) {
        if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
      } else if (arg1 == 8 && (arg2 == 16 || arg2 == 8)) {
        if (rsse || rnosse) { _gf_errno = GF_E_SP_16_S; return 0; }
        if (raltmap) { _gf_errno = GF_E_SP_16_A; return 0; }
      } else { _gf_errno = GF_E_SP_16AR; return 0; }
    } else if (w == 32) {
      if ((arg1 == 8 && arg2 == 8) ||
          (arg1 == 8 && arg2 == 32) ||
          (arg1 == 16 && arg2 == 32)) {
        if (rsse || rnosse) { _gf_errno = GF_E_SP_32_S; return 0; }
        if (raltmap) { _gf_errno = GF_E_SP_32_A; return 0; }
      } else if (arg1 == 4 && arg2 == 32) {
        if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
        if (raltmap && !sse3) { _gf_errno = GF_E_SP_32AS; return 0; }
        if (raltmap && rnosse) { _gf_errno = GF_E_SP_32AS; return 0; }
      } else { _gf_errno = GF_E_SP_32AR; return 0; }
    } else if (w == 64) {
      if ((arg1 == 8 && arg2 == 8) ||
          (arg1 == 8 && arg2 == 64) ||
          (arg1 == 16 && arg2 == 64)) {
        if (rsse || rnosse) { _gf_errno = GF_E_SP_64_S; return 0; }
        if (raltmap) { _gf_errno = GF_E_SP_64_A; return 0; }
      } else if (arg1 == 4 && arg2 == 64) {
        if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
        if (raltmap && !sse3) { _gf_errno = GF_E_SP_64AS; return 0; }
        if (raltmap && rnosse) { _gf_errno = GF_E_SP_64AS; return 0; }
      } else { _gf_errno = GF_E_SP_64AR; return 0; }
    } else if (w == 128) {
      if (arg1 == 8 && arg2 == 128) {
        if (rsse || rnosse) { _gf_errno = GF_E_SP128_S; return 0; }
        if (raltmap) { _gf_errno = GF_E_SP128_A; return 0; }
      } else if (arg1 == 4 && arg2 == 128) {
        if (rsse && !sse3) { _gf_errno = GF_E_SP_SSE3; return 0; }
        if (raltmap && !sse3) { _gf_errno = GF_E_SP128AS; return 0; }
        if (raltmap && rnosse) { _gf_errno = GF_E_SP128AS; return 0; }
        if (!raltmap && rsse) { _gf_errno = GF_E_SP128AL; return 0; }
      } else { _gf_errno = GF_E_SP128AR; return 0; }
    } else { _gf_errno = GF_E_SPLIT_W; return 0; }
    return 1;
  }

  if (mult_type == GF_MULT_COMPOSITE) {
    if (w != 8 && w != 16 && w != 32
        && w != 64 && w != 128) { _gf_errno = GF_E_COMP__W; return 0; }
    /* The polynomial must fit in w/2 bits. For w == 128 every 64-bit value
       fits, and shifting a 64-bit value by 64 is undefined, so skip the test. */
    if (w < 128 && (poly >> (w/2)) != 0) { _gf_errno = GF_E_COMP_PP; return 0; }
    if (divide_type != GF_DIVIDE_DEFAULT) { _gf_errno = GF_E_DIVCOMP; return 0; }
    if (arg1 != 2) { _gf_errno = GF_E_COMP_A2; return 0; }
    if (rsse || rnosse) { _gf_errno = GF_E_COMP_SS; return 0; }
    if (base != NULL) {
      sub = (gf_internal_t *) base->scratch;
      if (sub->w != w/2) { _gf_errno = GF_E_BASE__W; return 0; }
      /* With no polynomial given, a default must exist for this base field. */
      if (poly == 0) {
        if (gf_composite_get_default_poly(base) == 0) { _gf_errno = GF_E_COMPXPP; return 0; }
      }
    }
    return 1;
  }

  _gf_errno = GF_E_UNKNOWN;
  return 0;
}
int gf_scratch_size(int w,
int mult_type,
int region_type,
@ -15,6 +414,8 @@ int gf_scratch_size(int w,
int arg1,
int arg2)
{
if (gf_error_check(w, mult_type, region_type, divide_type, arg1, arg2, 0, NULL) == 0) return 0;
switch(w) {
case 4: return gf_w4_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
case 8: return gf_w8_scratch_size(mult_type, region_type, divide_type, arg1, arg2);
@ -26,16 +427,31 @@ int gf_scratch_size(int w,
}
}
int gf_dummy_init(gf_t *gf)
extern int gf_size(gf_t *gf)
{
return 0;
gf_internal_t *h;
int s;
s = sizeof(gf_t);
h = (gf_internal_t *) gf->scratch;
s += gf_scratch_size(h->w, h->mult_type, h->region_type, h->divide_type, h->arg1, h->arg2);
if (h->mult_type == GF_MULT_COMPOSITE) s += gf_size(h->base_gf);
return s;
}
int gf_init_easy(gf_t *gf, int w)
{
return gf_init_hard(gf, w, GF_MULT_DEFAULT, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT, 0, 0, 0, NULL, NULL);
return gf_init_hard(gf, w, GF_MULT_DEFAULT, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
0, 0, 0, NULL, NULL);
}
/* Allen: What's going on here is this function is putting info into the
scratch mem of gf, and then calling the relevant REAL init
func for the word size. Probably done this way to consolidate
those aspects of initialization that don't rely on word size,
and then take care of word-size-specific stuff. */
int gf_init_hard(gf_t *gf, int w, int mult_type,
int region_type,
int divide_type,
@ -46,11 +462,14 @@ int gf_init_hard(gf_t *gf, int w, int mult_type,
{
int sz;
gf_internal_t *h;
if (gf_error_check(w, mult_type, region_type, divide_type,
arg1, arg2, prim_poly, base_gf) == 0) return 0;
sz = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
if (sz <= 0) return 0;
if (sz <= 0) return 0; /* This shouldn't happen, as all errors should get caught
in gf_error_check() */
if (scratch_memory == NULL) {
h = (gf_internal_t *) malloc(sz);
h->free_me = 1;
@ -71,8 +490,6 @@ int gf_init_hard(gf_t *gf, int w, int mult_type,
h->private += (sizeof(gf_internal_t));
gf->extract_word.w32 = NULL;
//printf("Created w=%d, with mult_type=%d and region_type=%d\n", w, mult_type, region_type);
switch(w) {
case 4: return gf_w4_init(gf);
case 8: return gf_w8_init(gf);
@ -94,6 +511,7 @@ int gf_free(gf_t *gf, int recursive)
free(h->base_gf);
}
if (h->free_me) free(h);
return 0; /* Making compiler happy */
}
void gf_alignment_error(char *s, int a)
@ -105,9 +523,9 @@ void gf_alignment_error(char *s, int a)
}
static
void gf_invert_binary_matrix(int *mat, int *inv, int rows) {
void gf_invert_binary_matrix(uint32_t *mat, uint32_t *inv, int rows) {
int cols, i, j, k;
int tmp;
uint32_t tmp;
cols = rows;
@ -172,34 +590,6 @@ uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp)
return inv[0];
}
/*
void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
{
uint64_t p, ta, shift, tb;
uint64_t *s64, *d64
s64 = rd->s_start;
d64 = rd->d_start;
while (s64 < (uint64_t *) rd->s_top) {
p = (rd->xor) ? *d64 : 0;
ta = *s64;
shift = 0;
while (ta != 0) {
tb = base[ta&0xffff];
p ^= (tb << shift);
ta >>= 16;
shift += 16;
}
*d64 = p;
d64++;
s64++;
}
}
*/
void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
{
uint64_t a, prod;
@ -226,8 +616,8 @@ void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
prod ^= base[a >> 48];
prod ^= *d64;
*d64 = prod;
*s64++;
*d64++;
s64++;
d64++;
}
} else {
while (d64 != top) {
@ -243,8 +633,8 @@ void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base)
prod <<= 16;
prod ^= base[a >> 48];
*d64 = prod;
*s64++;
*d64++;
s64++;
d64++;
}
}
}
@ -307,9 +697,71 @@ static void gf_slow_multiply_region(gf_region_data *rd, void *src, void *dest, v
}
}
/* If align>16, you align to 16 bytes, but make sure that within the aligned region bytes is a multiple of align. However, you make sure that the region itself is a multiple of align.
/* JSP - The purpose of this procedure is to error check alignment,
and to set up the region operation so that it can best leverage
large words.
If align = -1, then this is cauchy. You need to make sure that bytes is a multiple of w. */
It stores its information in rd.
Assuming you're not doing Cauchy coding, (see below for that),
then w will be 4, 8, 16, 32 or 64. It can't be 128 (probably
should change that).
src and dest must then be aligned on ceil(w/8)-byte boundaries.
Moreover, bytes must be a multiple of ceil(w/8). If the variable
align is equal to ceil(w/8), then we will set s_start = src,
d_start = dest, s_top to (src+bytes) and d_top to (dest+bytes).
And we return -- the implementation will go ahead and do the
multiplication on individual words (e.g. using discrete logs).
If align is greater than ceil(w/8), then the implementation needs
to work on groups of "align" bytes. For example, suppose you are
implementing BYTWO, without SSE. Then you will be doing the region
multiplication in units of 8 bytes, so align = 8. Or, suppose you
are doing a Quad table in GF(2^4). You will be doing the region
multiplication in units of 2 bytes, so align = 2. Or, suppose you
are doing split multiplication with SSE operations in GF(2^8).
Then align = 16. Worse yet, suppose you are doing split
multiplication with SSE operations in GF(2^16), with or without
ALTMAP. Then, you will be doing the multiplication on 256 bits at
a time. So align = 32.
When align does not equal ceil(w/8), we split the region
multiplication into three parts. We are going to make s_start be
the first address greater than or equal to src that is a multiple
of align. s_top is going to be the largest address <= src+bytes
such that (s_top - s_start) is a multiple of align. We do the
same with d_start and d_top. When we say that "src and dest must
be aligned with respect to each other", we mean that s_start-src
must equal d_start-dest.
Now, the region multiplication is done in three parts -- the part
between src and s_start must be done using single words.
Similarly, the part between s_top and src+bytes must also be done
using single words. The part between s_start and s_top will be
done in chunks of "align" bytes.
One final thing -- if align > 16, then s_start and d_start will be
aligned on a 16 byte boundary. Perhaps we should have two
variables: align and chunksize. Then we'd have s_start & d_start
aligned to "align", and have s_top-s_start be a multiple of
chunksize. That may be less confusing, but it would be a big
change.
Finally, if align = -1, then we are doing Cauchy multiplication,
using only XOR's. In this case, we're not going to care about
alignment because we are just doing XOR's. Instead, the only
thing we care about is that bytes must be a multiple of w.
This is not to say that alignment doesn't matter in performance
with XOR's. See that discussion in gf_multby_one().
After you call gf_set_region_data(), the procedure
gf_do_initial_region_alignment() calls gf->multiply.w32() on
everything between src and s_start. The procedure
gf_do_final_region_alignment() calls gf->multiply.w32() on
everything between s_top and src+bytes.
*/
void gf_set_region_data(gf_region_data *rd,
gf_t *gf,
@ -326,7 +778,7 @@ void gf_set_region_data(gf_region_data *rd,
uint32_t a;
unsigned long uls, uld;
if (gf == NULL) {
if (gf == NULL) { /* JSP - Can be NULL if you're just doing XOR's */
wb = 1;
} else {
h = gf->scratch;
@ -347,7 +799,7 @@ void gf_set_region_data(gf_region_data *rd,
a = (align <= 16) ? align : 16;
if (align == -1) { /* This is cauchy. Error check bytes, then set up the pointers
if (align == -1) { /* JSP: This is cauchy. Error check bytes, then set up the pointers
so that there are no alignment regions. */
if (bytes % h->w != 0) {
fprintf(stderr, "Error in region multiply operation.\n");
@ -386,14 +838,14 @@ void gf_set_region_data(gf_region_data *rd,
}
uls %= a;
if (uls != 0) uls = (align-uls);
if (uls != 0) uls = (a-uls);
rd->s_start = rd->src + uls;
rd->d_start = rd->dest + uls;
bytes -= uls;
bytes -= (bytes % align);
rd->s_top = rd->s_start + bytes;
rd->d_top = rd->d_start + bytes;
}
void gf_do_initial_region_alignment(gf_region_data *rd)
@ -413,25 +865,76 @@ void gf_multby_zero(void *dest, int bytes, int xor)
return;
}
/* JSP - gf_multby_one tries to do this in the most efficient way
possible. If xor = 0, then simply call memcpy() since that
should be optimized by the system. Otherwise, try to do the xor
in the following order:
If src and dest are aligned with respect to each other on 16-byte
boundaries and you have SSE instructions, then use aligned SSE
instructions.
If they aren't but you still have SSE instructions, use unaligned
SSE instructions.
If there are no SSE instructions, but they are aligned with
respect to each other on 8-byte boundaries, then do them with
uint64_t's.
Otherwise, call gf_unaligned_xor(), which does the following:
align a destination pointer along an 8-byte boundary, and then
memcpy 64 bytes (8 * UNALIGNED_BUFSIZE) at a time from the src
pointer to an array of uint64_t's. I'm not sure if that's the
best -- probably needs testing, but this seems like it could be
a black hole.
*/
static void gf_unaligned_xor(void *src, void *dest, int bytes);
void gf_multby_one(void *src, void *dest, int bytes, int xor)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
__m128i ms, md;
#endif
unsigned long uls, uld;
uint8_t *s8, *d8, *dtop8;
uint64_t *s64, *d64, *dtop64;
int abytes;
gf_region_data rd;
if (!xor) {
memcpy(dest, src, bytes);
return;
}
uls = (unsigned long) src;
uld = (unsigned long) dest;
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
s8 = (uint8_t *) src;
d8 = (uint8_t *) dest;
abytes = bytes & 0xfffffff0;
if (uls % 16 == uld % 16) {
gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 16);
while (s8 != rd.s_start) {
*d8 ^= *s8;
d8++;
s8++;
}
while (s8 < (uint8_t *) rd.s_top) {
ms = _mm_load_si128 ((__m128i *)(s8));
md = _mm_load_si128 ((__m128i *)(d8));
md = _mm_xor_si128(md, ms);
_mm_store_si128((__m128i *)(d8), md);
s8 += 16;
d8 += 16;
}
while (s8 != (uint8_t *) src + bytes) {
*d8 ^= *s8;
d8++;
s8++;
}
return;
}
abytes = (bytes & 0xfffffff0);
while (d8 < (uint8_t *) dest + abytes) {
ms = _mm_loadu_si128 ((__m128i *)(s8));
@ -449,8 +952,11 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
return;
#endif
/* If you don't have SSE, you'd better be aligned..... */
if (uls % 8 != uld % 8) {
gf_unaligned_xor(src, dest, bytes);
return;
}
gf_set_region_data(&rd, NULL, src, dest, bytes, 1, xor, 8);
s8 = (uint8_t *) src;
d8 = (uint8_t *) dest;
@ -480,3 +986,47 @@ void gf_multby_one(void *src, void *dest, int bytes, int xor)
}
return;
}
/* Number of 64-bit words that are staged through the local copy buffer
   on each pass of the main loop in gf_unaligned_xor(). */
#define UNALIGNED_BUFSIZE (8)

/* XORs `bytes` bytes starting at src into dest (dest ^= src).

   The work is done in three phases:
     1. byte-by-byte until dest reaches rd.d_start,
     2. in chunks of 8*UNALIGNED_BUFSIZE bytes: each chunk of src is
        memcpy'd into a local uint64_t buffer and then XOR'ed into dest
        one 64-bit word at a time,
     3. byte-by-byte for whatever is left up to dest+bytes.

   src is only ever read through memcpy() or as single bytes, so it may
   have any alignment. */
static void gf_unaligned_xor(void *src, void *dest, int bytes)
{
  uint64_t scopy[UNALIGNED_BUFSIZE], *d64;
  int i;
  gf_region_data rd;
  uint8_t *s8, *d8, *dend8;

  /* JSP - call gf_set_region_data(), but use dest in both places.  This is
     because I only want to set up dest.  If I used src, gf_set_region_data()
     would fail because src and dest are not aligned to each other wrt
     8-byte pointers.  I know this will actually align d_start to 16 bytes.
     If I change gf_set_region_data() to split alignment & chunksize, then
     I could do this correctly. */

  gf_set_region_data(&rd, NULL, dest, dest, bytes, 1, 1, 8*UNALIGNED_BUFSIZE);

  s8 = (uint8_t *) src;
  d8 = (uint8_t *) dest;

  /* Compute the end of the region on a byte pointer: arithmetic directly
     on a void * is a compiler extension, not ISO C. */
  dend8 = d8 + bytes;

  /* Phase 1: leading bytes before the aligned part of dest. */
  while (d8 < (uint8_t *) rd.d_start) {
    *d8 ^= *s8;
    d8++;
    s8++;
  }

  /* Phase 2: whole chunks, staged through scopy. */
  d64 = (uint64_t *) d8;
  while (d64 < (uint64_t *) rd.d_top) {
    memcpy(scopy, s8, 8*UNALIGNED_BUFSIZE);
    s8 += 8*UNALIGNED_BUFSIZE;
    for (i = 0; i < UNALIGNED_BUFSIZE; i++) {
      *d64 ^= scopy[i];
      d64++;
    }
  }

  /* Phase 3: trailing bytes after the last whole chunk. */
  d8 = (uint8_t *) d64;
  while (d8 < dend8) {
    *d8 ^= *s8;
    d8++;
    s8++;
  }
}

29
gf_54.c
View File

@ -1,29 +0,0 @@
/*
* Initializes GF(2^16) with SPLIT_TABLE (16,4), SSE and ALTMAP, using
* caller-allocated scratch memory, and prints "Yo" on success.
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include "gf_complete.h"
/* Sizes and allocates scratch memory by hand, then initializes a
   GF(2^16) SPLIT_TABLE (16,4) field with SSE and ALTMAP in that memory.
   Prints "Yo" and returns 0 on success; exits with status 1 on any
   failure. */
int main(void)
{
  gf_t gf;
  void *scratch;
  int size;

  size = gf_scratch_size(16, GF_MULT_SPLIT_TABLE,
                         GF_REGION_SSE | GF_REGION_ALTMAP,
                         GF_DIVIDE_DEFAULT,
                         16, 4);

  /* gf_scratch_size() reports failure by returning 0; "<= 0" also covers
     any negative failure value. */
  if (size <= 0) exit(1);

  scratch = malloc(size);
  if (scratch == NULL) { perror("malloc"); exit(1); }

  if (!gf_init_hard(&gf, 16, GF_MULT_SPLIT_TABLE,
                    GF_REGION_SSE | GF_REGION_ALTMAP,
                    GF_DIVIDE_DEFAULT,
                    0, 16, 4, NULL, scratch)) {
    free(scratch);
    exit(1);
  }

  printf("Yo\n");

  /* The scratch memory was allocated here, so it is released here. */
  free(scratch);
  return 0;
}

View File

@ -16,7 +16,7 @@ void usage(char *s)
fprintf(stderr, " If w has an h on the end, treat a, b and the sum as hexadecimal (no 0x required)\n");
fprintf(stderr, "\n");
fprintf(stderr, " legal w are: 1-32, 64 and 128\n");
fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n");
fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n");
if (s != NULL) fprintf(stderr, "%s", s);
exit(1);

View File

@ -4,22 +4,30 @@
#pragma once
#include <stdint.h>
#ifdef INTEL_SSE4
#include <nmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
#ifdef INTEL_SSE4
#define INTEL_SSSE3
#include <nmmintrin.h>
#endif
#ifdef INTEL_PCLMUL
#include <wmmintrin.h>
#ifdef INTEL_SSSE3
#define INTEL_SSE2
#include <tmmintrin.h>
#endif
/* This does either memcpy or xor, depending on "xor" */
#ifdef INTEL_SSE2
#include <emmintrin.h>
#endif
extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
#ifdef INTEL_PCLMUL
#include <wmmintrin.h>
#ifdef INTEL_SSE4
#define INTEL_SSE4_PCLMUL
#endif
#ifdef INTEL_SSSE3
#define INTEL_SSSE3_PCLMUL
#endif
#endif
#define GF_W128_IS_ZERO(val) (val[0] == 0 && val[1] == 0)
#define GF_W128_EQUAL(val1, val2) ((val1[0] == val2[0]) && (val1[1] == val2[1]))
/* These are the different ways to perform multiplication.
Not all are implemented for all values of w.
@ -27,30 +35,30 @@ extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
typedef enum {GF_MULT_DEFAULT,
GF_MULT_SHIFT,
GF_MULT_CARRY_FREE,
GF_MULT_GROUP,
GF_MULT_BYTWO_p,
GF_MULT_BYTWO_b,
GF_MULT_TABLE,
GF_MULT_LOG_TABLE,
GF_MULT_LOG_ZERO,
GF_MULT_LOG_ZERO_EXT,
GF_MULT_SPLIT_TABLE,
GF_MULT_COMPOSITE } gf_mult_type_t;
/* These are the different ways to optimize region
operations. They are bits because you can compose them:
You can mix SINGLE/DOUBLE/QUAD, LAZY, SSE/NOSSE, STDMAP/ALTMAP/CAUCHY.
operations. They are bits because you can compose them.
Certain optimizations only apply to certain gf_mult_type_t's.
Again, please see documentation for how to use these */
#define GF_REGION_DEFAULT (0x0)
#define GF_REGION_SINGLE_TABLE (0x1)
#define GF_REGION_DOUBLE_TABLE (0x2)
#define GF_REGION_QUAD_TABLE (0x4)
#define GF_REGION_LAZY (0x8)
#define GF_REGION_SSE (0x10)
#define GF_REGION_NOSSE (0x20)
#define GF_REGION_STDMAP (0x40)
#define GF_REGION_ALTMAP (0x80)
#define GF_REGION_CAUCHY (0x100)
#define GF_REGION_DOUBLE_TABLE (0x1)
#define GF_REGION_QUAD_TABLE (0x2)
#define GF_REGION_LAZY (0x4)
#define GF_REGION_SSE (0x8)
#define GF_REGION_NOSSE (0x10)
#define GF_REGION_ALTMAP (0x20)
#define GF_REGION_CAUCHY (0x40)
typedef uint32_t gf_region_type_t;
@ -74,6 +82,9 @@ typedef uint32_t gf_val_32_t;
typedef uint64_t gf_val_64_t;
typedef uint64_t *gf_val_128_t;
extern int _gf_errno;
extern void gf_error();
typedef struct gf *GFP;
typedef union gf_func_a_b {
@ -109,8 +120,21 @@ typedef struct gf {
void *scratch;
} gf_t;
/* Initializes the GF to defaults. Pass it a pointer to a gf_t.
Returns 0 on failure, 1 on success. */
extern int gf_init_easy(GFP gf, int w);
/* Initializes the GF changing the defaults.
Returns 0 on failure, 1 on success.
Pass it a pointer to a gf_t.
For mult_type and divide_type, use one of gf_mult_type_t gf_divide_type_t .
For region_type, OR together the GF_REGION_xxx's defined above.
Use 0 as prim_poly for defaults. Otherwise, the leading 1 is optional.
Use NULL for scratch_memory to have init_hard allocate memory. Otherwise,
use gf_scratch_size() to determine how big scratch_memory has to be.
*/
extern int gf_init_hard(GFP gf,
int w,
int mult_type,
@ -122,6 +146,9 @@ extern int gf_init_hard(GFP gf,
GFP base_gf,
void *scratch_memory);
/* Determines the size for scratch_memory.
Returns 0 on failure and non-zero on success. */
extern int gf_scratch_size(int w,
int mult_type,
int region_type,
@ -129,25 +156,32 @@ extern int gf_scratch_size(int w,
int arg1,
int arg2);
/* This reports the gf_scratch_size of a gf_t that has already been created */
extern int gf_size(GFP gf);
/* Frees scratch memory if gf_init_easy/gf_init_hard called malloc.
If recursive = 1, then it calls itself recursively on base_gf. */
extern int gf_free(GFP gf, int recursive);
/* This is support for inline single multiplications and divisions.
I know it's yucky, but if you've got to be fast, you've got to be fast.
We'll support inlines for w=4, w=8 and w=16.
We support inlining for w=4, w=8 and w=16.
To use inline multiplication and division with w=4 or 8, you should use the
default gf_t, or one with a single table. Otherwise, gf_w4/8_get_mult_table()
will return NULL. */
will return NULL. Similarly, with w=16, the gf_t must be LOG */
uint8_t *gf_w4_get_mult_table(GFP gf);
uint8_t *gf_w4_get_div_table(GFP gf);
#define GF_W4_INLINE_MULTDIV(table, a, b) (table[((a)<<4)|b])
#define GF_W4_INLINE_MULTDIV(table, a, b) (table[((a)<<4)|(b)])
uint8_t *gf_w8_get_mult_table(GFP gf);
uint8_t *gf_w8_get_div_table(GFP gf);
#define GF_W8_INLINE_MULTDIV(table, a, b) (table[(((uint32_t) a)<<8)|b])
#define GF_W8_INLINE_MULTDIV(table, a, b) (table[(((uint32_t) (a))<<8)|(b)])
uint16_t *gf_w16_get_log_table(GFP gf);
uint16_t *gf_w16_get_mult_alog_table(GFP gf);

73
gf_example_5.c Normal file
View File

@ -0,0 +1,73 @@
/*
* gf_example_5.c
*
* Demonstrating altmap and extract_word
*/
#include <stdio.h>
#include <getopt.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include "gf_complete.h"
#include "gf_rand.h"
/* Writes the one-line usage message to stderr and terminates the
   program with exit status 1.  The argument s is not used. */
void usage(char *s)
{
  fputs("usage: gf_example_5\n", stderr);
  exit(1);
}
/* Multiplies a region of 30 random 16-bit words by 0x1234 in GF(2^16)
   using SPLIT_TABLE (16,4) with ALTMAP, prints the raw contents of both
   regions, and then prints each source/product word as returned by
   extract_word.  Returns 0 on success, exits with 1 on failure. */
int main(int argc, char **argv)
{
  uint16_t *a_base, *b_base;   /* Pointers returned by malloc(), kept for free() */
  uint16_t *a, *b;             /* Source and destination regions */
  int i, j;
  gf_t gf;

  if (gf_init_hard(&gf, 16, GF_MULT_SPLIT_TABLE, GF_REGION_ALTMAP, GF_DIVIDE_DEFAULT,
                   0, 16, 4, NULL, NULL) == 0) {
    fprintf(stderr, "gf_init_hard failed\n");
    exit(1);
  }

  a_base = (uint16_t *) malloc(200);
  b_base = (uint16_t *) malloc(200);
  if (a_base == NULL || b_base == NULL) {
    perror("malloc");
    exit(1);
  }

  /* Start both regions 6 words (12 bytes) past the allocated addresses.
     NOTE(review): presumably so that the regions are not 16-byte aligned
     and the unaligned head/tail handling gets exercised -- confirm. */
  a = a_base + 6;
  b = b_base + 6;

  MOA_Seed(0);

  for (i = 0; i < 30; i++) a[i] = MOA_Random_W(16, 1);

  gf.multiply_region.w32(&gf, a, b, 0x1234, 30*2, 0);

  printf("a: 0x%lx b: 0x%lx\n", (unsigned long) a, (unsigned long) b);

  /* Dump the raw memory of both regions, ten words per row. */
  for (i = 0; i < 30; i += 10) {
    printf("\n");
    printf(" ");
    for (j = 0; j < 10; j++) printf(" %4d", i+j);
    printf("\n");

    printf("a:");
    for (j = 0; j < 10; j++) printf(" %04x", a[i+j]);
    printf("\n");

    printf("b:");
    for (j = 0; j < 10; j++) printf(" %04x", b[i+j]);
    printf("\n");
    printf("\n");
  }

  /* Print the words as extract_word reports them, two per line. */
  for (i = 0; i < 15; i ++) {
    printf("Word %2d: 0x%04x * 0x1234 = 0x%04x ", i,
           gf.extract_word.w32(&gf, a, 30*2, i),
           gf.extract_word.w32(&gf, b, 30*2, i));
    printf("Word %2d: 0x%04x * 0x1234 = 0x%04x\n", i+15,
           gf.extract_word.w32(&gf, a, 30*2, i+15),
           gf.extract_word.w32(&gf, b, 30*2, i+15));
  }

  free(a_base);
  free(b_base);
  return 0;
}

79
gf_example_6.c Normal file
View File

@ -0,0 +1,79 @@
/*
* gf_example_6.c
*
* Demonstrating altmap and extract_word
*/
#include <stdio.h>
#include <getopt.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include "gf_complete.h"
#include "gf_rand.h"
/* Writes the one-line usage message to stderr and terminates the
   program with exit status 1.  The argument s is not used. */
void usage(char *s)
{
  fputs("usage: gf_example_6\n", stderr);
  exit(1);
}
/* Builds GF(2^32) as a COMPOSITE field (arg1 = 2) with ALTMAP on top of
   a LOG_TABLE GF(2^16) base field, multiplies a region of 30 random
   32-bit words by 0x12345678, prints the raw contents of both regions,
   and then prints each source/product word as returned by extract_word.
   Returns 0 on success, exits with 1 on failure. */
int main(int argc, char **argv)
{
  uint32_t *a_base, *b_base;   /* Pointers returned by malloc(), kept for free() */
  uint32_t *a, *b;             /* Source and destination regions */
  int i, j;
  gf_t gf, gf_16;

  /* Base field first: it is handed to the composite field below. */
  if (gf_init_hard(&gf_16, 16, GF_MULT_LOG_TABLE, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
                   0, 0, 0, NULL, NULL) == 0) {
    fprintf(stderr, "gf_init_hard (16) failed\n");
    exit(1);
  }

  if (gf_init_hard(&gf, 32, GF_MULT_COMPOSITE, GF_REGION_ALTMAP, GF_DIVIDE_DEFAULT,
                   0, 2, 0, &gf_16, NULL) == 0) {
    fprintf(stderr, "gf_init_hard (32) failed\n");
    exit(1);
  }

  a_base = (uint32_t *) malloc(200);
  b_base = (uint32_t *) malloc(200);
  if (a_base == NULL || b_base == NULL) {
    perror("malloc");
    exit(1);
  }

  /* Start both regions 3 words (12 bytes) past the allocated addresses.
     NOTE(review): presumably so that the regions are not 16-byte aligned
     and the unaligned head/tail handling gets exercised -- confirm. */
  a = a_base + 3;
  b = b_base + 3;

  MOA_Seed(0);

  for (i = 0; i < 30; i++) a[i] = MOA_Random_W(32, 1);

  gf.multiply_region.w32(&gf, a, b, 0x12345678, 30*4, 0);

  printf("a: 0x%lx b: 0x%lx\n", (unsigned long) a, (unsigned long) b);

  /* Dump the raw memory of both regions, ten words per row. */
  for (i = 0; i < 30; i += 10) {
    printf("\n");
    printf(" ");
    for (j = 0; j < 10; j++) printf(" %8d", i+j);
    printf("\n");

    printf("a:");
    for (j = 0; j < 10; j++) printf(" %08x", a[i+j]);
    printf("\n");

    printf("b:");
    for (j = 0; j < 10; j++) printf(" %08x", b[i+j]);
    printf("\n");
    printf("\n");
  }

  /* Print the words as extract_word reports them, two per line. */
  for (i = 0; i < 15; i ++) {
    printf("Word %2d: 0x%08x * 0x12345678 = 0x%08x ", i,
           gf.extract_word.w32(&gf, a, 30*4, i),
           gf.extract_word.w32(&gf, b, 30*4, i));
    printf("Word %2d: 0x%08x * 0x12345678 = 0x%08x\n", i+15,
           gf.extract_word.w32(&gf, a, 30*4, i+15),
           gf.extract_word.w32(&gf, b, 30*4, i+15));
  }

  free(a_base);
  free(b_base);
  return 0;
}

70
gf_example_7.c Normal file
View File

@ -0,0 +1,70 @@
/*
* gf_example_7.c
*
* Demonstrating extract_word and Cauchy
*/
#include <stdio.h>
#include <getopt.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include "gf_complete.h"
#include "gf_rand.h"
/* Writes the one-line usage message to stderr and terminates the
   program with exit status 1.  The argument s is not used. */
void usage(char *s)
{
  fputs("usage: gf_example_7\n", stderr);
  exit(1);
}
/* Multiplies a 3-byte region by 5 in GF(2^3) using TABLE multiplication
   with the CAUCHY region type, prints both regions as bytes and as bit
   strings, and then prints the eight source/product words as returned
   by extract_word.  Returns 0 on success, exits with 1 on failure. */
int main(int argc, char **argv)
{
  uint8_t *a, *b;
  int i, j;
  gf_t gf;

  if (gf_init_hard(&gf, 3, GF_MULT_TABLE, GF_REGION_CAUCHY, GF_DIVIDE_DEFAULT, 0, 0, 0, NULL, NULL) == 0) {
    fprintf(stderr, "gf_init_hard failed\n");
    exit(1);
  }

  a = (uint8_t *) malloc(3);
  b = (uint8_t *) malloc(3);
  if (a == NULL || b == NULL) {
    perror("malloc");
    exit(1);
  }

  MOA_Seed(0);

  for (i = 0; i < 3; i++) a[i] = MOA_Random_W(8, 1);

  gf.multiply_region.w32(&gf, a, b, 5, 3, 0);

  printf("a: 0x%lx b: 0x%lx\n", (unsigned long) a, (unsigned long) b);

  printf("\n");
  printf("a: 0x%02x 0x%02x 0x%02x\n", a[0], a[1], a[2]);
  printf("b: 0x%02x 0x%02x 0x%02x\n", b[0], b[1], b[2]);
  printf("\n");

  /* Show each byte of the two regions as bits, most significant first. */
  printf("a bits:");
  for (i = 0; i < 3; i++) {
    printf(" ");
    for (j = 7; j >= 0; j--) printf("%c", (a[i] & (1 << j)) ? '1' : '0');
  }
  printf("\n");

  printf("b bits:");
  for (i = 0; i < 3; i++) {
    printf(" ");
    for (j = 7; j >= 0; j--) printf("%c", (b[i] & (1 << j)) ? '1' : '0');
  }
  printf("\n");

  printf("\n");

  /* 3 bytes = 24 bits = eight 3-bit words. */
  for (i = 0; i < 8; i++) {
    printf("Word %2d: %d * 5 = %d\n", i,
           gf.extract_word.w32(&gf, a, 3, i),
           gf.extract_word.w32(&gf, b, 3, i));
  }

  free(a);
  free(b);
  return 0;
}

View File

@ -95,12 +95,20 @@ void gf_general_set_random(gf_general_t *v, int w, int zero_ok)
}
}
void gf_general_val_to_s(gf_general_t *v, int w, char *s)
void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex)
{
if (w <= 32) {
sprintf(s, "%x", v->w32);
if (hex) {
sprintf(s, "%x", v->w32);
} else {
sprintf(s, "%d", v->w32);
}
} else if (w <= 64) {
sprintf(s, "%llx", (long long unsigned int) v->w64);
if (hex) {
sprintf(s, "%llx", (long long unsigned int) v->w64);
} else {
sprintf(s, "%lld", (long long unsigned int) v->w64);
}
} else {
if (v->w128[0] == 0) {
sprintf(s, "%llx", (long long unsigned int) v->w128[1]);
@ -111,6 +119,64 @@ void gf_general_val_to_s(gf_general_t *v, int w, char *s)
}
}
/* Parses the string s into v for a field of word size w.

   w <= 32:       s is read as hex (hex != 0) or decimal into v->w32, and
                  must fit in w bits.
   32 < w <= 64:  s is read as hex or decimal into v->w64.
   w > 64:        hex only.  Up to 32 hex digits are accepted; the last 16
                  digits go into v->w128[1] and any digits before them go
                  into v->w128[0].

   s is modified temporarily while splitting a long 128-bit value, but is
   always restored before returning.

   Returns 1 on success and 0 on failure. */
int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex)
{
  int l;
  int save;
  int ok;

  if (w <= 32) {
    /* Compare against 1 rather than 0: sscanf() returns EOF (negative) on
       empty input, which must also be treated as a failure. */
    if (hex) {
      if (sscanf(s, "%x", &(v->w32)) != 1) return 0;
    } else {
      if (sscanf(s, "%d", &(v->w32)) != 1) return 0;
    }
    if (w == 32) return 1;
    if (w == 31) {
      /* Handled separately because the (1 << w)-1 mask below would need
         a shift into the sign bit. */
      if (v->w32 & (1U << 31)) return 0;
      return 1;
    }
    if (v->w32 & ~((1U << w)-1)) return 0;
    return 1;
  } else if (w <= 64) {
    if (hex) return (sscanf(s, "%llx", &(v->w64)) == 1);
    return (sscanf(s, "%lld", &(v->w64)) == 1);
  } else {
    if (!hex) return 0;
    l = strlen(s);
    if (l <= 16) {
      v->w128[0] = 0;
      return (sscanf(s, "%llx", &(v->w128[1])) == 1);
    } else {
      if (l > 32) return 0;

      /* Terminate the string in front of the low 16 digits so that the
         high part can be parsed on its own, then put the character back
         before parsing the low part (which starts at that character). */
      save = s[l-16];
      s[l-16] = '\0';
      ok = (sscanf(s, "%llx", &(v->w128[0])) == 1);
      s[l-16] = save;
      if (!ok) return 0;

      return (sscanf(s+(l-16), "%llx", &(v->w128[1])) == 1);
    }
  }
}
/* Stores a + b in c.  Addition is a bitwise XOR of the member of the
   gf_general_t that matches the word size of gf: w32 for w <= 32,
   w64 for w <= 64, and both halves of w128 otherwise. */
void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
{
  int width = ((gf_internal_t *) gf->scratch)->w;

  if (width > 64) {
    c->w128[0] = a->w128[0] ^ b->w128[0];
    c->w128[1] = a->w128[1] ^ b->w128[1];
    return;
  }

  if (width > 32) {
    c->w64 = a->w64 ^ b->w64;
    return;
  }

  c->w32 = a->w32 ^ b->w32;
}
void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c)
{
gf_internal_t *h;
@ -229,19 +295,19 @@ void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *o
if (!gf_general_are_equal(&ft, &sb, w)) {
printf("Problem with region multiply (all values in hex):\n");
printf(" Target address base: 0x%lx. Word 0x%x of 0x%x. Xor: %d\n",
fprintf(stderr,"Problem with region multiply (all values in hex):\n");
fprintf(stderr," Target address base: 0x%lx. Word 0x%x of 0x%x. Xor: %d\n",
(unsigned long) final_target, i, words, xor);
gf_general_val_to_s(a, w, sa);
gf_general_val_to_s(&oa, w, soa);
gf_general_val_to_s(&ot, w, sot);
gf_general_val_to_s(&ft, w, sft);
gf_general_val_to_s(&sb, w, ssb);
printf(" Value: %s\n", sa);
printf(" Original source word: %s\n", soa);
if (xor) printf(" XOR with target word: %s\n", sot);
printf(" Product word: %s\n", sft);
printf(" It should be: %s\n", ssb);
gf_general_val_to_s(a, w, sa, 1);
gf_general_val_to_s(&oa, w, soa, 1);
gf_general_val_to_s(&ot, w, sot, 1);
gf_general_val_to_s(&ft, w, sft, 1);
gf_general_val_to_s(&sb, w, ssb, 1);
fprintf(stderr," Value: %s\n", sa);
fprintf(stderr," Original source word: %s\n", soa);
if (xor) fprintf(stderr," XOR with target word: %s\n", sot);
fprintf(stderr," Product word: %s\n", sft);
fprintf(stderr," It should be: %s\n", ssb);
exit(0);
}
}
@ -251,7 +317,7 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
{
void *top;
gf_general_t g;
uint8_t *r8;
uint8_t *r8, *r8a;
uint16_t *r16;
uint32_t *r32;
uint64_t *r64;
@ -263,6 +329,8 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
However, don't allow for zeros in rb, because that will screw up
division.
When w is 4, you fill the regions with random 4-bit words in each byte.
Otherwise, treat every four bytes as an uint32_t
and fill it with a random value mod (1 << w).
*/
@ -296,6 +364,17 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
}
rb += (w/8);
}
} else if (w == 4) {
r8a = (uint8_t *) ra;
r8 = (uint8_t *) rb;
while (r8 < (uint8_t *) top) {
gf_general_set_random(&g, w, 1);
*r8a = g.w32;
gf_general_set_random(&g, w, 0);
*r8 = g.w32;
r8a++;
r8++;
}
} else {
r32 = (uint32_t *) ra;
for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 1);
@ -306,7 +385,7 @@ void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size)
/* This sucks, but in order to time, you really need to avoid putting ifs in
the inner loops. So, I'm doing a separate timing test for each w:
8, 16, 32, 64, 128 and everything else. Fortunately, the "everything else"
(4 & 8), 16, 32, 64, 128 and everything else. Fortunately, the "everything else"
tests can be equivalent to w=32.
I'm also putting the results back into ra, because otherwise, the optimizer might
@ -327,7 +406,7 @@ int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, cha
w = h->w;
top = ra + size;
if (w == 8) {
if (w == 8 || w == 4) {
r8a = (uint8_t *) ra;
r8b = (uint8_t *) rb;
top8 = (uint8_t *) top;

View File

@ -32,10 +32,12 @@ int gf_general_is_zero(gf_general_t *v, int w);
int gf_general_is_one(gf_general_t *v, int w);
int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w);
void gf_general_val_to_s(gf_general_t *v, int w, char *s);
void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex);
int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex);
void gf_general_set_random(gf_general_t *v, int w, int zero_ok);
void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b);

View File

@ -9,6 +9,7 @@
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include "gf_complete.h"
#include "gf_rand.h"

100
gf_int.h
View File

@ -51,11 +51,15 @@ extern int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divid
void gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor);
gf_val_32_t gf_wgen_extract_word(gf_t *gf, void *start, int bytes, int index);
extern void gf_alignment_error(char *s, int a);
extern uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp);
/* This returns the correct default for prim_poly when base is used as the base
field for COMPOSITE. It returns 0 if we don't have a default prim_poly. */
extern uint64_t gf_composite_get_default_poly(gf_t *base);
/* This structure lets you define a region multiply. It helps because you can handle
unaligned portions of the data with the procedures below, which really cleans
up the code. */
@ -96,3 +100,97 @@ extern void gf_do_final_region_alignment(gf_region_data *rd);
extern void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base);
extern void gf_multby_zero(void *dest, int bytes, int xor);
extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
/* Error codes for the library; the error-checking code stores one of
   these in _gf_errno before returning failure.  Each comment gives the
   condition under which the code is reported. */
typedef enum {GF_E_MDEFDIV, /* Div != Default && Mult == Default */
GF_E_MDEFREG, /* Reg != Default && Mult == Default */
GF_E_MDEFARG, /* Args != Default && Mult == Default */
GF_E_DIVCOMP, /* Mult == Composite && Div != Default */
GF_E_CAUCOMP, /* Mult == Composite && Reg == CAUCHY */
GF_E_DOUQUAD, /* Reg == DOUBLE && Reg == QUAD */
GF_E_SSE__NO, /* Reg == SSE && Reg == NOSSE */
GF_E_CAUCHYB, /* Reg == CAUCHY && Other Reg */
GF_E_CAUGT32, /* Reg == CAUCHY && w > 32*/
GF_E_ARG1SET, /* Arg1 != 0 && Mult \notin COMPOSITE/SPLIT/GROUP */
GF_E_ARG2SET, /* Arg2 != 0 && Mult \notin SPLIT/GROUP */
GF_E_MATRIXW, /* Div == MATRIX && w > 32 */
GF_E_BAD___W, /* Illegal w */
GF_E_DOUBLET, /* Reg == DOUBLE && Mult != TABLE */
GF_E_DOUBLEW, /* Reg == DOUBLE && w \notin {4,8} */
GF_E_DOUBLEJ, /* Reg == DOUBLE && other Reg */
GF_E_DOUBLEL, /* Reg == DOUBLE & LAZY but w = 4 */
GF_E_QUAD__T, /* Reg == QUAD && Mult != TABLE */
GF_E_QUAD__W, /* Reg == QUAD && w != 4 */
GF_E_QUAD__J, /* Reg == QUAD && other Reg */
GF_E_LAZY__X, /* Reg == LAZY && not DOUBLE or QUAD*/
GF_E_ALTSHIF, /* Mult == Shift && Reg == ALTMAP */
GF_E_SSESHIF, /* Mult == Shift && Reg == SSE|NOSSE */
GF_E_ALT_CFM, /* Mult == CARRY_FREE && Reg == ALTMAP */
GF_E_SSE_CFM, /* Mult == CARRY_FREE && Reg == SSE|NOSSE */
GF_E_PCLMULX, /* Mult == Carry_Free && No PCLMUL */
GF_E_ALT_BY2, /* Mult == Bytwo_x && Reg == ALTMAP */
GF_E_BY2_SSE, /* Mult == Bytwo_x && Reg == SSE && No SSE2 */
GF_E_LOGBADW, /* Mult == LOGx, w too big*/
GF_E_LOG___J, /* Mult == LOGx, && Reg == SSE|ALTMAP|NOSSE */
GF_E_ZERBADW, /* Mult == LOG_ZERO, w \notin {8,16} */
GF_E_ZEXBADW, /* Mult == LOG_ZERO_EXT, w != 8 */
GF_E_LOGPOLY, /* Mult == LOG & poly not primitive */
GF_E_GR_ARGX, /* Mult == GROUP, Bad arg1/2 */
GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
GF_E_GR_SSE4, /* Mult == GROUP, w == 128, No SSE4 */
GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */
GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */
GF_E_TABLE_W, /* Mult == TABLE, w too big */
GF_E_TAB_SSE, /* Mult == TABLE, SSE|NOSSE only apply to w == 4 */
GF_E_TABSSE3, /* Mult == TABLE, Need SSSE3 for SSE */
GF_E_TAB_ALT, /* Mult == TABLE, Reg == ALTMAP */
GF_E_SP128AR, /* Mult == SPLIT, w=128, Bad arg1/arg2 */
GF_E_SP128AL, /* Mult == SPLIT, w=128, SSE requires ALTMAP */
GF_E_SP128AS, /* Mult == SPLIT, w=128, ALTMAP requires SSE */
GF_E_SP128_A, /* Mult == SPLIT, w=128, SSE only with 4/128 */
GF_E_SP128_S, /* Mult == SPLIT, w=128, ALTMAP only with 4/128 */
GF_E_SPLIT_W, /* Mult == SPLIT, Bad w (8, 16, 32, 64, 128) */
GF_E_SP_16AR, /* Mult == SPLIT, w=16, Bad arg1/arg2 */
GF_E_SP_16_A, /* Mult == SPLIT, w=16, ALTMAP only with 4/16 */
GF_E_SP_16_S, /* Mult == SPLIT, w=16, SSE only with 4/16 */
GF_E_SP_32AR, /* Mult == SPLIT, w=32, Bad arg1/arg2 */
GF_E_SP_32AS, /* Mult == SPLIT, w=32, ALTMAP requires SSE */
GF_E_SP_32_A, /* Mult == SPLIT, w=32, ALTMAP only with 4/32 */
GF_E_SP_32_S, /* Mult == SPLIT, w=32, SSE only with 4/32 */
GF_E_SP_64AR, /* Mult == SPLIT, w=64, Bad arg1/arg2 */
GF_E_SP_64AS, /* Mult == SPLIT, w=64, ALTMAP requires SSE */
GF_E_SP_64_A, /* Mult == SPLIT, w=64, ALTMAP only with 4/64 */
GF_E_SP_64_S, /* Mult == SPLIT, w=64, SSE only with 4/64 */
GF_E_SP_8_AR, /* Mult == SPLIT, w=8, Bad arg1/arg2 */
GF_E_SP_8__A, /* Mult == SPLIT, w=8, no ALTMAP */
GF_E_SP_SSE3, /* Mult == SPLIT, Need SSSE3 for SSE */
GF_E_COMP_A2, /* Mult == COMP, arg1 must be = 2 */
GF_E_COMP_SS, /* Mult == COMP, SSE|NOSSE */
GF_E_COMP__W, /* Mult == COMP, Bad w. */
GF_E_UNKFLAG, /* Unknown flag in create_from.... */
GF_E_UNKNOWN, /* Unknown mult_type. */
GF_E_UNK_REG, /* Unknown region_type. */
GF_E_UNK_DIV, /* Unknown divide_type. */
GF_E_CFM___W, /* Mult == CFM, Bad w. */
GF_E_CFM4POL, /* Mult == CFM & Prim Poly has high bits set. */
GF_E_CFM8POL, /* Mult == CFM & Prim Poly has high bits set. */
GF_E_CF16POL, /* Mult == CFM & Prim Poly has high bits set. */
GF_E_CF32POL, /* Mult == CFM & Prim Poly has high bits set. */
GF_E_CF64POL, /* Mult == CFM & Prim Poly has high bits set. */
GF_E_FEWARGS, /* Too few args in argc/argv. */
GF_E_BADPOLY, /* Bad primitive polynomial -- too many bits set. */
GF_E_COMP_PP, /* Bad primitive polynomial -- bigger than sub-field. */
GF_E_COMPXPP, /* Can't derive a default pp for composite field. */
GF_E_BASE__W, /* Composite -- Base field is the wrong size. */
GF_E_TWOMULT, /* In create_from... two -m's. */
GF_E_TWO_DIV, /* In create_from... two -d's. */
GF_E_POLYSPC, /* Bad number after -p. */
GF_E_SPLITAR, /* Ran out of arguments in SPLIT */
GF_E_SPLITNU, /* Arguments not integers in SPLIT. */
GF_E_GROUPAR, /* Ran out of arguments in GROUP */
GF_E_GROUPNU, /* Arguments not integers in GROUP. */
GF_E_DEFAULT } gf_error_type_t;

View File

@ -11,179 +11,172 @@
#include <time.h>
#include "gf_complete.h"
#include "gf_int.h"
#include "gf_method.h"
/*
 * Print the help text that describes how a method is specified on the
 * command line (MULTIPLY, REGION and DIVIDE values) to stderr.
 *
 * Takes no arguments and returns nothing; its only effect is the output.
 */
void methods_to_stderr()
{
  /* One entry per output line, emitted in order.  None of the lines
     contains a '%', so they can be written verbatim with fputs(). */
  static const char * const help_lines[] = {
    "To specify the methods, do one of the following: \n",
    " - leave empty to use defaults\n",
    " - use a single dash to use defaults\n",
    " - specify MULTIPLY REGION DIVIDE\n",
    "\n",
    "Legal values of MULTIPLY:\n",
    " SHIFT: shift\n",
    " GROUP g_mult g_reduce: the Group technique - see the paper\n",
    " BYTWO_p: BYTWO doubling the product.\n",
    " BYTWO_b: BYTWO doubling b (more efficient than BYTWO_p)\n",
    " TABLE: Full multiplication table\n",
    " LOG: Discrete logs\n",
    " LOG_ZERO: Discrete logs with a large table for zeros\n",
    " LOG_ZERO_EXT: Discrete logs with an extra large table for zeros\n",
    " SPLIT g_a g_b: Split tables defined by g_a and g_b\n",
    " COMPOSITE k rec METHOD: Composite field. GF((2^l)^k), l=w/k.\n",
    " rec = 0 means inline single multiplication\n",
    " rec = 1 means recursive single multiplication\n",
    " METHOD is the method of the base field in GF(2^l)\n",
    "\n",
    "Legal values of REGION: Specify multiples with commas e.g. 'DOUBLE,LAZY'\n",
    " -: Use defaults\n",
    " SINGLE/DOUBLE/QUAD: Expand tables\n",
    " LAZY: Lazily create table (only applies to TABLE and SPLIT)\n",
    " SSE/NOSSE: Use 128-bit SSE instructions if you can\n",
    " CAUCHY/ALTMAP/STDMAP: Use different memory mappings\n",
    "\n",
    "Legal values of DIVIDE:\n",
    " -: Use defaults\n",
    " MATRIX: Use matrix inversion\n",
    " EUCLID: Use the extended Euclidean algorithm.\n",
    "\n",
    "See the user's manual for more information.\n",
    "There are many restrictions, so it is better to simply use defaults in most cases.\n"
  };
  size_t line;

  for (line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++) {
    fputs(help_lines[line], stderr);
  }
}
int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
{
int mult_type, divide_type, region_type;
uint32_t prim_poly = 0;
int arg1, arg2, subrg_size;
uint64_t prim_poly;
gf_t *base;
char *crt, *x, *y;
if (argc <= starting || strcmp(argv[starting], "-") == 0) {
if (!gf_init_easy(gf, w)) return 0;
return (argc <= starting) ? starting : starting+1;
}
mult_type = GF_MULT_DEFAULT;
region_type = GF_REGION_DEFAULT;
divide_type = GF_DIVIDE_DEFAULT;
arg1 = 0;
arg2 = 0;
prim_poly = 0;
base = NULL;
subrg_size = 0;
if (argc < starting+3) return 0;
if (strcmp(argv[starting], "SHIFT") == 0) {
mult_type = GF_MULT_SHIFT;
starting++;
} else if (strcmp(argv[starting], "GROUP") == 0) {
mult_type = GF_MULT_GROUP;
if (argc < starting+5) return 0;
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
sscanf(argv[starting+2], "%d", &arg2) == 0 ||
arg1 <= 0 || arg2 <= 0 || arg1 >= w || arg2 >= w) return 0;
starting += 3;
} else if (strcmp(argv[starting], "BYTWO_p") == 0) {
mult_type = GF_MULT_BYTWO_p;
starting++;
} else if (strcmp(argv[starting], "BYTWO_b") == 0) {
mult_type = GF_MULT_BYTWO_b;
starting++;
} else if (strcmp(argv[starting], "TABLE") == 0) {
mult_type = GF_MULT_TABLE;
starting++;
} else if (strcmp(argv[starting], "LOG") == 0) {
mult_type = GF_MULT_LOG_TABLE;
starting++;
} else if (strcmp(argv[starting], "LOG_ZERO") == 0) {
mult_type = GF_MULT_LOG_TABLE;
arg1 = 1;
starting++;
} else if (strcmp(argv[starting], "LOG_ZERO_EXT") == 0) {
mult_type = GF_MULT_LOG_TABLE;
arg1 = 2;
starting++;
} else if (strcmp(argv[starting], "SPLIT") == 0) {
mult_type = GF_MULT_SPLIT_TABLE;
if (argc < starting+5) return 0;
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
sscanf(argv[starting+2], "%d", &arg2) == 0 ||
arg1 <= 0 || arg2 <= 0 || w % arg1 != 0 || w % arg2 != 0) return 0;
starting += 3;
} else if (strcmp(argv[starting], "COMPOSITE") == 0) {
mult_type = GF_MULT_COMPOSITE;
if (argc < starting+6) return 0;
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
sscanf(argv[starting+2], "%d", &arg2) == 0 ||
arg1 <= 1 || w %arg1 != 0 || ((arg2 | 1) != 1)) return 0;
base = (gf_t *) malloc(sizeof(gf_t));
starting = create_gf_from_argv(base, w/arg1, argc, argv, starting+3);
if (starting == 0) { free(base); return 0; }
} else {
return 0;
}
if (argc < starting+2) {
if (base != NULL) gf_free(base, 1);
return 0;
}
if (strcmp(argv[starting], "-") == 0) {
region_type = GF_REGION_DEFAULT;
} else {
crt = strdup(argv[starting]);
region_type = 0;
x = crt;
do {
y = strchr(x, ',');
if (y != NULL) *y = '\0';
if (strcmp(x, "DOUBLE") == 0) {
region_type |= GF_REGION_DOUBLE_TABLE;
} else if (strcmp(x, "QUAD") == 0) {
region_type |= GF_REGION_QUAD_TABLE;
} else if (strcmp(x, "SINGLE") == 0) {
region_type |= GF_REGION_SINGLE_TABLE;
} else if (strcmp(x, "LAZY") == 0) {
region_type |= GF_REGION_LAZY;
} else if (strcmp(x, "SSE") == 0) {
region_type |= GF_REGION_SSE;
} else if (strcmp(x, "NOSSE") == 0) {
region_type |= GF_REGION_NOSSE;
} else if (strcmp(x, "CAUCHY") == 0) {
region_type |= GF_REGION_CAUCHY;
} else if (strcmp(x, "ALTMAP") == 0) {
region_type |= GF_REGION_ALTMAP;
} else if (strcmp(x, "STDMAP") == 0) {
region_type |= GF_REGION_STDMAP;
arg1 = 0;
arg2 = 0;
while (1) {
if (argc > starting) {
if (strcmp(argv[starting], "-m") == 0) {
starting++;
if (mult_type != GF_MULT_DEFAULT) {
if (base != NULL) gf_free(base, 1);
_gf_errno = GF_E_TWOMULT;
return 0;
}
if (strcmp(argv[starting], "SHIFT") == 0) {
mult_type = GF_MULT_SHIFT;
starting++;
} else if (strcmp(argv[starting], "CARRY_FREE") == 0) {
mult_type = GF_MULT_CARRY_FREE;
starting++;
} else if (strcmp(argv[starting], "GROUP") == 0) {
mult_type = GF_MULT_GROUP;
if (argc < starting + 3) {
_gf_errno = GF_E_GROUPAR;
return 0;
}
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
sscanf(argv[starting+2], "%d", &arg2) == 0) {
_gf_errno = GF_E_GROUPNU;
return 0;
}
starting += 3;
} else if (strcmp(argv[starting], "BYTWO_p") == 0) {
mult_type = GF_MULT_BYTWO_p;
starting++;
} else if (strcmp(argv[starting], "BYTWO_b") == 0) {
mult_type = GF_MULT_BYTWO_b;
starting++;
} else if (strcmp(argv[starting], "TABLE") == 0) {
mult_type = GF_MULT_TABLE;
starting++;
} else if (strcmp(argv[starting], "LOG") == 0) {
mult_type = GF_MULT_LOG_TABLE;
starting++;
} else if (strcmp(argv[starting], "LOG_ZERO") == 0) {
mult_type = GF_MULT_LOG_ZERO;
starting++;
} else if (strcmp(argv[starting], "LOG_ZERO_EXT") == 0) {
mult_type = GF_MULT_LOG_ZERO_EXT;
starting++;
} else if (strcmp(argv[starting], "SPLIT") == 0) {
mult_type = GF_MULT_SPLIT_TABLE;
if (argc < starting + 3) {
_gf_errno = GF_E_SPLITAR;
return 0;
}
if (sscanf(argv[starting+1], "%d", &arg1) == 0 ||
sscanf(argv[starting+2], "%d", &arg2) == 0) {
_gf_errno = GF_E_SPLITNU;
return 0;
}
starting += 3;
} else if (strcmp(argv[starting], "COMPOSITE") == 0) {
mult_type = GF_MULT_COMPOSITE;
if (argc < starting + 2) { _gf_errno = GF_E_FEWARGS; return 0; }
if (sscanf(argv[starting+1], "%d", &arg1) == 0) {
_gf_errno = GF_E_COMP_A2;
return 0;
}
starting += 2;
base = (gf_t *) malloc(sizeof(gf_t));
starting = create_gf_from_argv(base, w/arg1, argc, argv, starting);
if (starting == 0) {
free(base);
return 0;
}
} else {
if (base != NULL) gf_free(base, 1);
_gf_errno = GF_E_UNKNOWN;
return 0;
}
} else if (strcmp(argv[starting], "-r") == 0) {
starting++;
if (strcmp(argv[starting], "DOUBLE") == 0) {
region_type |= GF_REGION_DOUBLE_TABLE;
starting++;
} else if (strcmp(argv[starting], "QUAD") == 0) {
region_type |= GF_REGION_QUAD_TABLE;
starting++;
} else if (strcmp(argv[starting], "LAZY") == 0) {
region_type |= GF_REGION_LAZY;
starting++;
} else if (strcmp(argv[starting], "SSE") == 0) {
region_type |= GF_REGION_SSE;
starting++;
} else if (strcmp(argv[starting], "NOSSE") == 0) {
region_type |= GF_REGION_NOSSE;
starting++;
} else if (strcmp(argv[starting], "CAUCHY") == 0) {
region_type |= GF_REGION_CAUCHY;
starting++;
} else if (strcmp(argv[starting], "ALTMAP") == 0) {
region_type |= GF_REGION_ALTMAP;
starting++;
} else {
if (base != NULL) gf_free(base, 1);
_gf_errno = GF_E_UNK_REG;
return 0;
}
} else if (strcmp(argv[starting], "-p") == 0) {
starting++;
if (sscanf(argv[starting], "%llx", (long long unsigned int *)(&prim_poly)) == 0) {
if (base != NULL) gf_free(base, 1);
_gf_errno = GF_E_POLYSPC;
return 0;
}
starting++;
} else if (strcmp(argv[starting], "-d") == 0) {
starting++;
if (divide_type != GF_DIVIDE_DEFAULT) {
if (base != NULL) gf_free(base, 1);
_gf_errno = GF_E_TWO_DIV;
return 0;
} else if (strcmp(argv[starting], "EUCLID") == 0) {
divide_type = GF_DIVIDE_EUCLID;
starting++;
} else if (strcmp(argv[starting], "MATRIX") == 0) {
divide_type = GF_DIVIDE_MATRIX;
starting++;
} else {
_gf_errno = GF_E_UNK_DIV;
return 0;
}
} else if (strcmp(argv[starting], "-") == 0) {
/*
printf("Scratch size: %d\n", gf_scratch_size(w,
mult_type, region_type, divide_type, arg1, arg2));
*/
if (gf_init_hard(gf, w, mult_type, region_type, divide_type,
prim_poly, arg1, arg2, base, NULL) == 0) {
if (base != NULL) gf_free(base, 1);
return 0;
} else
return starting + 1;
} else {
if (base != NULL) gf_free(base, 1);
free(crt);
_gf_errno = GF_E_UNKFLAG;
return 0;
}
if (y != NULL) x = y+1;
} while (y != NULL);
free(crt);
} else {
if (base != NULL) gf_free(base, 1);
_gf_errno = GF_E_FEWARGS;
return 0;
}
}
starting++;
if (strcmp(argv[starting], "-") == 0) {
divide_type = GF_DIVIDE_DEFAULT;
} else if (strcmp(argv[starting], "MATRIX") == 0) {
divide_type = GF_DIVIDE_MATRIX;
} else if (strcmp(argv[starting], "EUCLID") == 0) {
divide_type = GF_DIVIDE_EUCLID;
} else {
if (base != NULL) gf_free(base, 1);
return 0;
}
starting++;
if (!gf_init_hard(gf, w, mult_type, region_type, divide_type, prim_poly, arg1, arg2, base, NULL)) {
if (base != NULL) gf_free(base, 1);
return 0;
}
return starting;
}

View File

@ -8,8 +8,9 @@
#include "gf_complete.h"
/* This prints out the error string defining the methods that you can put on argv*/
extern void methods_to_stderr();
/* Parses argv starting at "starting".
Returns 0 on failure.
On success, it returns one past the last argument it read in argv. */
/* Parses argv starting at "starting" */
extern int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting);

View File

@ -11,58 +11,26 @@
#include "gf_complete.h"
#include "gf_method.h"
#include "gf_int.h"
#define NMULTS (15)
static char *mults[NMULTS] = { "SHIFT", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE-0", "COMPOSITE-1" };
#define NMULTS (16)
static char *mults[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
"SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };
#define NREGIONS (96)
static char *regions[NREGIONS] = { "-", "SINGLE", "DOUBLE", "QUAD",
"LAZY", "SINGLE,LAZY", "DOUBLE,LAZY", "QUAD,LAZY", "SSE",
"SINGLE,SSE", "DOUBLE,SSE", "QUAD,SSE", "LAZY,SSE",
"SINGLE,LAZY,SSE", "DOUBLE,LAZY,SSE", "QUAD,LAZY,SSE", "NOSSE",
"SINGLE,NOSSE", "DOUBLE,NOSSE", "QUAD,NOSSE", "LAZY,NOSSE",
"SINGLE,LAZY,NOSSE", "DOUBLE,LAZY,NOSSE", "QUAD,LAZY,NOSSE",
"STDMAP", "SINGLE,STDMAP", "DOUBLE,STDMAP", "QUAD,STDMAP",
"LAZY,STDMAP", "SINGLE,LAZY,STDMAP", "DOUBLE,LAZY,STDMAP",
"QUAD,LAZY,STDMAP", "SSE,STDMAP", "SINGLE,SSE,STDMAP",
"DOUBLE,SSE,STDMAP", "QUAD,SSE,STDMAP", "LAZY,SSE,STDMAP",
"SINGLE,LAZY,SSE,STDMAP", "DOUBLE,LAZY,SSE,STDMAP",
"QUAD,LAZY,SSE,STDMAP", "NOSSE,STDMAP", "SINGLE,NOSSE,STDMAP",
"DOUBLE,NOSSE,STDMAP", "QUAD,NOSSE,STDMAP", "LAZY,NOSSE,STDMAP",
"SINGLE,LAZY,NOSSE,STDMAP", "DOUBLE,LAZY,NOSSE,STDMAP",
"QUAD,LAZY,NOSSE,STDMAP", "ALTMAP", "SINGLE,ALTMAP", "DOUBLE,ALTMAP",
"QUAD,ALTMAP", "LAZY,ALTMAP", "SINGLE,LAZY,ALTMAP",
"DOUBLE,LAZY,ALTMAP", "QUAD,LAZY,ALTMAP", "SSE,ALTMAP",
"SINGLE,SSE,ALTMAP", "DOUBLE,SSE,ALTMAP", "QUAD,SSE,ALTMAP",
"LAZY,SSE,ALTMAP", "SINGLE,LAZY,SSE,ALTMAP",
"DOUBLE,LAZY,SSE,ALTMAP", "QUAD,LAZY,SSE,ALTMAP", "NOSSE,ALTMAP",
"SINGLE,NOSSE,ALTMAP", "DOUBLE,NOSSE,ALTMAP", "QUAD,NOSSE,ALTMAP",
"LAZY,NOSSE,ALTMAP", "SINGLE,LAZY,NOSSE,ALTMAP",
"DOUBLE,LAZY,NOSSE,ALTMAP", "QUAD,LAZY,NOSSE,ALTMAP", "CAUCHY",
"SINGLE,CAUCHY", "DOUBLE,CAUCHY", "QUAD,CAUCHY", "LAZY,CAUCHY",
"SINGLE,LAZY,CAUCHY", "DOUBLE,LAZY,CAUCHY", "QUAD,LAZY,CAUCHY",
"SSE,CAUCHY", "SINGLE,SSE,CAUCHY", "DOUBLE,SSE,CAUCHY",
"QUAD,SSE,CAUCHY", "LAZY,SSE,CAUCHY", "SINGLE,LAZY,SSE,CAUCHY",
"DOUBLE,LAZY,SSE,CAUCHY", "QUAD,LAZY,SSE,CAUCHY", "NOSSE,CAUCHY",
"SINGLE,NOSSE,CAUCHY", "DOUBLE,NOSSE,CAUCHY", "QUAD,NOSSE,CAUCHY",
"LAZY,NOSSE,CAUCHY", "SINGLE,LAZY,NOSSE,CAUCHY",
"DOUBLE,LAZY,NOSSE,CAUCHY", "QUAD,LAZY,NOSSE,CAUCHY" };
#define NREGIONS (7)
static char *regions[NREGIONS] = { "DOUBLE", "QUAD", "LAZY", "SSE", "NOSSE",
"ALTMAP", "CAUCHY" };
#define NDIVS (3)
static char *divides[NDIVS] = { "-", "MATRIX", "EUCLID" };
#define NDIVS (2)
static char *divides[NDIVS] = { "MATRIX", "EUCLID" };
int main()
int main()
{
int m, r, d, w, i, sa, j;
char *argv[20];
int m, r, d, w, i, sa, j, k, reset;
char *argv[50];
gf_t gf;
char divs[200], ks[10], ls[10];
methods_to_stderr();
printf("\n");
printf("Implemented Methods: \n\n");
for (i = 2; i < 8; i++) {
w = (1 << i);
@ -70,9 +38,14 @@ int main()
if (create_gf_from_argv(&gf, w, 1, argv, 0) > 0) {
printf("w=%d: -\n", w);
gf_free(&gf, 1);
} else if (_gf_errno == GF_E_DEFAULT) {
fprintf(stderr, "Unlabeled failed method: w=%d: -\n", 2);
exit(1);
}
for (m = 0; m < NMULTS; m++) {
sa = 0;
argv[sa++] = "-m";
if (strcmp(mults[m], "GROUP44") == 0) {
argv[sa++] = "GROUP";
argv[sa++] = "4";
@ -96,46 +69,66 @@ int main()
sprintf(ls, "%d", w);
argv[sa++] = ls;
argv[sa++] = "8";
} else if (strcmp(mults[m], "SPLIT16") == 0) {
argv[sa++] = "SPLIT";
sprintf(ls, "%d", w);
argv[sa++] = ls;
argv[sa++] = "16";
} else if (strcmp(mults[m], "SPLIT88") == 0) {
argv[sa++] = "SPLIT";
argv[sa++] = "8";
argv[sa++] = "8";
} else if (strcmp(mults[m], "COMPOSITE-0") == 0) {
} else if (strcmp(mults[m], "COMPOSITE") == 0) {
argv[sa++] = "COMPOSITE";
argv[sa++] = "2";
argv[sa++] = "0";
argv[sa++] = "-";
} else if (strcmp(mults[m], "COMPOSITE-1") == 0) {
argv[sa++] = "COMPOSITE";
argv[sa++] = "2";
argv[sa++] = "1";
argv[sa++] = "-";
} else {
argv[sa++] = mults[m];
}
for (r = 0; r < NREGIONS; r++) {
argv[sa++] = regions[r];
strcpy(divs, "");
for (d = 0; d < NDIVS; d++) {
argv[sa++] = divides[d];
/* printf("w=%d:", w);
for (j = 0; j < sa; j++) printf(" %s", argv[j]);
printf("\n"); */
if (create_gf_from_argv(&gf, w, sa, argv, 0) > 0) {
strcat(divs, "|");
strcat(divs, divides[d]);
gf_free(&gf, 1);
}
sa--;
reset = sa;
for (r = 0; r < (1 << NREGIONS); r++) {
sa = reset;
for (k = 0; k < NREGIONS; k++) {
if (r & 1 << k) {
argv[sa++] = "-r";
argv[sa++] = regions[k];
}
}
if (strlen(divs) > 0) {
argv[sa++] = "-";
if (create_gf_from_argv(&gf, w, sa, argv, 0) > 0) {
printf("w=%d:", w);
for (j = 0; j < sa; j++) printf(" %s", argv[j]);
printf(" %s\n", divs+1);
printf("\n");
gf_free(&gf, 1);
} else if (_gf_errno == GF_E_DEFAULT) {
fprintf(stderr, "Unlabeled failed method: w=%d:", w);
for (j = 0; j < sa; j++) fprintf(stderr, " %s", argv[j]);
fprintf(stderr, "\n");
exit(1);
}
sa--;
for (d = 0; d < NDIVS; d++) {
argv[sa++] = "-d";
argv[sa++] = divides[d];
/* printf("w=%d:", w);
for (j = 0; j < sa; j++) printf(" %s", argv[j]);
printf("\n"); */
argv[sa++] = "-";
if (create_gf_from_argv(&gf, w, sa, argv, 0) > 0) {
printf("w=%d:", w);
for (j = 0; j < sa; j++) printf(" %s", argv[j]);
printf("\n");
gf_free(&gf, 1);
} else if (_gf_errno == GF_E_DEFAULT) {
fprintf(stderr, "Unlabeled failed method: w=%d:", w);
for (j = 0; j < sa; j++) fprintf(stderr, " %s", argv[j]);
fprintf(stderr, "\n");
exit(1);
}
sa-=3;
}
}
sa--;
}
}
return 0;
}

118
gf_mult.c
View File

@ -12,105 +12,53 @@
#include "gf_complete.h"
#include "gf_method.h"
#include "gf_general.h"
void usage(char *s)
void usage(int why)
{
fprintf(stderr, "usage: gf_mult a b w [method] - does multiplication of a and b in GF(2^w)\n");
fprintf(stderr, " If w has an h on the end, treat a, b and the product as hexadecimal (no 0x required)\n");
fprintf(stderr, "\n");
fprintf(stderr, " legal w are: 1-32, 64 and 128\n");
fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n");
fprintf(stderr, "\n");
fprintf(stderr, " For method specification, type gf_methods\n");
if (s != NULL) fprintf(stderr, "%s", s);
if (why == 'W') {
fprintf(stderr, "Bad w.\n");
fprintf(stderr, "Legal w are: 1 - 32, 64 and 128.\n");
fprintf(stderr, "Append 'h' to w to treat a, b and the product as hexadecimal.\n");
fprintf(stderr, "w=128 is hex only (i.e. '128' will be an error - do '128h')\n");
}
if (why == 'A') fprintf(stderr, "Bad a\n");
if (why == 'B') fprintf(stderr, "Bad b\n");
if (why == 'M') {
fprintf(stderr, "Bad Method Specification: ");
gf_error();
}
exit(1);
}
/*
 * Parse a hexadecimal string of up to 32 digits into a 128-bit value.
 *
 *   s - NUL-terminated hex string (parsed with sscanf "%llx"); not modified.
 *   v - receives the result: v[0] is the high 64 bits, v[1] the low 64 bits.
 *
 * Returns 1 on success and 0 on failure (string longer than 32 characters,
 * or a half that sscanf cannot convert -- including empty input, for which
 * sscanf reports EOF rather than 0).
 */
int read_128(char *s, uint64_t *v)
{
  char high[17];            /* at most 16 leading digits plus the NUL */
  unsigned long long part;  /* exact type "%llx" expects; avoids casting v */
  size_t len, split;

  len = strlen(s);
  if (len > 32) return 0;

  if (len <= 16) {
    /* Everything fits in the low word. */
    if (sscanf(s, "%llx", &part) != 1) return 0;
    v[0] = 0;
    v[1] = part;
    return 1;
  }

  /* The last 16 characters form the low word ... */
  split = len - 16;
  if (sscanf(s + split, "%llx", &part) != 1) return 0;
  v[1] = part;

  /* ... and whatever precedes them is the high word.  Parse it from a
     private copy so the caller's string is never written to. */
  memcpy(high, s, split);
  high[split] = '\0';
  if (sscanf(high, "%llx", &part) != 1) return 0;
  v[0] = part;
  return 1;
}
/*
 * Print a 128-bit value in hexadecimal on stdout, followed by a newline.
 * v[0] holds the high 64 bits and v[1] the low 64 bits.  When the high word
 * is non-zero the low word is zero-padded to 16 digits; otherwise only the
 * low word is printed, without padding.
 */
void print_128(uint64_t *v)
{
  unsigned long long upper = (unsigned long long) v[0];
  unsigned long long lower = (unsigned long long) v[1];

  if (upper == 0) {
    printf("%llx\n", lower);
    return;
  }
  printf("%llx%016llx\n", upper, lower);
}
int main(int argc, char **argv)
{
int hex, al, bl, w;
uint32_t a, b, c, top;
uint64_t a64, b64, c64;
uint64_t a128[2], b128[2], c128[2];
char *format;
int hex, w;
gf_t gf;
gf_general_t a, b, c;
char output[50];
if (argc < 4) usage(NULL);
if (sscanf(argv[3], "%d", &w) == 0) usage("Bad w\n");
if (argc < 4) usage(' ');
if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage("Bad w");
if (sscanf(argv[3], "%d", &w) == 0) usage('W');
if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage('W');
hex = (strchr(argv[3], 'h') != NULL);
if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("\nBad Method\n");
if (!hex && w == 128) usage('W');
if (!hex && w == 128) usage(NULL);
if (w <= 32) {
format = (hex) ? "%x" : "%u";
if (sscanf(argv[1], format, &a) == 0) usage("Bad a\n");
if (sscanf(argv[2], format, &b) == 0) usage("Bad b\n");
if (w < 32) {
top = (w == 31) ? 0x80000000 : (1 << w);
if (w != 32 && a >= top) usage("a is too large\n");
if (w != 32 && b >= top) usage("b is too large\n");
}
c = gf.multiply.w32(&gf, a, b);
printf(format, c);
printf("\n");
} else if (w == 64) {
format = (hex) ? "%llx" : "%llu";
if (sscanf(argv[1], format, &a64) == 0) usage("Bad a\n");
if (sscanf(argv[2], format, &b64) == 0) usage("Bad b\n");
c64 = gf.multiply.w64(&gf, a64, b64);
printf(format, c64);
printf("\n");
} else if (w == 128) {
if (read_128(argv[1], a128) == 0) usage("Bad a\n");
if (read_128(argv[2], b128) == 0) usage("Bad b\n");
gf.multiply.w128(&gf, a128, b128, c128);
print_128(c128);
if (argc == 4) {
if (gf_init_easy(&gf, w) == 0) usage('M');
} else {
if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage('M');
}
if (!gf_general_s_to_val(&a, w, argv[1], hex)) usage('A');
if (!gf_general_s_to_val(&b, w, argv[2], hex)) usage('B');
gf_general_multiply(&gf, &a, &b, &c);
gf_general_val_to_s(&c, w, output, hex);
printf("%s\n", output);
exit(0);
}

744
gf_poly.c
View File

@ -1,560 +1,268 @@
/*
* gf_poly.c - program to help find primitive polynomials in composite fields
gf_poly.c - program to help find irreducible polynomials in composite fields,
using the Ben-Or algorithm.
James S. Plank
Please see the following paper for a
description of the Ben-Or algorithm:
author S. Gao and D. Panario
title Tests and Constructions of Irreducible Polynomials over Finite Fields
booktitle Foundations of Computational Mathematics
year 1997
publisher Springer Verlag
pages 346-361
The basic technique is this. You have a polynomial f(x) whose coefficients are
in a base field GF(2^w). The polynomial is of degree n. You need to do the
following for all i from 1 to n/2:
Construct x^(2^w)^i modulo f. That will be a polynomial of maximum degree n-1
with coefficients in GF(2^w). You construct that polynomial by starting with x
and doubling it w times, each time taking the result modulo f. Then you
multiply that by itself i times, again each time taking the result modulo f.
When you're done, you need to "subtract" x -- since addition = subtraction =
XOR, that means XOR x.
Now, find the GCD of that last polynomial and f, using Euclid's algorithm. If
the GCD is not one, then f is reducible. If it is not reducible for each of
those i, then it is irreducible.
In this code, I am using a gf_general_t to represent elements of GF(2^w). This
is so that I can use base fields that are GF(2^64) or GF(2^128).
I have two main procedures. The first is x_to_q_to_i_minus_x, which calculates
x^(2^w)^i - x, putting the result into a gf_general_t * called retval.
The second is gcd_one, which takes a polynomial of degree n and a second one
of degree n-1, and uses Euclid's algorithm to decide if their GCD == 1.
These can be made faster (e.g. calculate x^(2^w) once and store it).
*/
#include "gf_complete.h"
#include "gf_method.h"
#include "gf_general.h"
#include "gf_int.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define GF_POLY_COEF_MASK8 0xff
#define GF_POLY_COEF_MASK16 0xffff
#define GF_POLY_COEF_MASK32 0xffffffff
#define GF_POLY_COEF_MASK64 0xffffffffffffffff
char *BM = "Bad Method: ";
#define LLUI (long long unsigned int)
/* One term (coef * x^power) of a polynomial, kept as a node of a singly
   linked list. */
struct gf_poly_coef_s;
typedef struct gf_poly_coef_s {
uint64_t coef; /* coefficient of this term */
uint64_t power; /* exponent of x for this term */
struct gf_poly_coef_s *next; /* following term in the list, or NULL for the last one */
} gf_poly_coef_t;
/* A polynomial: a linked list of terms together with the field used for
   arithmetic on its coefficients. */
typedef struct gf_poly_s {
gf_poly_coef_t *leading_coef; /* head of the term list; NULL when there are no terms */
uint64_t num_coefs; /* set to 0 on creation; NOTE(review): the list routines in this file do not appear to update it */
gf_t *coef_gf; /* field handed to the coefficient multiply/divide helpers */
int w; /* word size handed to the coefficient helpers along with coef_gf */
} gf_poly_t;
static uint64_t gf_add(int w, uint64_t a, uint64_t b)
void usage(char *s)
{
if (w == 8) {
return (a & GF_POLY_COEF_MASK8) ^ (b & GF_POLY_COEF_MASK8);
} else if (w == 16) {
return (a & GF_POLY_COEF_MASK16) ^ (b & GF_POLY_COEF_MASK16);
} else if (w == 32) {
return (a & GF_POLY_COEF_MASK32) ^ (b & GF_POLY_COEF_MASK32);
} else if (w == 64) {
return (a & GF_POLY_COEF_MASK64) ^ (b & GF_POLY_COEF_MASK64);
}
}
/*
 * Multiply two coefficients a and b using the field object gf.
 * Dispatches to the 32-bit entry point for w <= 32 and to the 64-bit entry
 * point for w == 64.
 *
 * Returns the product.  Any other w is unsupported; 0 is returned in that
 * case so the function never falls off its end without a value.
 */
static uint64_t gf_mult(int w, gf_t* gf, uint64_t a, uint64_t b)
{
  if (w <= 32) return gf->multiply.w32(gf, a, b);
  if (w == 64) return gf->multiply.w64(gf, a, b);
  return 0; /* unsupported word size */
}
/*
 * Divide coefficient a by coefficient b using the field object gf.
 * Dispatches to the 32-bit entry point for w <= 32 and to the 64-bit entry
 * point for w == 64.
 *
 * Returns the quotient.  Any other w is unsupported; 0 is returned in that
 * case so the function never falls off its end without a value.
 */
static uint64_t gf_divide(int w, gf_t* gf, uint64_t a, uint64_t b)
{
  if (w <= 32) return gf->divide.w32(gf, a, b);
  if (w == 64) return gf->divide.w64(gf, a, b);
  return 0; /* unsupported word size */
}
/*
 * Compute the inverse of coefficient a using the field object gf.
 * Dispatches to the 32-bit entry point for w <= 32 and to the 64-bit entry
 * point for w == 64.
 *
 * Returns the inverse.  Any other w is unsupported; 0 is returned in that
 * case so the function never falls off its end without a value.
 */
static uint64_t gf_inverse(int w, gf_t* gf, uint64_t a)
{
  if (w <= 32) return gf->inverse.w32(gf, a);
  if (w == 64) return gf->inverse.w64(gf, a);
  return 0; /* unsupported word size */
}
/*
 * Allocate an empty polynomial whose coefficients use field gf with word
 * size w.
 *
 * Returns the new polynomial, or NULL when gf is NULL or the allocation
 * fails.  The result is released with gf_poly_free().
 */
gf_poly_t* gf_poly_init(int w, gf_t *gf)
{
  gf_poly_t *gf_poly;

  /* Reject a missing field before allocating, so nothing leaks on that path. */
  if (gf == NULL) return NULL;

  gf_poly = (gf_poly_t*)malloc(sizeof(gf_poly_t));
  if (gf_poly == NULL) return NULL;

  gf_poly->leading_coef = NULL;
  gf_poly->num_coefs = 0;
  gf_poly->coef_gf = gf;
  gf_poly->w = w;
  return gf_poly;
}
/*
 * Print a polynomial as "c * x^p + c * x^p + ..." on stdout, in list order.
 * When message is not NULL, ": message" and a newline are appended; when it
 * is NULL no newline is written.  A NULL polynomial prints "0 * x^0" on
 * stderr instead.
 */
void gf_poly_print(gf_poly_t *gf_poly, char *message)
{
  gf_poly_coef_t *term;

  if (gf_poly == NULL) {
    fprintf(stderr, "0 * x^0\n");
    return;
  }

  for (term = gf_poly->leading_coef; term != NULL; term = term->next) {
    printf("%llu * x^%llu", LLUI term->coef, LLUI term->power);
    /* Separator goes between terms only, never after the last one. */
    if (term->next != NULL) printf(" + ");
  }

  if (message == NULL) return;
  printf(": %s\n", message);
}
/*
 * Create an independent copy of poly: same field and word size, with every
 * term re-added to a fresh list.
 *
 * Returns the copy, or NULL when the polynomial header cannot be allocated.
 */
gf_poly_t* gf_poly_copy(gf_poly_t *poly)
{
  gf_poly_coef_t *term;
  gf_poly_t *dup = (gf_poly_t*)malloc(sizeof(gf_poly_t));

  if (dup == NULL) return NULL;

  dup->leading_coef = NULL;
  dup->num_coefs = 0;
  dup->coef_gf = poly->coef_gf;
  dup->w = poly->w;

  for (term = poly->leading_coef; term != NULL; term = term->next) {
    gf_poly_add_coef(dup, term->coef, term->power);
  }
  return dup;
}
/*
 * Remove and free every term of polynomial a, leaving it empty
 * (leading_coef == NULL).  The polynomial header itself is kept.
 */
void gf_poly_clear(gf_poly_t* a)
{
  gf_poly_coef_t *cur = a->leading_coef;
  gf_poly_coef_t *following;

  while (cur != NULL) {
    following = cur->next;   /* read the link before the node is freed */
    free(cur);
    cur = following;
  }
  a->leading_coef = NULL;
}
/*
 * Free a polynomial (all of its terms and its header) and set the caller's
 * pointer to NULL.  Like free(), it is a no-op when there is nothing to
 * release: a may be NULL and *a may be NULL.
 */
void gf_poly_free(gf_poly_t **a)
{
  if (a == NULL || *a == NULL) return;

  gf_poly_clear(*a);
  free(*a);
  *a = NULL;
}
/*
 * Allocate a detached list node for the term coef * x^power.
 *
 * Returns the node with next == NULL, or NULL when the allocation fails.
 */
gf_poly_coef_t* gf_poly_create_node(uint64_t coef, uint64_t power)
{
  gf_poly_coef_t *fresh = (gf_poly_coef_t*)malloc(sizeof(gf_poly_coef_t));

  if (fresh != NULL) {
    fresh->coef = coef;
    fresh->power = power;
    fresh->next = NULL;
  }
  return fresh;
}
/*
 * Unlink and free the first term of gf_poly whose exponent equals power.
 *
 * Returns 0 when a term was removed and -1 when no such term exists; an
 * empty polynomial simply yields -1.
 */
int gf_poly_remove_node(gf_poly_t *gf_poly, uint64_t power)
{
  /* 'link' always points at the pointer that refers to the node under
     inspection (the list head or some node's 'next'), so the head and
     interior cases are handled by the same code. */
  gf_poly_coef_t **link;

  for (link = &gf_poly->leading_coef; *link != NULL; link = &(*link)->next) {
    if ((*link)->power == power) {
      gf_poly_coef_t *doomed = *link;
      *link = doomed->next;
      free(doomed);
      return 0;
    }
  }
  return -1;
}
/*
 * Add the term coef_val * x^power into gf_poly.
 *
 * The term list is kept ordered by decreasing exponent.  If a term with the
 * same exponent already exists the coefficients are combined with gf_add(),
 * and the term is dropped when the sum is zero; otherwise a new node is
 * inserted at the position that preserves the ordering.
 *
 * Returns 0 on success and -1 when a new node cannot be allocated (the
 * polynomial is left unchanged in that case).
 */
int gf_poly_add_coef(gf_poly_t *gf_poly, uint64_t coef_val, uint64_t power)
{
  /* 'link' points at the pointer (list head or a node's 'next') that refers
     to the first term whose exponent is not greater than 'power'. */
  gf_poly_coef_t **link = &gf_poly->leading_coef;
  gf_poly_coef_t *node;

  while (*link != NULL && (*link)->power > power) {
    link = &(*link)->next;
  }

  /*
   * A term of the same power exists: add the coefs
   */
  if (*link != NULL && (*link)->power == power) {
    node = *link;
    node->coef = gf_add(gf_poly->w, node->coef, coef_val);
    if (node->coef == 0) {
      /* The term cancelled out, so unlink and release it. */
      *link = node->next;
      free(node);
    }
    return 0;
  }

  /*
   * No such term: insert in front of the first lower power, which is the
   * list head for the highest power and the tail for the lowest.
   */
  node = gf_poly_create_node(coef_val, power);
  if (node == NULL) return -1;
  node->next = *link;
  *link = node;
  return 0;
}
/*
 * Compute a+b and store in a.
 * Every term of b is added into a one at a time; always returns 0.
 */
int gf_poly_add(gf_poly_t* a, gf_poly_t* b)
{
  gf_poly_coef_t *term;

  for (term = b->leading_coef; term != NULL; term = term->next) {
    gf_poly_add_coef(a, term->coef, term->power);
  }
  return 0;
}
/*
* Compute a*b and store in a
*
* 'a' is rewritten in place, one term at a time; 'b' is only traversed.
* Always returns 0.
* NOTE(review): presumably a and b must be distinct polynomials, since 'a'
* is modified while 'b' is being walked -- confirm against callers.
*/
int gf_poly_mult(gf_poly_t* a, gf_poly_t* b)
{
gf_poly_coef_t* a_iter = a->leading_coef;
/*
* Remove one node at a time from 'a', starting with
* highest power. Multiply the removed (coef,power)
* by every entry of 'b,' adding each product into 'a.'
*/
while (a_iter != NULL) {
gf_poly_coef_t* tmp = a_iter;
gf_poly_coef_t* b_iter = b->leading_coef;
/* Copy the term's values out: its node is freed by the removal below. */
uint64_t a_power = a_iter->power;
uint64_t a_coef = a_iter->coef;
/* Step to the next original term before the current node goes away. */
a_iter = a_iter->next;
gf_poly_remove_node(a, tmp->power);
/* Each product lands at power b_power + a_power and is merged into 'a'
   by gf_poly_add_coef(), which combines it with any term already there. */
while (b_iter != NULL) {
uint64_t new_power = b_iter->power + a_power;
uint64_t new_coef = gf_mult(a->w, a->coef_gf, b_iter->coef, a_coef);
gf_poly_add_coef(a, new_coef, new_power);
b_iter = b_iter->next;
}
}
return 0;
}
/*
 * Compute a % b and store in a.
 *
 * Repeatedly cancels the leading term of 'a' with a suitable multiple of
 * 'b' until the degree of 'a' drops below the degree of 'b' (or 'a' becomes
 * empty).  'b' is not modified.
 *
 * Returns 0 on success, and -1 when 'b' has no terms (division by the zero
 * polynomial) or the scratch polynomial cannot be allocated; 'a' is left
 * untouched in both failure cases.
 */
int gf_poly_reduce(gf_poly_t* a, gf_poly_t* b)
{
  gf_poly_coef_t* b_iter = b->leading_coef;
  gf_poly_coef_t* a_iter;
  gf_poly_t* c;

  if (b_iter == NULL) return -1;

  c = gf_poly_init(a->w, a->coef_gf);
  if (c == NULL) return -1;

  /*
   * Reduce until the degree of 'a' is less than
   * the degree of 'b.' At that point 'a' will
   * contain the remainder of a / b.
   */
  a_iter = a->leading_coef;
  while (a_iter && (a_iter->power >= b_iter->power)) {
    /*
     * Degree difference and ratio of the leading coefs of the
     * current 'a' and of 'b'.
     */
    uint64_t reduce_power = a_iter->power - b_iter->power;
    uint64_t reduce_coef = gf_divide(a->w, a->coef_gf, a_iter->coef, b_iter->coef);

    /*
     * Build q*x^(n-m)*b(x), where q is the coef ratio, n is the
     * degree of 'a' and m is the degree of 'b'.  Its leading term
     * equals the leading term of 'a'.
     */
    gf_poly_add_coef(c, reduce_coef, reduce_power);
    gf_poly_mult(c, b);

    /*
     * Add the newly created poly, which will reduce
     * a(x) by at least one term (leading term).
     */
    gf_poly_add(a, c);
    gf_poly_clear(c);

    /*
     * Grab the new leading term of 'a'
     */
    a_iter = a->leading_coef;
  }

  /* Release the scratch polynomial: its terms (if any) and its header. */
  gf_poly_clear(c);
  free(c);
  return 0;
}
/*
 * Get the GCD of a and b, return the result.
 *
 * Runs Euclid's algorithm directly on the two arguments, so both a and b
 * are overwritten by the successive remainders.  The returned pointer is
 * one of a or b (no new polynomial is allocated).  Returns NULL when either
 * input has no terms.
 */
gf_poly_t* gf_poly_gcd(gf_poly_t* a, gf_poly_t* b)
{
  gf_poly_t *dividend, *divisor, *swap;

  if (a->leading_coef == NULL || b->leading_coef == NULL) {
    return NULL;
  }

  /* Start with the strictly higher-degree polynomial as the dividend. */
  if (a->leading_coef->power > b->leading_coef->power) {
    dividend = a;
    divisor = b;
  } else {
    dividend = b;
    divisor = a;
  }

  /* Stop once the divisor is empty, or is a constant with coef 0 or 1. */
  while (divisor->leading_coef != NULL &&
         !(divisor->leading_coef->power == 0 && divisor->leading_coef->coef <= 1)) {
    gf_poly_reduce(dividend, divisor);
    swap = dividend;
    dividend = divisor;
    divisor = swap;
  }
  return dividend;
}
/*
* The Ben-Or algorithm for determining irreducibility
*/
int gf_poly_is_irred(gf_poly_t* poly)
{
gf_poly_t *gcd;
gf_poly_t *prod_of_irred;
uint64_t prod_of_irred_power = ((unsigned long long) 1) << poly->w;
int n = poly->leading_coef->power / 2;
int i;
int ret = 0;
gf_poly_t *a = gf_poly_copy(poly);
prod_of_irred = gf_poly_init(a->w, a->coef_gf);
for (i = 1; i <= n; i++) {
gf_poly_add_coef(prod_of_irred, 1, prod_of_irred_power);
gf_poly_add_coef(prod_of_irred, 1, 1);
gf_poly_reduce(prod_of_irred, a);
gcd = gf_poly_gcd(a, prod_of_irred);
/*
* It is irreducible if it is not the product of
* non-trivial factors (non-constant). Therefore,
* the GCD of the poly and prod_of_irred should be
* a constant (0 or 0-degree polynomial).
*/
if (gcd == NULL) {
ret = -1;
break;
} else if (gcd->leading_coef->power != 0) {
ret = -1;
break;
} else if (gcd->leading_coef->power == 0) {
ret = 0;
break;
fprintf(stderr, "usage: gf_poly w(base-field) method power:coef [ power:coef .. ]\n");
fprintf(stderr, "\n");
fprintf(stderr, " use - for the default method.\n");
fprintf(stderr, " use 0x in front of the coefficient if it's in hex\n");
fprintf(stderr, " \n");
fprintf(stderr, " For example, to test whether x^2 + 2x + 1 is irreducible\n");
fprintf(stderr, " in GF(2^16), the call is:\n");
fprintf(stderr, " \n");
fprintf(stderr, " gf_poly 16 - 2:1 1:2 0:1\n");
fprintf(stderr, " \n");
fprintf(stderr, " See the user's manual for more information.\n");
if (s != NULL) {
fprintf(stderr, "\n");
if (s == BM) {
fprintf(stderr, "%s", s);
gf_error();
} else {
ret = -1;
break;
fprintf(stderr, "%s\n", s);
}
// Need if to avoid a overflow error
if ((i + 1) <= n) {
prod_of_irred_power *= prod_of_irred_power;
}
exit(1);
}
int gcd_one(gf_t *gf, int w, int n, gf_general_t *poly, gf_general_t *prod)
{
gf_general_t *a, *b, zero, factor, p;
int i, j, da, db;
char buf[30];
gf_general_set_zero(&zero, w);
a = (gf_general_t *) malloc(sizeof(gf_general_t) * n+1);
b = (gf_general_t *) malloc(sizeof(gf_general_t) * n);
for (i = 0; i <= n; i++) gf_general_add(gf, &zero, poly+i, a+i);
for (i = 0; i < n; i++) gf_general_add(gf, &zero, prod+i, b+i);
da = n;
while (1) {
for (db = n-1; db >= 0 && gf_general_is_zero(b+db, w); db--) ;
if (db < 0) return 0;
if (db == 0) return 1;
for (j = da; j >= db; j--) {
if (!gf_general_is_zero(a+j, w)) {
gf_general_divide(gf, a+j, b+db, &factor);
for (i = 0; i <= db; i++) {
gf_general_multiply(gf, b+i, &factor, &p);
gf_general_add(gf, &p, a+(i+j-db), a+(i+j-db));
}
}
}
for (i = 0; i < n; i++) {
gf_general_add(gf, a+i, &zero, &p);
gf_general_add(gf, b+i, &zero, a+i);
gf_general_add(gf, &p, &zero, b+i);
}
gf_poly_clear(prod_of_irred);
}
gf_poly_free(&a);
return ret;
}
int is_suitible_s(int w, gf_t *gf, uint64_t s)
void x_to_q_to_i_minus_x(gf_t *gf, int w, int n, gf_general_t *poly, int logq, int i, gf_general_t *retval)
{
uint64_t num_elems = ((unsigned long long) 1) << w;
uint64_t i = 2;
uint64_t i_inv;
gf_general_t x;
gf_general_t *x_to_q;
gf_general_t *product;
gf_general_t p, zero, factor;
int j, k, lq;
char buf[20];
for (; i < num_elems; i++) {
i_inv = gf_inverse(w, gf, i);
if ((i ^ i_inv) == s) {
fprintf(stderr, "Bailed on %llu ^ %llu = %llu\n", LLUI i, LLUI i_inv, LLUI s);
return -1;
gf_general_set_zero(&zero, w);
product = (gf_general_t *) malloc(sizeof(gf_general_t) * n*2);
x_to_q = (gf_general_t *) malloc(sizeof(gf_general_t) * n);
for (j = 0; j < n; j++) gf_general_set_zero(x_to_q+j, w);
gf_general_set_one(x_to_q+1, w);
for (lq = 0; lq < logq; lq++) {
for (j = 0; j < n*2; j++) gf_general_set_zero(product+j, w);
for (j = 0; j < n; j++) {
for (k = 0; k < n; k++) {
gf_general_multiply(gf, x_to_q+j, x_to_q+k, &p);
gf_general_add(gf, product+(j+k), &p, product+(j+k));
}
}
if (i % 1000000000 == 0) fprintf(stderr, "Processed %llu\n", LLUI i);
for (j = n*2-1; j >= n; j--) {
if (!gf_general_is_zero(product+j, w)) {
gf_general_add(gf, product+j, &zero, &factor);
for (k = 0; k <= n; k++) {
gf_general_multiply(gf, poly+k, &factor, &p);
gf_general_add(gf, product+(j-n+k), &p, product+(j-n+k));
}
}
}
for (j = 0; j < n; j++) gf_general_add(gf, product+j, &zero, x_to_q+j);
}
for (j = 0; j < n; j++) gf_general_set_zero(retval+j, w);
gf_general_set_one(retval, w);
while (i > 0) {
for (j = 0; j < n*2; j++) gf_general_set_zero(product+j, w);
for (j = 0; j < n; j++) {
for (k = 0; k < n; k++) {
gf_general_multiply(gf, x_to_q+j, retval+k, &p);
gf_general_add(gf, product+(j+k), &p, product+(j+k));
}
}
for (j = n*2-1; j >= n; j--) {
if (!gf_general_is_zero(product+j, w)) {
gf_general_add(gf, product+j, &zero, &factor);
for (k = 0; k <= n; k++) {
gf_general_multiply(gf, poly+k, &factor, &p);
gf_general_add(gf, product+(j-n+k), &p, product+(j-n+k));
}
}
}
for (j = 0; j < n; j++) gf_general_add(gf, product+j, &zero, retval+j);
i--;
}
return 0;
gf_general_set_one(&x, w);
gf_general_add(gf, &x, retval+1, retval+1);
free(product);
free(x_to_q);
}
/*
 * usage: print the command-line synopsis to stderr.
 *
 * cmd is the program name shown in the synopsis (normally argv[0]).  A NULL
 * cmd falls back to "gf_poly" so that "%s" is never given a null pointer.
 * This function only prints; exiting is left to the caller.
 */
static void
usage(char *cmd)
{
  if (cmd == NULL) cmd = "gf_poly";

  fprintf(stderr, "%s w <GF args> S <s value>\n", cmd);
  fprintf(stderr, "\t will build a trinomial x^2+S*x+1\n");
  fprintf(stderr, "OR\n");
  fprintf(stderr, "%s w <GF args> G coef1,power1 <coef2,power2> ... <coefn,powern>\n", cmd);
  /* Each term is coef*x^power, which is what the example below shows. */
  fprintf(stderr, "\t will build a polynomial coef1*x^(power1) + ... + coefn*x^(powern)\n");
  fprintf(stderr, "Example: ./gf_poly 8 - - - G 1,2 2,1 1,0\n");
  fprintf(stderr, "\t will build a polynomial x^2+2*x+1 with coefs from GF(2^8)\n");
}
/*
* Find irred poly of form x^2+sx+1
* a_n*x^n + a_(n-1)*x^(n-1) + ...
*
* Terms are specified as: a_i,i a_j,j, ... where
* i is the degree of the term and a_i is the coef
*
*/
int main(int argc, char **argv)
main(int argc, char **argv)
{
int w, i, power, n, ap, success, j;
gf_t gf;
int ret;
int w;
int i;
uint64_t irred_coef_s;
gf_poly_t *irred_poly;
char *term;
gf_general_t *poly, *prod;
char *string, *ptr;
char buf[100];
bzero(&gf, sizeof(gf_t));
if (argc < 4) usage(NULL);
if (argc < 4) {
usage(argv[0]);
return -1;
}
w = atoi(argv[1]);
ret = create_gf_from_argv(&gf, w, argc, argv, 3);
if (sscanf(argv[1], "%d", &w) != 1 || w <= 0) usage("Bad w.");
ap = create_gf_from_argv(&gf, w, argc, argv, 2);
if (ret <= 0) {
fprintf(stderr, "Could not create a GF\n");
return -1;
}
irred_poly = gf_poly_init(w, &gf);
if (ap == 0) usage(BM);
i = ret + 1;
if (ap == argc) usage("No powers/coefficients given.");
if (strlen(argv[i]) > 1) {
usage(argv[0]);
exit(1);
}
if (argv[i][0] == 'S') {
i++;
irred_coef_s = (uint64_t)strtoull(argv[i], NULL, 10);
/*
* If this is a trinomial of the form x^2+s*x+1, then
* we can do a quick pre-check to see if this may be
* an irreducible polynomial.
*/
if (is_suitible_s(w, &gf, irred_coef_s) < 0) {
fprintf(stderr, "%llu is not a suitable coeffient!\n", LLUI irred_coef_s);
return -1;
} else {
fprintf(stderr, "%llu IS A suitable coeffient!\n", LLUI irred_coef_s);
n = -1;
for (i = ap; i < argc; i++) {
if (strchr(argv[i], ':') == NULL || sscanf(argv[i], "%d:", &power) != 1) {
string = (char *) malloc(sizeof(char)*(strlen(argv[i]+100)));
sprintf(string, "Argument '%s' not in proper format of power:coefficient\n", argv[i]);
usage(string);
}
if (power < 0) usage("Can't have negative powers\n");
if (power > n) n = power;
}
poly = (gf_general_t *) malloc(sizeof(gf_general_t)*(n+1));
for (i = 0; i <= n; i++) gf_general_set_zero(poly+i, w);
prod = (gf_general_t *) malloc(sizeof(gf_general_t)*n);
gf_poly_add_coef(irred_poly, 1, 2);
gf_poly_add_coef(irred_poly, irred_coef_s, 1);
gf_poly_add_coef(irred_poly, 1, 0);
for (i = ap; i < argc; i++) {
sscanf(argv[i], "%d:", &power);
ptr = strchr(argv[i], ':');
ptr++;
if (strncmp(ptr, "0x", 2) == 0) {
success = gf_general_s_to_val(poly+power, w, ptr+2, 1);
} else {
success = gf_general_s_to_val(poly+power, w, ptr, 0);
}
if (success == 0) {
string = (char *) malloc(sizeof(char)*(strlen(argv[i]+100)));
sprintf(string, "Argument '%s' not in proper format of power:coefficient\n", argv[i]);
usage(string);
}
}
} else if (argv[i][0] == 'G') {
term = argv[++i];
while (term != NULL) {
uint64_t coef = strtoull(strtok(term, ","), NULL, 10);
uint64_t power = strtoull(strtok(NULL, ","), NULL, 10);
gf_poly_add_coef(irred_poly, coef, power);
if (i < argc) {
term = argv[++i];
printf("Poly:");
for (power = n; power >= 0; power--) {
if (!gf_general_is_zero(poly+power, w)) {
printf("%s", (power == n) ? " " : " + ");
if (!gf_general_is_one(poly+power, w)) {
gf_general_val_to_s(poly+power, w, buf, 1);
if (n > 0) {
printf("(0x%s)", buf);
} else {
printf("0x%s", buf);
}
}
if (power == 0) {
if (gf_general_is_one(poly+power, w)) printf("1");
} else if (power == 1) {
printf("x");
} else {
break;
printf("x^%d", power);
}
}
} else {
usage(argv[0]);
exit(1);
}
printf("\n");
if (!gf_general_is_one(poly+n, w)) {
printf("\n");
printf("Can't do Ben-Or, because the polynomial is not monic.\n");
exit(0);
}
for (i = 1; i <= n/2; i++) {
x_to_q_to_i_minus_x(&gf, w, n, poly, w, i, prod);
if (!gcd_one(&gf, w, n, poly, prod)) {
printf("Reducible.\n");
exit(0);
}
}
gf_poly_print(irred_poly, " specified via the command line\n");
ret = gf_poly_is_irred(irred_poly);
if (ret < 0) {
gf_poly_print(irred_poly, " IS NOT irreducible\n");
} else {
gf_poly_print(irred_poly, " IS irreducible\n");
}
return 0;
printf("Irreducible.\n");
exit(0);
}

View File

@ -9,7 +9,7 @@
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include "gf_complete.h"
#include "gf_method.h"
@ -43,10 +43,14 @@ void problem(char *s)
exit(1);
}
char *BM = "Bad Method: ";
void usage(char *s)
{
fprintf(stderr, "usage: gf_time w tests seed size(bytes) iterations [method [params]] - does timing\n");
fprintf(stderr, "\n");
fprintf(stderr, "does unit testing in GF(2^w)\n");
fprintf(stderr, "\n");
fprintf(stderr, "Legal w are: 1 - 32, 64 and 128\n");
fprintf(stderr, "\n");
fprintf(stderr, "Tests may be any combination of:\n");
@ -63,9 +67,12 @@ void usage(char *s)
fprintf(stderr, "\n");
fprintf(stderr, "Use -1 for time(0) as a seed.\n");
fprintf(stderr, "\n");
fprintf(stderr, "For method specification, type gf_methods\n");
fprintf(stderr, "\n");
if (s != NULL) fprintf(stderr, "%s\n", s);
if (s == BM) {
fprintf(stderr, "%s", BM);
gf_error();
} else if (s != NULL) {
fprintf(stderr, "%s\n", s);
}
exit(1);
}
@ -84,9 +91,15 @@ int main(int argc, char **argv)
time_t t0;
uint8_t *ra, *rb;
gf_general_t a;
if (argc < 6) usage(NULL);
if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n");
if (sscanf(argv[1], "%d", &w) == 0){
usage("Bad w[-pp]\n");
}
if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n");
if (sscanf(argv[4], "%d", &size) == 0) usage("Bad size\n");
if (sscanf(argv[5], "%d", &iterations) == 0) usage("Bad iterations\n");
@ -99,7 +112,7 @@ int main(int argc, char **argv)
if ((w > 32 && w != 64 && w != 128) || w < 0) usage("Bad w");
if ((size * 8) % w != 0) usage ("Bad size -- must be a multiple of w*8\n");
if (!create_gf_from_argv(&gf, w, argc, argv, 6)) usage("Bad Method");
if (!create_gf_from_argv(&gf, w, argc, argv, 6)) usage(BM);
strcpy(tests, "");
for (i = 0; i < argv[2][i] != '\0'; i++) {

243
gf_unit.c
View File

@ -10,6 +10,7 @@
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <signal.h>
#include "gf_complete.h"
#include "gf_int.h"
@ -18,6 +19,8 @@
#include "gf_general.h"
#define REGION_SIZE (16384)
#define RMASK (0x00000000ffffffffLL)
#define LMASK (0xffffffff00000000LL)
void problem(char *s)
{
@ -26,11 +29,14 @@ void problem(char *s)
exit(1);
}
char *BM = "Bad Method: ";
void usage(char *s)
{
fprintf(stderr, "usage: gf_unit w tests seed [method] - does unit testing in GF(2^w)\n");
fprintf(stderr, "\n");
fprintf(stderr, "\n");
fprintf(stderr, "Legal w are: 1 - 32, 64 and 128\n");
fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n");
fprintf(stderr, "\n");
fprintf(stderr, "Tests may be any combination of:\n");
fprintf(stderr, " A: All\n");
@ -40,16 +46,28 @@ void usage(char *s)
fprintf(stderr, "\n");
fprintf(stderr, "Use -1 for time(0) as a seed.\n");
fprintf(stderr, "\n");
fprintf(stderr, "For method specification, type gf_methods\n");
fprintf(stderr, "\n");
if (s != NULL) fprintf(stderr, "%s\n", s);
if (s == BM) {
fprintf(stderr, "%s", BM);
gf_error();
} else if (s != NULL) {
fprintf(stderr, "%s\n", s);
}
exit(1);
}
/*
 * SigHandler: signal handler that reports a segmentation fault on stderr,
 * flushes whatever is pending on stdout, and terminates with status 2.
 * The signal number is not used.
 *
 * NOTE(review): stdio calls and exit() are not async-signal-safe; this is
 * tolerable for a last-gasp diagnostic in a test program, but confirm before
 * reusing the handler elsewhere.
 */
void SigHandler(int v)
{
  (void) v;
  fputs("Problem: SegFault!\n", stderr);
  fflush(stdout);
  exit(2);
}
int main(int argc, char **argv)
{
signal(SIGSEGV, SigHandler);
int w, i, verbose, single, region, tested, top;
int start, end, xor;
int s_start, d_start, bytes, xor, alignment_test;
gf_t gf, gf_def;
time_t t0;
gf_internal_t *h;
@ -61,15 +79,21 @@ int main(int argc, char **argv)
char *ra, *rb, *rc, *rd, *target;
int align;
if (argc < 4) usage(NULL);
if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n");
if (sscanf(argv[1], "%d", &w) == 0){
usage("Bad w\n");
}
if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n");
if (t0 == -1) t0 = time(0);
MOA_Seed(t0);
if (w > 32 && w != 64 && w != 128) usage("Bad w");
if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("Bad Method");
if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage(BM);
printf("Size (bytes): %d\n", gf_size(&gf));
for (i = 0; i < strlen(argv[2]); i++) {
if (strchr("ASRV", argv[2][i]) == NULL) usage("Bad test\n");
@ -83,10 +107,18 @@ int main(int argc, char **argv)
ai = (gf_general_t *) malloc(sizeof(gf_general_t));
bi = (gf_general_t *) malloc(sizeof(gf_general_t));
ra = (char *) malloc(sizeof(char)*REGION_SIZE);
rb = (char *) malloc(sizeof(char)*REGION_SIZE);
rc = (char *) malloc(sizeof(char)*REGION_SIZE);
rd = (char *) malloc(sizeof(char)*REGION_SIZE);
//15 bytes extra to make sure it's 16byte aligned
ra = (char *) malloc(sizeof(char)*REGION_SIZE+15);
rb = (char *) malloc(sizeof(char)*REGION_SIZE+15);
rc = (char *) malloc(sizeof(char)*REGION_SIZE+15);
rd = (char *) malloc(sizeof(char)*REGION_SIZE+15);
//this still assumes 8 byte aligned pointer from malloc
//(which is usual on 32-bit machines)
ra += (uint64_t)ra & 0xf;
rb += (uint64_t)rb & 0xf;
rc += (uint64_t)rc & 0xf;
rd += (uint64_t)rd & 0xf;
if (w <= 32) {
mask = 0;
@ -97,8 +129,9 @@ int main(int argc, char **argv)
single = (strchr(argv[2], 'S') != NULL || strchr(argv[2], 'A') != NULL);
region = (strchr(argv[2], 'R') != NULL || strchr(argv[2], 'A') != NULL);
if (!gf_init_easy(&gf_def, w)) problem("No default for this value of w");
if (!gf_init_hard(&gf_def, w, GF_MULT_DEFAULT, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
(h->mult_type != GF_MULT_COMPOSITE) ? h->prim_poly : 0, 0, 0, NULL, NULL))
problem("No default for this value of w");
if (w == 4) {
mult4 = gf_w4_get_mult_table(&gf);
div4 = gf_w4_get_div_table(&gf);
@ -129,21 +162,71 @@ int main(int argc, char **argv)
if (w <= 10) {
a->w32 = i % (1 << w);
b->w32 = (i >> w);
} else if (i < 10) {
gf_general_set_zero(a, w);
gf_general_set_random(b, w, 1);
} else if (i < 20) {
gf_general_set_random(a, w, 1);
gf_general_set_zero(b, w);
} else if (i < 30) {
gf_general_set_one(a, w);
gf_general_set_random(b, w, 1);
} else if (i < 40) {
gf_general_set_random(a, w, 1);
gf_general_set_one(b, w);
//Allen: the following conditions were being run 10 times each. That didn't seem like nearly enough to
//me for these special cases, so I converted to doing this mod stuff to easily make the number of times
//run both larger and proportional to the total size of the run.
} else {
gf_general_set_random(a, w, 1);
gf_general_set_random(b, w, 1);
switch (i % 32)
{
case 0:
gf_general_set_zero(a, w);
gf_general_set_random(b, w, 1);
break;
case 1:
gf_general_set_random(a, w, 1);
gf_general_set_zero(b, w);
break;
case 2:
gf_general_set_one(a, w);
gf_general_set_random(b, w, 1);
break;
case 3:
gf_general_set_random(a, w, 1);
gf_general_set_one(b, w);
break;
default:
gf_general_set_random(a, w, 1);
gf_general_set_random(b, w, 1);
}
}
//Allen: the following special cases for w=64 are based on the code below for w=128.
//These w=64 cases are based on Dr. Plank's suggestion because some of the methods for w=64
//involve splitting it in two. I think they're less likely to give errors than the 128-bit case
//though, because the 128 bit case is always split in two.
//As with w=128, I'm arbitrarily deciding to do this sort of thing with a quarter of the cases
if (w == 64) {
switch (i % 32)
{
case 0: if (!gf_general_is_one(a, w)) a->w64 &= RMASK; break;
case 1: if (!gf_general_is_one(a, w)) a->w64 &= LMASK; break;
case 2: if (!gf_general_is_one(a, w)) a->w64 &= RMASK; if (!gf_general_is_one(b, w)) b->w64 &= RMASK; break;
case 3: if (!gf_general_is_one(a, w)) a->w64 &= RMASK; if (!gf_general_is_one(b, w)) b->w64 &= LMASK; break;
case 4: if (!gf_general_is_one(a, w)) a->w64 &= LMASK; if (!gf_general_is_one(b, w)) b->w64 &= RMASK; break;
case 5: if (!gf_general_is_one(a, w)) a->w64 &= LMASK; if (!gf_general_is_one(b, w)) b->w64 &= LMASK; break;
case 6: if (!gf_general_is_one(b, w)) b->w64 &= RMASK; break;
case 7: if (!gf_general_is_one(b, w)) b->w64 &= LMASK; break;
}
}
//Allen: for w=128, we have important special cases where one half or the other of the number is all
//zeros. The probability of hitting such a number randomly is 2^-64, so if we don't force these cases
//we'll probably never hit them. This could be implemented more efficiently by changing the set-random
//function for w=128, but I think this is easier to follow.
//I'm arbitrarily deciding to do this sort of thing with a quarter of the cases
if (w == 128) {
switch (i % 32)
{
case 0: if (!gf_general_is_one(a, w)) a->w128[0] = 0; break;
case 1: if (!gf_general_is_one(a, w)) a->w128[1] = 0; break;
case 2: if (!gf_general_is_one(a, w)) a->w128[0] = 0; if (!gf_general_is_one(b, w)) b->w128[0] = 0; break;
case 3: if (!gf_general_is_one(a, w)) a->w128[0] = 0; if (!gf_general_is_one(b, w)) b->w128[1] = 0; break;
case 4: if (!gf_general_is_one(a, w)) a->w128[1] = 0; if (!gf_general_is_one(b, w)) b->w128[0] = 0; break;
case 5: if (!gf_general_is_one(a, w)) a->w128[1] = 0; if (!gf_general_is_one(b, w)) b->w128[1] = 0; break;
case 6: if (!gf_general_is_one(b, w)) b->w128[0] = 0; break;
case 7: if (!gf_general_is_one(b, w)) b->w128[1] = 0; break;
}
}
tested = 0;
@ -195,10 +278,10 @@ int main(int argc, char **argv)
gf_general_multiply(&gf_def, a, b, d);
if (!gf_general_are_equal(c, d, w)) {
gf_general_val_to_s(a, w, as);
gf_general_val_to_s(b, w, bs);
gf_general_val_to_s(c, w, cs);
gf_general_val_to_s(d, w, ds);
gf_general_val_to_s(a, w, as, 1);
gf_general_val_to_s(b, w, bs, 1);
gf_general_val_to_s(c, w, cs, 1);
gf_general_val_to_s(d, w, ds, 1);
printf("Error in single multiplication (all numbers in hex):\n\n");
printf(" gf.multiply(gf, %s, %s) = %s\n", as, bs, cs);
printf(" The default gf multiplier returned %s\n", ds);
@ -216,9 +299,9 @@ int main(int argc, char **argv)
if (((gf_general_is_zero(a, w) || gf_general_is_zero(b, w)) && !gf_general_is_zero(c, w)) ||
(gf_general_is_one(a, w) && !gf_general_are_equal(b, c, w)) ||
(gf_general_is_one(b, w) && !gf_general_are_equal(a, c, w))) {
gf_general_val_to_s(a, w, as);
gf_general_val_to_s(b, w, bs);
gf_general_val_to_s(c, w, cs);
gf_general_val_to_s(a, w, as, 1);
gf_general_val_to_s(b, w, bs, 1);
gf_general_val_to_s(c, w, cs, 1);
printf("Error in single multiplication (all numbers in hex):\n\n");
printf(" gf.multiply(gf, %s, %s) = %s, which is clearly wrong.\n", as, bs, cs);
;
@ -229,9 +312,9 @@ int main(int argc, char **argv)
/* Dumb check to make sure that it's not returning numbers that are too big: */
if (w < 32 && (c->w32 & mask) != c->w32) {
gf_general_val_to_s(a, w, as);
gf_general_val_to_s(b, w, bs);
gf_general_val_to_s(c, w, cs);
gf_general_val_to_s(a, w, as, 1);
gf_general_val_to_s(b, w, bs, 1);
gf_general_val_to_s(c, w, cs, 1);
printf("Error in single multiplication (all numbers in hex):\n\n");
printf(" gf.multiply.w32(gf, %s, %s) = %s, which is too big.\n", as, bs, cs);
exit(1);
@ -242,10 +325,10 @@ int main(int argc, char **argv)
if (!gf_general_is_zero(a, w)) {
gf_general_divide(&gf, c, a, d);
if (!gf_general_are_equal(b, d, w)) {
gf_general_val_to_s(a, w, as);
gf_general_val_to_s(b, w, bs);
gf_general_val_to_s(c, w, cs);
gf_general_val_to_s(d, w, ds);
gf_general_val_to_s(a, w, as, 1);
gf_general_val_to_s(b, w, bs, 1);
gf_general_val_to_s(c, w, cs, 1);
gf_general_val_to_s(d, w, ds, 1);
printf("Error in single multiplication/division (all numbers in hex):\n\n");
printf(" gf.multiply(gf, %s, %s) = %s, but gf.divide(gf, %s, %s) = %s\n", as, bs, cs, cs, as, ds);
exit(1);
@ -257,40 +340,82 @@ int main(int argc, char **argv)
if (region) {
if (verbose) { printf("Testing region multiplications\n"); fflush(stdout); }
for (i = 0; i < 1000; i++) {
if (i < 20) {
gf_general_set_zero(a, w);
} else if (i < 40) {
gf_general_set_one(a, w);
} else if (i < 60) {
gf_general_set_two(a, w);
} else {
gf_general_set_random(a, w, 1);
for (i = 0; i < 1024; i++) {
//Allen: changing to a switch thing as with the single ops to make things proportional
switch (i % 32)
{
case 0:
gf_general_set_zero(a, w);
break;
case 1:
gf_general_set_one(a, w);
break;
case 2:
gf_general_set_two(a, w);
break;
default:
gf_general_set_random(a, w, 1);
}
MOA_Fill_Random_Region(ra, REGION_SIZE);
MOA_Fill_Random_Region(rb, REGION_SIZE);
xor = i%2;
xor = (i/32)%2;
align = w/8;
if (align == 0) align = 1;
if (align > 16) align = 16;
/* JSP - Cauchy test. When w < 32 & it doesn't equal 4, 8 or 16, the default is
equal to GF_REGION_CAUCHY, even if GF_REGION_CAUCHY is not set. We are testing
three alignments here:
1. Anything goes -- no alignment guaranteed.
2. Perfect alignment. Here src and dest must be aligned wrt each other,
and bytes must be a multiple of 16*w.
3. Imperfect alignment. Here we'll have src and dest be aligned wrt each
other, but bytes is simply a multiple of w. That means some XOR's will
be aligned, and some won't.
*/
if ((h->region_type & GF_REGION_CAUCHY) || (w < 32 && w != 4 && w != 8 && w != 16)) {
start = MOA_Random_W(5, 1);
end = REGION_SIZE - MOA_Random_W(5, 1);
alignment_test = (i%3);
s_start = MOA_Random_W(5, 1);
if (alignment_test == 0) {
d_start = MOA_Random_W(5, 1);
} else {
d_start = s_start;
}
bytes = (d_start > s_start) ? REGION_SIZE - d_start : REGION_SIZE - s_start;
bytes -= MOA_Random_W(5, 1);
if (alignment_test == 1) {
bytes -= (bytes % (w*16));
} else {
bytes -= (bytes % w);
}
target = rb;
while ((end-start)%w != 0) end--;
/* JSP - Otherwise, we're testing a non-cauchy test, and alignment
must be more strict. We have to make sure that the regions are
aligned wrt each other on 16-byte pointers. */
} else {
start = MOA_Random_W(5, 1) * align;
end = REGION_SIZE - (MOA_Random_W(5, 1) * align);
s_start = MOA_Random_W(5, 1) * align;
d_start = s_start;
bytes = REGION_SIZE - s_start - MOA_Random_W(5, 1);
bytes -= (bytes % align);
if (h->mult_type == GF_MULT_COMPOSITE && (h->region_type & GF_REGION_ALTMAP)) {
target = rb ;
} else {
target = ((i%4)/2) ? rb : ra;
target = (i/64)%2 ? rb : ra;
}
}
memcpy(rc, ra, REGION_SIZE);
memcpy(rd, target, REGION_SIZE);
gf_general_do_region_multiply(&gf, a, ra+start, target+start, end-start, xor);
gf_general_do_region_check(&gf, a, rc+start, rd+start, target+start, end-start, xor);
gf_general_do_region_multiply(&gf, a, ra+s_start, target+d_start, bytes, xor);
gf_general_do_region_check(&gf, a, rc+s_start, rd+d_start, target+d_start, bytes, xor);
}
}
}

1322
gf_w128.c

File diff suppressed because it is too large Load Diff

1070
gf_w16.c

File diff suppressed because it is too large Load Diff

993
gf_w32.c

File diff suppressed because it is too large Load Diff

376
gf_w4.c
View File

@ -100,7 +100,6 @@ gf_val_32_t gf_w4_euclid (gf_t *gf, gf_val_32_t b)
y_im1 = 0;
while (e_i != 1) {
e_ip1 = e_im1;
d_ip1 = d_im1;
c_i = 0;
@ -108,6 +107,7 @@ gf_val_32_t gf_w4_euclid (gf_t *gf, gf_val_32_t b)
while (d_ip1 >= d_i) {
c_i ^= (1 << (d_ip1 - d_i));
e_ip1 ^= (e_i << (d_ip1 - d_i));
if (e_ip1 == 0) return 0;
while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--;
}
@ -146,6 +146,110 @@ gf_val_32_t gf_w4_matrix (gf_t *gf, gf_val_32_t b)
return gf_bitmatrix_inverse(b, 4, ((gf_internal_t *) (gf->scratch))->prim_poly);
}
/*
 * gf_w4_shift_multiply: shift-and-add multiplication of a and b.
 *
 * Step 1 forms the carry-less product by XOR-ing a shifted copy of b for
 * every set bit of a.  Step 2 folds the bits at positions
 * 2*GF_FIELD_WIDTH-2 down to GF_FIELD_WIDTH back into the low bits by
 * XOR-ing in shifted copies of the field's polynomial (h->prim_poly).
 * The accumulator and the polynomial are deliberately kept in 8 bits.
 */
static
inline
gf_val_32_t
gf_w4_shift_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
{
  uint8_t acc = 0;
  uint8_t poly = ((gf_internal_t *) gf->scratch)->prim_poly;
  int bit;

  /* Carry-less multiply. */
  for (bit = 0; bit < GF_FIELD_WIDTH; bit++) {
    if ((a >> bit) & 1) acc ^= (b << bit);
  }

  /* Reduce, highest overflow bit first. */
  for (bit = GF_FIELD_WIDTH*2-2; bit >= GF_FIELD_WIDTH; bit--) {
    if ((acc >> bit) & 1) acc ^= (poly << (bit - GF_FIELD_WIDTH));
  }

  return acc;
}
/* Ben: This function works, but it is 33% slower than the normal shift mult */
/* gf_w4_clm_multiply: multiply a4 by b4 using the carry-less multiply
   instruction, then reduce the product with the field's polynomial.
   The whole body is compiled only when INTEL_SSE4_PCLMUL is defined;
   otherwise the function returns 0. */
static
inline
gf_val_32_t
gf_w4_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4)
{
gf_val_32_t rv = 0;
#ifdef INTEL_SSE4_PCLMUL
__m128i a, b;
__m128i result;
__m128i prim_poly;
__m128i w;
gf_internal_t * h = gf->scratch;
/* a = a4 in the low 32-bit lane, all other lanes zero. */
a = _mm_insert_epi32 (_mm_setzero_si128(), a4, 0);
/* b starts as a copy of a and has its low lane overwritten with b4, so
   its upper lanes are zero as well. */
b = _mm_insert_epi32 (a, b4, 0);
/* Only the low five bits of the polynomial are kept (mask 0x1f). */
prim_poly = _mm_set_epi32(0, 0, 0, (uint32_t)(h->prim_poly & 0x1fULL));
/* Do the initial multiply */
result = _mm_clmulepi64_si128 (a, b, 0);
/* Ben/JSP: Do the prim_poly reduction once. We are guaranteed to need only
   a single reduction step, because (w-2)/z == 1, where z is the number of
   zeros after the leading 1.
   _mm_clmulepi64_si128 is the carryless multiply operation. Here
   _mm_srli_epi64 shifts the result to the right by 4 bits. This allows
   us to multiply the prim_poly by the leading bits of the result. We
   then xor the result of that operation back with the result. */
w = _mm_clmulepi64_si128 (prim_poly, _mm_srli_epi64 (result, 4), 0);
result = _mm_xor_si128 (result, w);
/* Extracts 32 bit value from result. */
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
#endif
return rv;
}
/*
 * gf_w4_multiply_region_from_single: multiply a region by val using the
 * field's single-value multiplier (gf->multiply.w32).
 *
 * Each source byte is treated as two 4-bit values: the low and high nibbles
 * are multiplied separately and repacked into one byte.  When xor is
 * non-zero the result is XOR-ed into dest, otherwise it overwrites dest.
 * val == 0 and val == 1 are delegated to gf_multby_zero / gf_multby_one.
 * The gf_do_*_region_alignment helpers are called before and after the
 * main loop, which runs from rd.s_start / rd.d_start up to rd.d_top.
 */
static
void
gf_w4_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
  gf_region_data rd;
  uint8_t *in, *out, *out_end;
  uint8_t lo, hi, packed;

  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
  if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }

  gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 1);
  gf_do_initial_region_alignment(&rd);

  in = (uint8_t *) rd.s_start;
  out = (uint8_t *) rd.d_start;
  out_end = (uint8_t *) rd.d_top;

  for (; out < out_end; in++, out++) {
    lo = *in & 0xf;
    hi = *in >> 4;
    packed = (gf->multiply.w32(gf, val, lo) |
              ((gf->multiply.w32(gf, val, hi)) << 4));
    if (xor) {
      *out ^= packed;
    } else {
      *out = packed;
    }
  }

  gf_do_final_region_alignment(&rd);
}
/* ------------------------------------------------------------
IMPLEMENTATION: LOG_TABLE:
@ -220,18 +324,28 @@ int gf_w4_log_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
ltd = h->private;
ltd->log_tbl[0] = 0;
for (i = 0; i < GF_FIELD_SIZE; i++)
ltd->log_tbl[i]=0;
ltd->antilog_tbl_div = ltd->antilog_tbl + (GF_FIELD_SIZE-1);
b = 1;
for (i = 0; i < GF_FIELD_SIZE-1; i++) {
ltd->log_tbl[b] = i;
ltd->antilog_tbl[i] = b;
ltd->antilog_tbl[i+GF_FIELD_SIZE-1] = b;
b <<= 1;
if (b & GF_FIELD_SIZE) {
b = b ^ h->prim_poly;
}
i = 0;
do {
if (ltd->log_tbl[b] != 0 && i != 0) {
fprintf(stderr, "Cannot construct log table: Polynomial is not primitive.\n\n");
return 0;
}
ltd->log_tbl[b] = i;
ltd->antilog_tbl[i] = b;
ltd->antilog_tbl[i+GF_FIELD_SIZE-1] = b;
b <<= 1;
i++;
if (b & GF_FIELD_SIZE) b = b ^ h->prim_poly;
} while (b != 1);
if (i != GF_FIELD_SIZE - 1) {
_gf_errno = GF_E_LOGPOLY;
return 0;
}
gf->inverse.w32 = gf_w4_inverse_from_divide;
@ -300,7 +414,7 @@ static
void
gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSSE3
gf_region_data rd;
uint8_t *base, *sptr, *dptr, *top;
__m128i tl, loset, h4, r, va, th;
@ -351,37 +465,17 @@ int gf_w4_single_table_init(gf_t *gf)
gf_internal_t *h;
struct gf_single_table_data *std;
int a, b, prod, loga, logb;
uint8_t log_tbl[GF_FIELD_SIZE];
uint8_t antilog_tbl[GF_FIELD_SIZE*2];
int sse;
sse = 0;
#ifdef INTEL_SSE4
sse = 1;
#endif
h = (gf_internal_t *) gf->scratch;
std = (struct gf_single_table_data *)h->private;
b = 1;
for (a = 0; a < GF_MULT_GROUP_SIZE; a++) {
log_tbl[b] = a;
antilog_tbl[a] = b;
antilog_tbl[a+GF_MULT_GROUP_SIZE] = b;
b <<= 1;
if (b & GF_FIELD_SIZE) {
b = b ^ h->prim_poly;
}
}
bzero(std->mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
for (a = 1; a < GF_FIELD_SIZE; a++) {
loga = log_tbl[a];
for (b = 1; b < GF_FIELD_SIZE; b++) {
logb = log_tbl[b];
prod = antilog_tbl[loga+logb];
prod = gf_w4_shift_multiply(gf, a, b);
std->mult[a][b] = prod;
std->div[prod][b] = a;
}
@ -390,11 +484,16 @@ int gf_w4_single_table_init(gf_t *gf)
gf->inverse.w32 = NULL;
gf->divide.w32 = gf_w4_single_table_divide;
gf->multiply.w32 = gf_w4_single_table_multiply;
if ((h->region_type & GF_REGION_SSE) || (h->mult_type == GF_MULT_DEFAULT && sse)) {
gf->multiply_region.w32 = gf_w4_single_table_sse_multiply_region;
} else {
#ifdef INTEL_SSSE3
if(h->region_type & (GF_REGION_NOSSE | GF_REGION_CAUCHY))
gf->multiply_region.w32 = gf_w4_single_table_multiply_region;
else
gf->multiply_region.w32 = gf_w4_single_table_sse_multiply_region;
#else
gf->multiply_region.w32 = gf_w4_single_table_multiply_region;
}
if (h->region_type & GF_REGION_SSE) return 0;
#endif
return 1;
}
@ -458,32 +557,17 @@ int gf_w4_double_table_init(gf_t *gf)
gf_internal_t *h;
struct gf_double_table_data *std;
int a, b, c, prod, loga, logb, ab;
uint8_t log_tbl[GF_FIELD_SIZE];
uint8_t antilog_tbl[GF_FIELD_SIZE*2];
uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
h = (gf_internal_t *) gf->scratch;
std = (struct gf_double_table_data *)h->private;
b = 1;
for (a = 0; a < GF_MULT_GROUP_SIZE; a++) {
log_tbl[b] = a;
antilog_tbl[a] = b;
antilog_tbl[a+GF_MULT_GROUP_SIZE] = b;
b <<= 1;
if (b & GF_FIELD_SIZE) {
b = b ^ h->prim_poly;
}
}
bzero(mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
for (a = 1; a < GF_FIELD_SIZE; a++) {
loga = log_tbl[a];
for (b = 1; b < GF_FIELD_SIZE; b++) {
logb = log_tbl[b];
prod = antilog_tbl[loga+logb];
prod = gf_w4_shift_multiply(gf, a, b);
mult[a][b] = prod;
std->div[prod][b] = a;
}
@ -600,32 +684,17 @@ int gf_w4_quad_table_init(gf_t *gf)
gf_internal_t *h;
struct gf_quad_table_data *std;
int prod, loga, logb, ab, val, a, b, c, d, va, vb, vc, vd;
uint8_t log_tbl[GF_FIELD_SIZE];
uint8_t antilog_tbl[GF_FIELD_SIZE*2];
uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
h = (gf_internal_t *) gf->scratch;
std = (struct gf_quad_table_data *)h->private;
b = 1;
for (a = 0; a < GF_MULT_GROUP_SIZE; a++) {
log_tbl[b] = a;
antilog_tbl[a] = b;
antilog_tbl[a+GF_MULT_GROUP_SIZE] = b;
b <<= 1;
if (b & GF_FIELD_SIZE) {
b = b ^ h->prim_poly;
}
}
bzero(mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE);
for (a = 1; a < GF_FIELD_SIZE; a++) {
loga = log_tbl[a];
for (b = 1; b < GF_FIELD_SIZE; b++) {
logb = log_tbl[b];
prod = antilog_tbl[loga+logb];
prod = gf_w4_shift_multiply(gf, a, b);
mult[a][b] = prod;
std->div[prod][b] = a;
}
@ -702,13 +771,18 @@ int gf_w4_table_init(gf_t *gf)
{
int rt;
gf_internal_t *h;
int issse3 = 0;
#ifdef INTEL_SSSE3
issse3 = 1;
#endif
h = (gf_internal_t *) gf->scratch;
rt = (h->region_type);
if (rt == 0 || rt == GF_REGION_CAUCHY) rt |= GF_REGION_SINGLE_TABLE;
if (rt & GF_REGION_SINGLE_TABLE) {
return gf_w4_single_table_init(gf);
} else if (rt & GF_REGION_DOUBLE_TABLE) {
if (h->mult_type == GF_MULT_DEFAULT && !issse3) rt |= GF_REGION_DOUBLE_TABLE;
if (rt & GF_REGION_DOUBLE_TABLE) {
return gf_w4_double_table_init(gf);
} else if (rt & GF_REGION_QUAD_TABLE) {
if (rt & GF_REGION_LAZY) {
@ -717,7 +791,9 @@ int gf_w4_table_init(gf_t *gf)
return gf_w4_quad_table_init(gf);
}
return gf_w4_double_table_init(gf);
}
} else {
return gf_w4_single_table_init(gf);
}
return 0;
}
@ -842,7 +918,7 @@ static
void
gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *s8, *d8;
uint8_t vrev;
@ -895,7 +971,7 @@ static
void
gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
struct gf_bytwo_data *btd;
@ -960,7 +1036,7 @@ static
void
gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -986,7 +1062,7 @@ static
void
gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1014,7 +1090,7 @@ static
void
gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1041,7 +1117,7 @@ static
void
gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1071,7 +1147,7 @@ static
void
gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1099,7 +1175,7 @@ static
void
gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1127,7 +1203,7 @@ static
void
gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1156,7 +1232,7 @@ static
void
gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1185,7 +1261,7 @@ static
void
gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1215,7 +1291,7 @@ static
void
gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1245,7 +1321,7 @@ static
void
gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1274,7 +1350,7 @@ static
void
gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
int i;
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
@ -1303,7 +1379,7 @@ static
void
gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
#ifdef INTEL_SSE4
#ifdef INTEL_SSE2
uint8_t *d8, *s8, tb;
__m128i pp, m1, m2, t1, t2, va, vb;
struct gf_bytwo_data *btd;
@ -1853,114 +1929,107 @@ int gf_w4_bytwo_init(gf_t *gf)
if (h->mult_type == GF_MULT_BYTWO_p) {
gf->multiply.w32 = gf_w4_bytwo_p_multiply;
if (h->region_type == GF_REGION_SSE) {
gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region;
} else {
#ifdef INTEL_SSE2
if (h->region_type & GF_REGION_NOSSE)
gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region;
else
gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region;
#else
gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region;
}
if (h->region_type & GF_REGION_SSE)
return 0;
#endif
} else {
gf->multiply.w32 = gf_w4_bytwo_b_multiply;
if (h->region_type == GF_REGION_SSE) {
gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region;
} else {
#ifdef INTEL_SSE2
if (h->region_type & GF_REGION_NOSSE)
gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region;
else
gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region;
#else
gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region;
}
if (h->region_type & GF_REGION_SSE)
return 0;
#endif
}
gf->inverse.w32 = gf_w4_euclid;
return 1;
}
/* ------------------------------------------------------------
JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only
include it for completeness. It does have the feature that it requires no
extra memory.
*/
/* NOTE(review): two definitions are interleaved in this span -- the header and
   shift/reduce body of gf_w4_shift_multiply, and the header and #ifdef body of
   gf_w4_cfm_init.  It reads like a diff whose removed and added lines are shown
   together; confirm against the real gf_w4.c which lines are live. */
static
inline
gf_val_32_t
gf_w4_shift_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
static
int gf_w4_cfm_init(gf_t *gf)
{
uint8_t product, i, pp;
gf_internal_t *h;
h = (gf_internal_t *) gf->scratch;
pp = h->prim_poly;
product = 0;
/* Shift-and-XOR: for every set bit i of a, XOR (b << i) into product. */
for (i = 0; i < GF_FIELD_WIDTH; i++) {
if (a & (1 << i)) product ^= (b << i);
}
/* Reduction: walk bits 2*GF_FIELD_WIDTH-1 down to GF_FIELD_WIDTH and, for each
   one that is set, XOR in pp shifted left by (i - GF_FIELD_WIDTH).  This clears
   bit i only if pp has bit GF_FIELD_WIDTH set -- presumably guaranteed by the
   caller's prim_poly setup; confirm. */
for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) {
if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH));
}
return product;
/* gf_w4_clm_multiply is installed (and 1 returned) only when the build defines
   INTEL_SSE4_PCLMUL; without it the code falls through to return 0. */
#ifdef INTEL_SSE4_PCLMUL
gf->multiply.w32 = gf_w4_clm_multiply;
return 1;
#endif
return 0;
}
/* Set up the GF_MULT_SHIFT implementation on gf: inversion goes through
   gf_w4_euclid and single multiplication through gf_w4_shift_multiply.
   Always returns 1; gf_w4_init treats a 0 return from an init routine as
   failure. */
static
int gf_w4_shift_init(gf_t *gf)
{
  gf->inverse.w32 = gf_w4_euclid;
  gf->multiply.w32 = gf_w4_shift_multiply;

  return 1;
}
/* JSP: I'm putting all error-checking into gf_error_check(), so you don't
have to do error checking in scratch_size or in init */
/* gf_w4_scratch_size: report how many bytes of scratch memory the selected
   gf_w4 implementation needs.  Every positive value returned below is
   sizeof(gf_internal_t) plus the private data struct for the chosen
   mult_type / region_type; the table and log-table variants add 64 further
   bytes (presumably alignment slack -- confirm against the init routines).
   divide_type is not read, and region_tbl_size is declared but unused, in the
   code shown here.
   NOTE(review): this span reads like a diff with removed and added lines
   interleaved (unbalanced braces, and both "return -1;" and "return 0;" under
   default:), so confirm against the real gf_w4.c which lines are live and
   whether the failure value is -1 or 0. */
int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
int region_tbl_size;
int sss;
int ss;
int issse3 = 0;
/* Masks of the region flags the checks below allow to appear. */
sss = (GF_REGION_SINGLE_TABLE | GF_REGION_SSE | GF_REGION_NOSSE);
ss = (GF_REGION_SSE | GF_REGION_NOSSE);
/* issse3 records, at compile time, whether INTEL_SSSE3 is defined. */
#ifdef INTEL_SSSE3
issse3 = 1;
#endif
switch(mult_type)
{
case GF_MULT_BYTWO_p:
case GF_MULT_BYTWO_b:
if (arg1 != 0 || arg2 != 0) return -1;
if (region_type != GF_REGION_CAUCHY) {
/* Reject any flag outside SSE/NOSSE, and reject SSE and NOSSE together. */
if ((region_type | ss) != ss || (region_type & ss) == ss) return -1;
}
return sizeof(gf_internal_t) + sizeof(struct gf_bytwo_data);
break;
case GF_MULT_DEFAULT:
case GF_MULT_TABLE:
if (arg1 != 0 || arg2 != 0) return -1;
if (region_type == GF_REGION_CAUCHY || region_type == (GF_REGION_CAUCHY | GF_REGION_SINGLE_TABLE)) {
if (region_type == GF_REGION_CAUCHY) {
return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64;
}
if (mult_type == GF_MULT_DEFAULT || region_type == 0) region_type = GF_REGION_SINGLE_TABLE;
if (region_type & GF_REGION_SINGLE_TABLE) {
/* Reject flags outside sss, and reject all three sss flags set at once. */
if ((region_type | sss) != sss) return -1;
if ((region_type & sss) == sss) return -1;
return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64;
} else if (region_type & GF_REGION_DOUBLE_TABLE) {
if (region_type != GF_REGION_DOUBLE_TABLE) return -1;
/* With GF_MULT_DEFAULT and no INTEL_SSSE3, switch to the double table. */
if (mult_type == GF_MULT_DEFAULT && !issse3) region_type = GF_REGION_DOUBLE_TABLE;
if (region_type & GF_REGION_DOUBLE_TABLE) {
return sizeof(gf_internal_t) + sizeof(struct gf_double_table_data) + 64;
} else if (region_type & GF_REGION_QUAD_TABLE) {
/* QUAD_TABLE may be combined with LAZY only. */
if ((region_type | GF_REGION_LAZY) != (GF_REGION_QUAD_TABLE | GF_REGION_LAZY)) return -1;
if ((region_type & GF_REGION_LAZY) == 0) {
return sizeof(gf_internal_t) + sizeof(struct gf_quad_table_data) + 64;
} else {
return sizeof(gf_internal_t) + sizeof(struct gf_quad_table_lazy_data) + 64;
}
} else {
return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64;
}
return -1;
break;
case GF_MULT_LOG_TABLE:
if (arg1 != 0 || arg2 != 0 || (region_type != 0 && region_type != GF_REGION_CAUCHY)) return -1;
return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
break;
case GF_MULT_CARRY_FREE:
return sizeof(gf_internal_t);
break;
case GF_MULT_SHIFT:
if (arg1 != 0 || arg2 != 0 || (region_type != 0 && region_type != GF_REGION_CAUCHY)) return -1;
return sizeof(gf_internal_t);
break;
default:
return -1;
return 0;
}
return 0;
}
int
@ -1970,7 +2039,7 @@ gf_w4_init (gf_t *gf)
h = (gf_internal_t *) gf->scratch;
if (h->prim_poly == 0) h->prim_poly = 0x13;
h->prim_poly |= 0x10;
gf->multiply.w32 = NULL;
gf->divide.w32 = NULL;
gf->inverse.w32 = NULL;
@ -1978,13 +2047,13 @@ gf_w4_init (gf_t *gf)
gf->extract_word.w32 = gf_w4_extract_word;
switch(h->mult_type) {
case GF_MULT_SHIFT: if (gf_w4_shift_init(gf) == 0) return 0; break;
case GF_MULT_CARRY_FREE: if (gf_w4_cfm_init(gf) == 0) return 0; break;
case GF_MULT_SHIFT: if (gf_w4_shift_init(gf) == 0) return 0; break;
case GF_MULT_BYTWO_p:
case GF_MULT_BYTWO_b:
if (gf_w4_bytwo_init(gf) == 0) return 0; break;
case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break;
case GF_MULT_BYTWO_b: if (gf_w4_bytwo_init(gf) == 0) return 0; break;
case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break;
case GF_MULT_DEFAULT:
case GF_MULT_TABLE: if (gf_w4_table_init(gf) == 0) return 0; break;
case GF_MULT_TABLE: if (gf_w4_table_init(gf) == 0) return 0; break;
default: return 0;
}
@ -1996,17 +2065,22 @@ gf_w4_init (gf_t *gf)
gf->inverse.w32 = gf_w4_matrix;
}
if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) {
if (gf->divide.w32 == NULL) {
gf->divide.w32 = gf_w4_divide_from_inverse;
if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w4_euclid;
}
if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) {
gf->inverse.w32 = gf_w4_inverse_from_divide;
}
if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w4_inverse_from_divide;
if (h->region_type == GF_REGION_CAUCHY) {
gf->multiply_region.w32 = gf_wgen_cauchy_region;
gf->extract_word.w32 = gf_wgen_extract_word;
}
if (gf->multiply_region.w32 == NULL) {
gf->multiply_region.w32 = gf_w4_multiply_region_from_single;
}
return 1;
}

922
gf_w64.c

File diff suppressed because it is too large Load Diff

1830
gf_w8.c

File diff suppressed because it is too large Load Diff

147
gf_wgen.c
View File

@ -93,6 +93,7 @@ gf_val_32_t gf_wgen_euclid (gf_t *gf, gf_val_32_t b)
while (d_ip1 >= d_i) {
c_i ^= (1 << (d_ip1 - d_i));
e_ip1 ^= (e_i << (d_ip1 - d_i));
if (e_ip1 == 0) return 0;
while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--;
}
@ -223,7 +224,7 @@ gf_wgen_bytwo_p_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
pp = h->prim_poly;
prod = 0;
pmask = (1 << (h->w)-1);
pmask = (1 << ((h->w)-1)); /*Ben: Had an operator precedence warning here*/
amask = pmask;
while (amask != 0) {
@ -508,16 +509,11 @@ int gf_wgen_table_8_init(gf_t *gf)
}
for (a = 1; a < (1 << w); a++) {
b = 1;
p = a;
do {
for (b = 1; b < (1 << w); b++) {
p = gf_wgen_shift_multiply(gf, a, b);
std->mult[(a<<w)|b] = p;
std->div[(p<<w)|b] = a;
b = (b & (1 << (w-1))) ? (b << 1) ^ h->prim_poly : (b << 1);
b &= ((1 << w)-1);
p = (p & (1 << (w-1))) ? (p << 1) ^ h->prim_poly : (p << 1);
p &= ((1 << w)-1);
} while (b != 1);
std->div[(p<<w)|a] = b;
}
}
gf->multiply.w32 = gf_wgen_table_8_multiply;
@ -572,18 +568,13 @@ int gf_wgen_table_16_init(gf_t *gf)
std->div[a] = 0;
std->div[a<<w] = 0;
}
for (a = 1; a < (1 << w); a++) {
b = 1;
p = a;
do {
for (b = 1; b < (1 << w); b++) {
p = gf_wgen_shift_multiply(gf, a, b);
std->mult[(a<<w)|b] = p;
std->div[(p<<w)|b] = a;
b = (b & (1 << (w-1))) ? (b << 1) ^ h->prim_poly : (b << 1);
b &= ((1 << w)-1);
p = (p & (1 << (w-1))) ? (p << 1) ^ h->prim_poly : (p << 1);
p &= ((1 << w)-1);
} while (b != 1);
std->div[(p<<w)|a] = b;
}
}
gf->multiply.w32 = gf_wgen_table_16_multiply;
@ -599,6 +590,11 @@ int gf_wgen_table_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
if (h->w <= 8) return gf_wgen_table_8_init(gf);
if (h->w <= 14) return gf_wgen_table_16_init(gf);
/* Returning zero to make the compiler happy, but this won't get
executed, because it is tested in gf_wgen_scratch_size(). */
return 0;
}
static
@ -640,6 +636,7 @@ int gf_wgen_log_8_init(gf_t *gf)
struct gf_wgen_log_w8_data *std;
int w;
uint32_t a, i;
int check = 0;
h = (gf_internal_t *) gf->scratch;
w = h->w;
@ -649,17 +646,27 @@ int gf_wgen_log_8_init(gf_t *gf)
std->anti = std->log + (1<<h->w);
std->danti = std->anti + (1<<h->w)-1;
i = 0;
for (i = 0; i < (1 << w); i++)
std->log[i] = 0;
a = 1;
do {
for(i=0; i < (1<<w)-1; i++)
{
if (std->log[a] != 0) check = 1;
std->log[a] = i;
std->anti[i] = a;
std->danti[i] = a;
i++;
a = (a & (1 << (w-1))) ? (a << 1) ^ h->prim_poly : (a << 1);
a &= ((1 << w)-1);
} while (a != 1);
a <<= 1;
if(a & (1<<w))
a ^= h->prim_poly;
//a &= ((1 << w)-1);
}
if (check != 0) {
_gf_errno = GF_E_LOGPOLY;
return 0;
}
gf->multiply.w32 = gf_wgen_log_8_multiply;
gf->divide.w32 = gf_wgen_log_8_divide;
return 1;
@ -704,6 +711,7 @@ int gf_wgen_log_16_init(gf_t *gf)
struct gf_wgen_log_w16_data *std;
int w;
uint32_t a, i;
int check = 0;
h = (gf_internal_t *) gf->scratch;
w = h->w;
@ -712,17 +720,28 @@ int gf_wgen_log_16_init(gf_t *gf)
std->log = &(std->base);
std->anti = std->log + (1<<h->w);
std->danti = std->anti + (1<<h->w)-1;
i = 0;
for (i = 0; i < (1 << w); i++)
std->log[i] = 0;
a = 1;
do {
for(i=0; i < (1<<w)-1; i++)
{
if (std->log[a] != 0) check = 1;
std->log[a] = i;
std->anti[i] = a;
std->danti[i] = a;
i++;
a = (a & (1 << (w-1))) ? (a << 1) ^ h->prim_poly : (a << 1);
a &= ((1 << w)-1);
} while (a != 1);
a <<= 1;
if(a & (1<<w))
a ^= h->prim_poly;
//a &= ((1 << w)-1);
}
if (check) {
if (h->mult_type != GF_MULT_LOG_TABLE) return gf_wgen_shift_init(gf);
_gf_errno = GF_E_LOGPOLY;
return 0;
}
gf->multiply.w32 = gf_wgen_log_16_multiply;
gf->divide.w32 = gf_wgen_log_16_divide;
@ -768,7 +787,8 @@ int gf_wgen_log_32_init(gf_t *gf)
struct gf_wgen_log_w32_data *std;
int w;
uint32_t a, i;
int check = 0;
h = (gf_internal_t *) gf->scratch;
w = h->w;
std = (struct gf_wgen_log_w32_data *) h->private;
@ -777,17 +797,27 @@ int gf_wgen_log_32_init(gf_t *gf)
std->anti = std->log + (1<<h->w);
std->danti = std->anti + (1<<h->w)-1;
i = 0;
for (i = 0; i < (1 << w); i++)
std->log[i] = 0;
a = 1;
do {
for(i=0; i < (1<<w)-1; i++)
{
if (std->log[a] != 0) check = 1;
std->log[a] = i;
std->anti[i] = a;
std->danti[i] = a;
i++;
a = (a & (1 << (w-1))) ? (a << 1) ^ h->prim_poly : (a << 1);
a &= ((1 << w)-1);
} while (a != 1);
a <<= 1;
if(a & (1<<w))
a ^= h->prim_poly;
//a &= ((1 << w)-1);
}
if (check != 0) {
_gf_errno = GF_E_LOGPOLY;
return 0;
}
gf->multiply.w32 = gf_wgen_log_32_multiply;
gf->divide.w32 = gf_wgen_log_32_divide;
return 1;
@ -802,15 +832,16 @@ int gf_wgen_log_init(gf_t *gf)
if (h->w <= 8) return gf_wgen_log_8_init(gf);
if (h->w <= 16) return gf_wgen_log_16_init(gf);
if (h->w <= 32) return gf_wgen_log_32_init(gf);
/* Returning zero to make the compiler happy, but this won't get
executed, because it is tested in gf_wgen_scratch_size(). */
return 0;
}
int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
if (w > 32 || w < 0) return -1;
if ((region_type | GF_REGION_CAUCHY) != GF_REGION_CAUCHY) return -1;
switch(mult_type)
{
case GF_MULT_DEFAULT:
@ -828,40 +859,37 @@ int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type,
case GF_MULT_SHIFT:
case GF_MULT_BYTWO_b:
case GF_MULT_BYTWO_p:
if (arg1 != 0 || arg2 != 0) return -1;
return sizeof(gf_internal_t);
break;
case GF_MULT_GROUP:
if (arg1 <= 0 || arg2 <= 0) return -1;
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_group_data) +
sizeof(uint32_t) * (1 << arg1) +
sizeof(uint32_t) * (1 << arg2) + 64;
break;
case GF_MULT_TABLE:
if (arg1 != 0 || arg2 != 0) return -1;
if (w <= 8) {
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_table_w8_data) +
sizeof(uint8_t)*(1 << w)*(1<<w)*2 + 64;
} else if (w < 15) {
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_table_w16_data) +
sizeof(uint16_t)*(1 << w)*(1<<w)*2 + 64;
} else return -1;
}
return 0;
case GF_MULT_LOG_TABLE:
if (arg1 != 0 || arg2 != 0) return -1;
if (w <= 8) {
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_log_w8_data) +
sizeof(uint8_t)*(1 << w)*3;
} else if (w <= 16) {
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_log_w16_data) +
sizeof(uint16_t)*(1 << w)*3;
} else if (w <= 29) {
} else if (w <= 27) {
return sizeof(gf_internal_t) + sizeof(struct gf_wgen_log_w32_data) +
sizeof(uint32_t)*(1 << w)*3;
} else return -1;
} else
return 0;
default:
return -1;
return 0;
}
}
@ -935,6 +963,13 @@ int gf_wgen_init(gf_t *gf)
case 32: h->prim_poly = 00020000007; break;
default: fprintf(stderr, "gf_wgen_init: w not defined yet\n"); exit(1);
}
} else {
if (h->w == 32) {
h->prim_poly &= 0xffffffff;
} else {
h->prim_poly |= (1 << h->w);
if (h->prim_poly & ~((1ULL<<(h->w+1))-1)) return 0;
}
}
gf->multiply.w32 = NULL;
@ -950,7 +985,7 @@ int gf_wgen_init(gf_t *gf)
} else if (h->w <= 16) {
if (gf_wgen_log_init(gf) == 0) return 0;
} else {
if (gf_wgen_group_init(gf) == 0) return 0;
if (gf_wgen_bytwo_p_init(gf) == 0) return 0;
}
break;
case GF_MULT_SHIFT: if (gf_wgen_shift_init(gf) == 0) return 0; break;

View File

@ -1,31 +0,0 @@
License.txt
README.txt
GNUmakefile
gf.c
gf_add.c
gf_complete.h
gf_div.c
gf_example_1.c
gf_example_2.c
gf_example_3.c
gf_example_4.c
gf_general.c
gf_general.h
gf_int.h
gf_method.c
gf_method.h
gf_methods.c
gf_mult.c
gf_poly.c
gf_rand.c
gf_rand.h
gf_time.c
gf_unit.c
gf_w128.c
gf_w16.c
gf_w32.c
gf_w4.c
gf_w64.c
gf_w8.c
gf_wgen.c
whats_my_sse.c

View File

View File

View File

@ -1,14 +0,0 @@
# Timing sweep driver for ./gf_time.
# Usage: sh <this script> w gf_specs...   (at least 4 arguments in total)
# For i = 1024, 2048, ... up to 134217728 it runs ./gf_time once and echoes a
# single line: i, iter, w, the gf_specs, then lines 2-3 of gf_time's output.
# NOTE(review): tmp.sh runs sed over a file named tmp-time-test.sh to swap the
# multiplier at the end of the first awk program below; this looks like that
# template, so keep that awk text intact -- confirm before reformatting.
if [ $# -lt 4 ]; then
echo 'usage: sh tmp-test.sh w gf_specs (e.g. LOG - -)' >&2
exit 1
fi
# The first argument is w; everything after it is passed through to gf_time.
w=$1
shift
i=1024
while [ $i -le 134217728 ]; do
# iter shrinks as i grows, so i * iter stays constant across the sweep.
iter=`echo $i | awk '{ print (134217728/$1)*1 }'`
# head/tail keep only the second and third lines that gf_time prints.
echo $i $iter $w $* `./gf_time $w G -1 $i $iter $* | head -n 3 | tail -n 2`
# Double i for the next pass.
i=`echo $i | awk '{ print $1*2 }'`
done

1583
tmp.c

File diff suppressed because it is too large Load Diff

15
tmp.sh
View File

@ -1,15 +0,0 @@
# Runs the gf_time sweep twice, once per multiplier (5, then 10).
# Each pass rewrites tmp-time-test.sh into tmp2.sh with the multiplier
# substituted by sed, then appends the results for a fixed list of
# field-width / method configurations to tmp-<multiplier>-out.txt.
for scale in 5 10 ; do
  out="tmp-$scale-out.txt"

  # Instantiate the sweep script for this multiplier.
  sed "s/1 }/$scale }/" tmp-time-test.sh > tmp2.sh

  # Run one configuration and append its output to this pass's log.
  bench() { sh tmp2.sh "$@" >> "$out" ; }

  # w = 4
  bench 4 LOG - -
  bench 4 TABLE - -
  bench 4 TABLE SINGLE,SSE -

  # w = 8
  bench 8 LOG - -
  bench 8 TABLE - -
  bench 8 SPLIT 8 4 SSE -

  # w = 16
  bench 16 LOG - -
  bench 16 SPLIT 16 4 SSE,STDMAP -
  bench 16 SPLIT 16 4 SSE,ALTMAP -

  # w = 32
  bench 32 SPLIT 8 8 - -
  bench 32 SPLIT 32 4 SSE,STDMAP -
  bench 32 SPLIT 32 4 SSE,ALTMAP -
done

162
tmp.txt
View File

@ -1,162 +0,0 @@
Tables[0] = 0000000000000000 3b60e7ccf8f4454e 76c1cf99f1e88a9c 4da12855091ccfd2 ed839f33e3d11538 d6e378ff1b255076 9b4250aa12399fa4 a022b766eacddaea db073e67c7a22a6b e067d9ab3f566f25 adc6f1fe364aa0f7 96a61632cebee5b9 3684a15424733f53 0de44698dc877a1d 40456ecdd59bb5cf 7b2589012d6ff081
Tij 81 cf 1d 53 b9 f7 25 6b ea a4 76 38 d2 9c 4e 00
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
Tij 01 cd 98 54 32 fe ab 67 66 aa ff 33 55 99 cc 00
Tij 89 6e 46 a1 16 f1 d9 3e b7 50 78 9f 28 cf e7 00
Tij 25 45 e4 84 a6 c6 67 07 22 42 e3 83 a1 c1 60 00
Tij 7b 40 0d 36 96 ad e0 db a0 9b d6 ed 4d 76 3b 00
Tables[1] = 0000000000000000 b60e7ccf8f4454cd 6c1cf99f1e88a981 da12855091ccfd4c d839f33e3d115302 6e378ff1b25507cf b4250aa12399fa83 022b766eacddae4e b073e67c7a22a61f 067d9ab3f566f2d2 dc6f1fe364aa0f9e 6a61632cebee5b53 684a15424733f51d de44698dc877a1d0 0456ecdd59bb5c9c b2589012d6ff0851
Tij 51 9c d0 1d 53 9e d2 1f 4e 83 cf 02 4c 81 cd 00
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
Tij 12 dd 8d 42 2c e3 b3 7c 6e a1 f1 3e 50 9f cf 00
Tij 90 ec 69 15 63 1f 9a e6 76 0a 8f f3 85 f9 7c 00
Tij 58 56 44 4a 61 6f 7d 73 2b 25 37 39 12 1c 0e 00
Tij b2 04 de 68 6a dc 06 b0 02 b4 6e d8 da 6c b6 00
Tables[2] = 0000000000000000 60e7ccf8f4454c25 c1cf99f1e88a984a a12855091ccfd46f 839f33e3d115308f e378ff1b25507caa 4250aa12399fa8c5 22b766eacddae4e0 073e67c7a22a6105 67d9ab3f566f2d20 c6f1fe364aa0f94f a61632cebee5b56a 84a15424733f518a e44698dc877a1daf 456ecdd59bb5c9c0 2589012d6ff085e5
Tij e5 c0 af 8a 6a 4f 20 05 e0 c5 aa 8f 6f 4a 25 00
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
Tij 01 cd 98 54 32 fe ab 67 66 aa ff 33 55 99 cc 00
Tij 89 6e 46 a1 16 f1 d9 3e b7 50 78 9f 28 cf e7 00
Tij 25 45 e4 84 a6 c6 67 07 22 42 e3 83 a1 c1 60 00
Tables[3] = 0000000000000000 0e7ccf8f4454c20a 1cf99f1e88a98414 12855091ccfd461e 39f33e3d11530828 378ff1b25507ca22 250aa12399fa8c3c 2b766eacddae4e36 73e67c7a22a61050 7d9ab3f566f2d25a 6f1fe364aa0f9444 61632cebee5b564e 4a15424733f51878 44698dc877a1da72 56ecdd59bb5c9c6c 589012d6ff085e66
Tij 66 6c 72 78 4e 44 5a 50 36 3c 22 28 1e 14 0a 00
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
Tij 12 dd 8d 42 2c e3 b3 7c 6e a1 f1 3e 50 9f cf 00
Tij 90 ec 69 15 63 1f 9a e6 76 0a 8f f3 85 f9 7c 00
Tij 58 56 44 4a 61 6f 7d 73 2b 25 37 39 12 1c 0e 00
Tables[4] = 0000000000000000 e7ccf8f4454c20a0 cf99f1e88a98415b 2855091ccfd461fb 9f33e3d1153082ad 78ff1b25507ca20d 50aa12399fa8c3f6 b766eacddae4e356 3e67c7a22a610541 d9ab3f566f2d25e1 f1fe364aa0f9441a 1632cebee5b564ba a15424733f5187ec 4698dc877a1da74c 6ecdd59bb5c9c6b7 89012d6ff085e617
Tij 17 b7 4c ec ba 1a e1 41 56 f6 0d ad fb 5b a0 00
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
Tij 01 cd 98 54 32 fe ab 67 66 aa ff 33 55 99 cc 00
Tij 89 6e 46 a1 16 f1 d9 3e b7 50 78 9f 28 cf e7 00
Tables[5] = 0000000000000000 7ccf8f4454c20a82 f99f1e88a9841504 855091ccfd461f86 f33e3d1153082a13 8ff1b25507ca2091 0aa12399fa8c3f17 766eacddae4e3595 e67c7a22a610543d 9ab3f566f2d25ebf 1fe364aa0f944139 632cebee5b564bbb 15424733f5187e2e 698dc877a1da74ac ecdd59bb5c9c6b2a 9012d6ff085e61a8
Tij a8 2a ac 2e bb 39 bf 3d 95 17 91 13 86 04 82 00
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
Tij 12 dd 8d 42 2c e3 b3 7c 6e a1 f1 3e 50 9f cf 00
Tij 90 ec 69 15 63 1f 9a e6 76 0a 8f f3 85 f9 7c 00
Tables[6] = 0000000000000000 ccf8f4454c20a861 99f1e88a984150d9 55091ccfd461f8b8 33e3d1153082a1a9 ff1b25507ca209c8 aa12399fa8c3f170 66eacddae4e35911 67c7a22a61054352 ab3f566f2d25eb33 fe364aa0f944138b 32cebee5b564bbea 5424733f5187e2fb 98dc877a1da74a9a cdd59bb5c9c6b222 012d6ff085e61a43
Tij 43 22 9a fb ea 8b 33 52 11 70 c8 a9 b8 d9 61 00
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
Tij 01 cd 98 54 32 fe ab 67 66 aa ff 33 55 99 cc 00
Tables[7] = 0000000000000000 cf8f4454c20a86a4 9f1e88a984150d53 5091ccfd461f8bf7 3e3d1153082a1abd f1b25507ca209c19 a12399fa8c3f17ee 6eacddae4e35914a 7c7a22a61054357a b3f566f2d25eb3de e364aa0f94413829 2cebee5b564bbe8d 424733f5187e2fc7 8dc877a1da74a963 dd59bb5c9c6b2294 12d6ff085e61a430
Tij 30 94 63 c7 8d 29 de 7a 4a ee 19 bd f7 53 a4 00
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
Tij 12 dd 8d 42 2c e3 b3 7c 6e a1 f1 3e 50 9f cf 00
Tables[8] = 0000000000000000 f8f4454c20a86af4 f1e88a984150d5f3 091ccfd461f8bf07 e3d1153082a1abfd 1b25507ca209c109 12399fa8c3f17e0e eacddae4e35914fa c7a22a61054357e1 3f566f2d25eb3d15 364aa0f944138212 cebee5b564bbe8e6 24733f5187e2fc1c dc877a1da74a96e8 d59bb5c9c6b229ef 2d6ff085e61a431b
Tij 1b ef e8 1c e6 12 15 e1 fa 0e 09 fd 07 f3 f4 00
Tij 43 29 96 fc e8 82 3d 57 14 7e c1 ab bf d5 6a 00
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
Tij 2d d5 dc 24 ce 36 3f c7 ea 12 1b e3 09 f1 f8 00
Tables[9] = 0000000000000000 8f4454c20a86afd9 1e88a984150d5fa9 91ccfd461f8bf070 3d1153082a1abf52 b25507ca209c108b 2399fa8c3f17e0fb acddae4e35914f22 7a22a61054357ea4 f566f2d25eb3d17d 64aa0f944138210d ebee5b564bbe8ed4 4733f5187e2fc1f6 c877a1da74a96e2f 59bb5c9c6b229e5f d6ff085e61a43186
Tij 86 5f 2f f6 d4 0d 7d a4 22 fb 8b 52 70 a9 d9 00
Tij 31 9e 6e c1 8e 21 d1 7e 4f e0 10 bf f0 5f af 00
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
Tij d6 59 c8 47 eb 64 f5 7a ac 23 b2 3d 91 1e 8f 00
Tables[10] = 0000000000000000 f4454c20a86afd48 e88a984150d5fa8b 1ccfd461f8bf07c3 d1153082a1abf50d 25507ca209c10845 399fa8c3f17e0f86 cddae4e35914f2ce a22a61054357ea01 566f2d25eb3d1749 4aa0f9441382108a bee5b564bbe8edc2 733f5187e2fc1f0c 877a1da74a96e244 9bb5c9c6b229e587 6ff085e61a4318cf
Tij cf 87 44 0c c2 8a 49 01 ce 86 45 0d c3 8b 48 00
Tij 18 e5 e2 1f ed 10 17 ea f2 0f 08 f5 07 fa fd 00
Tij 43 29 96 fc e8 82 3d 57 14 7e c1 ab bf d5 6a 00
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
Tij 6f 9b 87 73 be 4a 56 a2 cd 39 25 d1 1c e8 f4 00
Tables[11] = 0000000000000000 4454c20a86afd419 88a984150d5fa832 ccfd461f8bf07c2b 1153082a1abf507f 5507ca209c108466 99fa8c3f17e0f84d ddae4e35914f2c54 22a61054357ea0fe 66f2d25eb3d174e7 aa0f9441382108cc ee5b564bbe8edcd5 33f5187e2fc1f081 77a1da74a96e2498 bb5c9c6b229e58b3 ff085e61a4318caa
Tij aa b3 98 81 d5 cc e7 fe 54 4d 66 7f 2b 32 19 00
Tij 8c 58 24 f0 dc 08 74 a0 2c f8 84 50 7c a8 d4 00
Tij 31 9e 6e c1 8e 21 d1 7e 4f e0 10 bf f0 5f af 00
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
Tij ff bb 77 33 ee aa 66 22 dd 99 55 11 cc 88 44 00
Tables[12] = 0000000000000000 454c20a86afd41fc 8a984150d5fa83f8 cfd461f8bf07c204 153082a1abf507eb 507ca209c1084617 9fa8c3f17e0f8413 dae4e35914f2c5ef 2a61054357ea0fd6 6f2d25eb3d174e2a a0f9441382108c2e e5b564bbe8edcdd2 3f5187e2fc1f083d 7a1da74a96e249c1 b5c9c6b229e58bc5 f085e61a4318ca39
Tij 39 c5 c1 3d d2 2e 2a d6 ef 13 17 eb 04 f8 fc 00
Tij ca 8b 49 08 cd 8c 4e 0f c5 84 46 07 c2 83 41 00
Tij 18 e5 e2 1f ed 10 17 ea f2 0f 08 f5 07 fa fd 00
Tij 43 29 96 fc e8 82 3d 57 14 7e c1 ab bf d5 6a 00
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
Tij f0 b5 7a 3f e5 a0 6f 2a da 9f 50 15 cf 8a 45 00
Tables[13] = 0000000000000000 54c20a86afd41fac a984150d5fa83f58 fd461f8bf07c20f4 53082a1abf507eab 07ca209c10846107 fa8c3f17e0f841f3 ae4e35914f2c5e5f a61054357ea0fd56 f2d25eb3d174e2fa 0f9441382108c20e 5b564bbe8edcdda2 f5187e2fc1f083fd a1da74a96e249c51 5c9c6b229e58bca5 085e61a4318ca309
Tij 09 a5 51 fd a2 0e fa 56 5f f3 07 ab f4 58 ac 00
Tij a3 bc 9c 83 dd c2 e2 fd 5e 41 61 7e 20 3f 1f 00
Tij 8c 58 24 f0 dc 08 74 a0 2c f8 84 50 7c a8 d4 00
Tij 31 9e 6e c1 8e 21 d1 7e 4f e0 10 bf f0 5f af 00
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
Tij 08 5c a1 f5 5b 0f f2 a6 ae fa 07 53 fd a9 54 00
Tables[14] = 0000000000000000 4c20a86afd41fab7 984150d5fa83f56e d461f8bf07c20fd9 3082a1abf507eac7 7ca209c108461070 a8c3f17e0f841fa9 e4e35914f2c5e51e 61054357ea0fd58e 2d25eb3d174e2f39 f9441382108c20e0 b564bbe8edcdda57 5187e2fc1f083f49 1da74a96e249c5fe c9c6b229e58bca27 85e61a4318ca3090
Tij 90 27 fe 49 57 e0 39 8e 1e a9 70 c7 d9 6e b7 00
Tij 30 ca c5 3f da 20 2f d5 e5 1f 10 ea 0f f5 fa 00
Tij ca 8b 49 08 cd 8c 4e 0f c5 84 46 07 c2 83 41 00
Tij 18 e5 e2 1f ed 10 17 ea f2 0f 08 f5 07 fa fd 00
Tij 43 29 96 fc e8 82 3d 57 14 7e c1 ab bf d5 6a 00
Tij 1a b2 4a e2 bb 13 eb 43 59 f1 09 a1 f8 50 a8 00
Tij e6 c6 a7 87 64 44 25 05 e3 c3 a2 82 61 41 20 00
Tij 85 c9 1d 51 b5 f9 2d 61 e4 a8 7c 30 d4 98 4c 00
Tables[15] = 0000000000000000 c20a86afd41fab1c 84150d5fa83f5623 461f8bf07c20fd3f 082a1abf507eac5d ca209c1084610741 8c3f17e0f841fa7e 4e35914f2c5e5162 1054357ea0fd58ba d25eb3d174e2f3a6 9441382108c20e99 564bbe8edcdda585 187e2fc1f083f4e7 da74a96e249c5ffb 9c6b229e58bca2c4 5e61a4318ca309d8
Tij d8 c4 fb e7 85 99 a6 ba 62 7e 41 5d 3f 23 1c 00
Tij 09 a2 5f f4 a5 0e f3 58 51 fa 07 ac fd 56 ab 00
Tij a3 bc 9c 83 dd c2 e2 fd 5e 41 61 7e 20 3f 1f 00
Tij 8c 58 24 f0 dc 08 74 a0 2c f8 84 50 7c a8 d4 00
Tij 31 9e 6e c1 8e 21 d1 7e 4f e0 10 bf f0 5f af 00
Tij a4 22 a9 2f be 38 b3 35 91 17 9c 1a 8b 0d 86 00
Tij 61 6b 74 7e 4b 41 5e 54 35 3f 20 2a 1f 15 0a 00
Tij 5e 9c da 18 56 94 d2 10 4e 8c ca 08 46 84 c2 00
Val= 3b60e7ccf8f4454e
v0 28 4f 14 e3 1b f7 ee 76 b9 31 47 0a ba 8b 70 fc
v0 12 56 28 59 66 cd d2 d2 1c 91 30 26 a8 95 0a a9
v0 ee 5d 14 e3 fb c8 45 23 a9 fd 8c f1 ff c9 2c 93
v0 65 ce 82 f2 dc ec 6b e2 53 a3 9c fb 07 70 e7 ad
v0 1b 87 3d 7b 4d 15 1d c2 d2 45 f3 03 4b e4 f4 9b
v0 3b 01 2b c5 c5 d2 9d a9 68 7c a2 61 c9 5b 49 90
v0 5d 13 7d ef eb f1 52 da a0 29 89 ef 08 f2 51 3b
v0 17 05 b3 80 77 3a f2 5e 82 7a c9 39 84 df 8e bf
p0 11 fc 47 f4 6c 01 44 ba ba 62 e7 3f ba fb ba 85
p0 a6 fc 67 16 5f c3 95 fc 58 51 f4 fd 58 5f 58 a5
p0 12 fc 1f b3 50 1e 3f 9a fd 5e 83 20 fd 9c fd dd
p0 d9 fc 1e ee 22 42 10 7f a0 2c f0 7c a0 24 a0 dc
p0 a2 fc 4c 30 41 ce ad eb 7e 4f c1 f0 7e 6e 7e 8e
p0 8b fc 7c 7b 9f b5 38 67 35 91 2f 8b 35 a9 35 be
p0 07 fc 89 1a 3b 21 fd db 54 35 7e 1f 54 74 54 4b
p0 cf fc 94 5e 40 78 c2 31 10 4e 18 46 10 da 10 56

13
tmp2.sh
View File

@ -1,13 +0,0 @@
# Timing sweep for gf_time: doubles the size argument from 1024 up to
# 1073741824 and echoes one result line per size.
# Arguments: w, followed by the gf_specs that are handed on to gf_time.
# NOTE(review): tmp.sh writes a file named tmp2.sh via sed on every pass, so
# manual edits here may be overwritten -- confirm before relying on them.
[ $# -ge 4 ] || {
  echo 'usage: sh tmp-test.sh w gf_specs (e.g. LOG - -)' >&2
  exit 1
}

width=$1
shift

size=1024
while [ $size -le 1073741824 ]; do
  # The iteration count scales inversely with the size.
  iters=$(echo $size | awk '{ print (1073741824/$1)*10 }')
  echo $size $iters $width $* $(gf_time $width R -1 $size $iters $*)
  size=$(echo $size | awk '{ print $1*2 }')
done