From 70b6d55aee0256e8d656733004a142d798de0ef4 Mon Sep 17 00:00:00 2001 From: plank Date: Sat, 8 Dec 2012 15:28:43 +0000 Subject: [PATCH] Big checkin after I've lost the others. Ha ha. git-svn-id: svn://mamba.eecs.utk.edu/home/plank/svn/Galois-Library@78 36f187d4-5712-4624-889c-152d48957efa --- GNUmakefile | 51 + Log-Zero-for-w=8.odg | Bin 0 -> 9769 bytes README | 1 + explanation.html | 777 ++++++++++++ gf.c | 478 ++++++++ gf.h | 123 ++ gf_54 | Bin 0 -> 113032 bytes gf_54.c | 18 + gf_div | Bin 0 -> 121856 bytes gf_div.c | 116 ++ gf_general.c | 421 +++++++ gf_general.h | 55 + gf_int.h | 101 ++ gf_method.c | 185 +++ gf_method.h | 15 + gf_methods | Bin 0 -> 121848 bytes gf_methods.c | 141 +++ gf_mult | Bin 0 -> 121856 bytes gf_mult.c | 116 ++ gf_rand.c | 72 ++ gf_rand.h | 18 + gf_time | Bin 0 -> 131480 bytes gf_time.c | 195 +++ gf_unit | Bin 0 -> 131360 bytes gf_unit.c | 222 ++++ gf_w128.c | 496 ++++++++ gf_w16.c | 1941 ++++++++++++++++++++++++++++++ gf_w32.c | 2350 +++++++++++++++++++++++++++++++++++++ gf_w4.c | 2006 +++++++++++++++++++++++++++++++ gf_w64.c | 206 ++++ gf_w8.c | 1837 +++++++++++++++++++++++++++++ gf_wgen.c | 945 +++++++++++++++ junk | Bin 0 -> 8712 bytes junk-pick-best-output | Bin 0 -> 28584 bytes junk-pick-best-output.cpp | 78 ++ junk-proc.awk | 11 + junk-save.c | 658 +++++++++++ junk-w16-backup.c | 1585 +++++++++++++++++++++++++ junk-w16-timing-tests.sh | 12 + junk-w2.eps | 203 ++++ junk-w32-backup.c | 1337 +++++++++++++++++++++ junk-w32-single-time.c | 16 + junk-w4-out.txt | 60 + junk-w4-timing-out.txt | 792 +++++++++++++ junk-w4-timing-tests.sh | 11 + junk-w4-timing.jgr | 11 + junk-w4.jgr | 6 + junk-w8-timing-out.txt | 936 +++++++++++++++ junk-w8-timing-tests.sh | 13 + junk-w8-timing.jgr | 11 + junk.c | 17 + junk.ps | 199 ++++ junk.sh | 14 + junk.txt | 110 ++ junk_gf_unit.c | 957 +++++++++++++++ tests.txt | 0 tmp-10-out.txt | 0 tmp-time-test.sh | 14 + tmp.c | 1583 +++++++++++++++++++++++++ tmp.sh | 15 + tmp.txt | 294 +++++ tmp2.sh | 13 + 62 files 
changed, 21842 insertions(+) create mode 100644 GNUmakefile create mode 100644 Log-Zero-for-w=8.odg create mode 100644 README create mode 100644 explanation.html create mode 100644 gf.c create mode 100644 gf.h create mode 100755 gf_54 create mode 100644 gf_54.c create mode 100755 gf_div create mode 100644 gf_div.c create mode 100644 gf_general.c create mode 100644 gf_general.h create mode 100644 gf_int.h create mode 100644 gf_method.c create mode 100644 gf_method.h create mode 100755 gf_methods create mode 100644 gf_methods.c create mode 100755 gf_mult create mode 100644 gf_mult.c create mode 100644 gf_rand.c create mode 100644 gf_rand.h create mode 100755 gf_time create mode 100644 gf_time.c create mode 100755 gf_unit create mode 100644 gf_unit.c create mode 100644 gf_w128.c create mode 100644 gf_w16.c create mode 100644 gf_w32.c create mode 100644 gf_w4.c create mode 100644 gf_w64.c create mode 100644 gf_w8.c create mode 100644 gf_wgen.c create mode 100755 junk create mode 100755 junk-pick-best-output create mode 100644 junk-pick-best-output.cpp create mode 100644 junk-proc.awk create mode 100644 junk-save.c create mode 100644 junk-w16-backup.c create mode 100644 junk-w16-timing-tests.sh create mode 100644 junk-w2.eps create mode 100644 junk-w32-backup.c create mode 100644 junk-w32-single-time.c create mode 100644 junk-w4-out.txt create mode 100644 junk-w4-timing-out.txt create mode 100644 junk-w4-timing-tests.sh create mode 100644 junk-w4-timing.jgr create mode 100644 junk-w4.jgr create mode 100644 junk-w8-timing-out.txt create mode 100644 junk-w8-timing-tests.sh create mode 100644 junk-w8-timing.jgr create mode 100644 junk.c create mode 100644 junk.ps create mode 100644 junk.sh create mode 100644 junk.txt create mode 100644 junk_gf_unit.c create mode 100644 tests.txt create mode 100644 tmp-10-out.txt create mode 100644 tmp-time-test.sh create mode 100644 tmp.c create mode 100644 tmp.sh create mode 100644 tmp.txt create mode 100644 tmp2.sh diff --git 
a/GNUmakefile b/GNUmakefile new file mode 100644 index 0000000..389ec20 --- /dev/null +++ b/GNUmakefile @@ -0,0 +1,51 @@ +# +# GNUmakefile for Galois field library +# +# + +SRCS = gf_w4.c gf_w8.c gf_w16.c gf_w32.c gf_w64.c gf_w128.c gf_wgen.c gf.c gf_unit.c gf_time.c gf_mult.c gf_method.c gf_54.c gf_methods.c gf_div.c gf_rand.c gf_general.c +HDRS = gf.h gf_int.h +EXECUTABLES = gf_mult gf_div gf_unit gf_time gf_54 gf_methods +CFLAGS = -O3 -msse4 -DINTEL_SSE4 +# CFLAGS = -g +LDFLAGS = -O3 -msse4 +RM = /bin/rm -f + +OBJS = $(addsuffix .o, $(basename $(SRCS))) + +DEFAULT = $(EXECUTABLES) + +default: $(DEFAULT) + +all: $(OBJS) + +gf_methods: gf_methods.o gf.o gf_method.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o +gf_time: gf_time.o gf.o gf_method.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o gf_rand.o gf_general.o +gf_unit: gf_unit.o gf.o gf_method.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o gf_rand.o gf_general.o +gf_mult: gf_mult.o gf.o gf_wgen.o gf_w4.o gf_method.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o +gf_div: gf_div.o gf.o gf_wgen.o gf_w4.o gf_method.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o +gf_54: gf_54.o gf.o gf_wgen.o gf_w4.o gf_w8.o gf_w16.o gf_w32.o gf_w64.o gf_w128.o + +clean: + $(RM) $(OBJS) gf_div.c + +spotless: clean + $(RM) *~ $(EXECUTABLES) + +gf_div.o: gf.h gf_method.h +gf_methods.o: gf.h gf_method.h +gf_time.o: gf.h gf_method.h gf_rand.h gf_general.h +gf_wgen.o: gf_int.h gf.h +gf_w4.o: gf_int.h gf.h +gf_w8.o: gf_int.h gf.h +gf_w16.o: gf_int.h gf.h +gf_w32.o: gf_int.h gf.h +gf_w64.o: gf_int.h gf.h +gf_54.o: gf.h +gf_unit.o: gf.h gf_method.h gf_rand.h gf_general.h +gf_general.o: gf.h gf_int.h gf_general.h gf_rand.h +gf_mult.o: gf.h gf_method.h +gf_method.o: gf.h + +gf_div.c: gf_mult.c + sed 's/multiply/divide/g' gf_mult.c > gf_div.c diff --git a/Log-Zero-for-w=8.odg b/Log-Zero-for-w=8.odg new file mode 100644 index 
0000000000000000000000000000000000000000..138a673fc1e2ee1a74272fc00161c42d9bf9c54d GIT binary patch literal 9769 zcma)C2Ut@}(*{JEbfqXDRRIw~Rl3qaklqmxLI|NGF$uj&ktS6s0)l{a2u+%F6%dfB zbm>h%q!+1wV!3kt^*-O`NuE9D%)B!@yV!as+DLxh8~8Er3IN{DqF2M_y$DSvl|9`p2fcdYGU7AVk9XMRTI8_-~ljff+qF^w4xCIns{~s3r)IO|}zk>ZIYk$(h zTKjJ;=0CI!;$Q&=Apx=wl!FBv`5*mz2BQpwK~X>YaM{=dHgfHp*LWeNZoYE=0(2<$ zLKZzgDSbD2)3D|1F`M9$VC}SXk6`bUd{#Q7p@y?@&Fs$%-B?%@(p#gYf|Eo-0g$+^z&kBN$i`!;dBa7 z=}IPkk7y0}MEkOZ@9{U;3QW3`&kGMSs(u)d4p>gmy}HglSDPb4y{u3+V~B(QQ7?E1 zmWJ-s_u>WDmfDF)+(Zg0)i@_ZR-JP3A;-GJy=Z9o%)w2MmQEh}56p#T(Djgc&jIa0 z-dt++wlS#wC$uy$Uqp{vw+Y>`W$HYk+r_lG$mP)#{HErVM)<3<@{eNg#f~dpn2;o- zq&3TZ3=37JVkPI=RTuf_tKbG-95=7^c=YaG__e!;>w|0ok=cX%>V(*7+gWSECUooLjTS%=}}rA z+@Xt^!dlml=AF^0jl}0K zq+F7lY^7gcZZr}UmOL-lXtXXQ&U6DjoYle!w1H=Oii@+#%@1Ws?O2@)FgHm5LORd_ znV&Fu>~^=UH*q`F%M{_6QRuRmq2f4{7e2l?B$Sx9FejZiUTp&j$Nz}l9Eo-Ntdx{` zTJJg;*@S4+{@G6zfaNCC68hlf`P5H}wabBk^2k0=CHX1Fw-2I1qN7E2)W^xCc=IzI z%(QcOg5{|h=S z>#gw9U+aoA1CHo5OBInQNdFVUbdW)c;y#GK9sG72jT)1q6Y++!sc zmZc%3sndM6zJ!{mWQZl}Da1xK70V?ynH$K8?gl_n3=-oEjB)f=1Zx2r$3}I5snDs|AKb|vM_b$@kdzUTeBmkS=UqH`fv&#eo`sL8VjJmBwOq$Emz27D z{3v7Kh#$Jtk<^L&fUI|`9{a*SVrh{95a8gv;rRK&|I<2x1oNIjqFn7kNbFK!)YvY3 zFJHG$m-GGCjTODK4AnZaN+x7AQtG~ff*0Ic-6wMI_>S96K54(#S!sh4 z+`?%(c*#avkxhFzMUbP++H`|!(dx8gCf{=kBD49}i4LA>J+cQ(7IpJPSIWjX)8gNK zl;E0hN6!=&FIwuKo3miV2Oro(pO>;C>u9f|DW9}*rRi|(Jn>ob02MTXMk zHjH97DpT=)Vyz&o5ouaH3!)9t#+|nJsd;fGdTt#@m}!y3NuQBDTPk3JOvDNqXH$PM&~HLB!{bB!4w2f=)z*K>_%(Is2ido6YVhva4ab4liN{=dG@cZI ztp53j4W48s1_L{`#-Vg0vJ;z{lL>V?BNLGc1Ej{sZB$pz+71sq&(tmd;6E7nwZ37HyazNC{W4o~gYVc~RD* zO*M(UjrJ7&icjTpC*l_W-ezCxj2@IU#cNT0naFhIYb@mW^&cB4I%h5tSTy-ZZq(kIGF2HqR0AFE|TsBQv@mcYwo7#XP_?KbD^57A5ONZVn&f`~;xpjWgX-{i* z?!2~2hfAd<3>n2dwzPnx-3h#4y`*#-G2@7Y9Pg~$2xC4E7^;dMiMz71F(D$_uyxH; zz?S)uq9Zi%X|4X6fYm#yOoDN%+*mD|P9qDhHN%S^7+f_mFO_$xHm3!{m{q4Jg1a}Z zh#HsY6xf6%SY_t{)TyP9SkI|hV=C7<euordf;stTLI{aiKP+yI|bFRg$*bPQ2UA+BU?>vQI#w>hgl)6RGCM 
z@<0pf7$t)bJALlXdLl#d&~J=9!ViTaQ}?UGz5SHxB6a7L*^kk_Tyb8JDqYtN$%F5NH(u;U zNjd9G@*Cec&mS@IQCzWJwVfukTq9>;^>oga5EzT@wyIOhYu1bHJsTqHK_iy?B?jiHO0Q)6wIC@u>|NGMVkN?@-Xw``-d3xqWa*O2 z_;MnXJ}!o5o0X!_CBO)F(IZjcK(TQ2QgFrolMF|KdyEe@zHW&73CN(GhhQX}UK%EY zl;WK`m7PVNpWdg3sQ7Ae0BSW-jK18jlSR^QHlE*|77*v-w|$t*p(n+pysTgQrNH2! zVvF3)O~~w!?DeI11SPJ_MK3@ax1vHTGrL&_o2&gJ`p?&cyYZdc$`W2b)1x%zH0Fpb zYkwG&`N72p&9I)IHc27JCpeb#*yrnmR5$nD**;tUXf&Z z8u3hDeF^B0YYp%9xoV7%`o0vOri}$IwyrzFTIRPe8Yw7iu|76|I-7sIuZ2)i4|VRE zBXk+ItundpnYBWHar8CaVLt0FHyGuUPU7@Lh9<5W7D$jGsBigEJY#&TMXCCE(MHcx z!U383_H&EyQKq-kC?_Z#^E zLRnx}3X(T4KP*ll1QJ7kNU;g<@w2gjpg@=v1PYd7)6-St6=l02O(F@iwuS&f;y)P? zUJP|&!GckRBE`Qyl45g2K*hgtC*n{G2M|&m1r$F?qKKcA7XN1G`yCg12-HrB%?5>n zivs}8&dz+!LVPd;7$6`fCIQ&o2!4e*M#HqyxnMf3=8} z){_zzoTG=i6FoQX^#THtgHnQR@Qu&3pVMWV;nmeq$R%%2?!K|f>>aJi~wQT zIO$v1fz6_6ZvnNF1pHCryWTf>%pZyu6DE+fAisbhuYdqAznHEdzqqiFxG?_@?SIQj zS^>rXr63@vD}yB8oUjQ3D5uhkDqQd(0`URU6nvXIzqVPTyg zF8n{Cf9nEWftWpow9s$TKW=}Q1%sg9*-B{*OrSNs%@p6lEyO3pp$CPyu>AYJIffgw z1p!e2At8PTL4FQdM~J-@ub9ZS>p!gg0{m{^pIHaXs6vn^2$1Dl@L`r%7|dV55sJdh zT>@+XX~`cWO#FK<{=E7J;oI%M)3I2FP}j!M!4hf#u}1<>KdXFjD0qo<6*)f}J!N_K z?!?w6oEWdSg`S@NylA{nK5E}}ij>q|HkmJ+l?FLB*3pspl(mO8TX(U_)pcg}`jhcY ze+S#1?uwkRTHEl2jT>fSVnQS?10vbS2fI6k1+S})`sT>mIYT(qwbzMl*FFobp3{9V z`Fu(oNZewTcG`KQ;7+~Q;m`+@v*vFK3rh~ymU;}zI+V;aLYC_pHp~Ph_U!Z;%Yg$i zUoRakmztbEUfDS^k3IHEDO4G+tM)S37gX$dFA1KWd+vE$mhI$d{eE_~r5tQD&(xor ziy)#gINn>DDGxP>J@gW~US_wP?;LSfCkwVh8&j7wZ#1$xw`Z^uQ;)AU@D^aI|5S^H z&rurMi$44zT?Gh@j6q9A9(nc_cIw+S`+Z73egO3}1JFBlGfmL`_0a7icN-a<74zPjY-{^RELk>sbQ zO{HW@`9S84;qloJz5TtyV1cx^AAZv3iA@%-mbWz_+V*4 z_0)=Y9D)+_V`!mZoBFE!I$IdJ>;Th%@@2MhGlqivE2_NEb4T_`hemcz^Ra!dBkd%P z#}773@jwC9Sacgz=8ZEd4GtH();ZByq5w&?A0DikVigQU&Z;Y)2f zoxR(od+%=9o_T%?Z05S%oR|4!DAd(;ju=wvC0V_-uWj3{?|y2(>sZ}6>x)~QQ55yL z>9AzqJAS>?b5dW+-mFlpybGk587;|b?@I8P-PwzHx&N82KH0tuZr*kH^3!her%@f9 z_9>>#9rI&Nlxgjz)~Z{zm`Pnl`)5J5e6g>e&|19((ZkPzw>RFYEh-?r4@PS)JB{~~ zOt(jDYCQ0l#=lU{Hg2U*r`No9V`=BwfL%vz(ghUwvT<%5!REd%g#`J`$F@?ggt)IY 
z>n1d+vr2ITXUZhNg=0G&bJn%3#`XmZYa>r1YPSxo%U4F)0>`UI`Y*SdN7U7JZS17x zdYZ5MK<3-(x3--eFRdeIDp$@Et_+v=m2r=3qi0JN+#VSeS6m!-*_lmoju4ZajGg4#EvFQ3gX8Y_0$4>C&#^h6f3(AB zIXspS=CPI3YASJxUVTnG3jv%DUF890-uHfFJxWt9@YQU+&daN}D;rkcXfJs)VYN1U_dWmJON|o?i}NL&+^NLh4v6IIRR+^Bt2|W< zXY}`pjugumfl%1@!w<&D1UZE{m{=)%>$2A^O(_@Maarvyp|VZ$i93dOLi zt{h_i-u2M}M*V|tqO`K!OU_uTjyreu^i)OVX#iNo+z9yi(|NP*-Ej{0aeLPyY$Ntw zrQT`M!}b`_G>7~w^B!!*h!-P*tDSpkO4%VRt;%Z@VX2--N_;ipTmVZ#uU!ezCug{&B4I_L6{j zi;U6Zt;OZThs6e|TMx=CgC{t*_MTK9xtALI$3LF!wX?f;hW*mU!viZG`Fn=6iiM*+ zZoGY7Lja+YeMa7$FE$@cYn#PxQ8>3}s3&bVdtZaTue%jib)@y0T6S4SX#rTE)2`*; z0VZuXfFakn4{PSn&<@{aBDZ4JNs{dD_5>8)+9otZpA>eq+~@SAhd zJWBB4VO~<+HNuvoo6oo3HgwUm`((dQW@mdw;*}zlP`1X}O=zj6C2Wz<#CAY*8XoB4 zlfz{hC5cu}tEUhMkPSJ-wa&j!C^B+76ly>fNAEi@D`tAG!N6FLNn0#1$mS5pX$F%Q;J4EG?}R+Xp8RUOUaP+-fdf3Ht(KJ zWRfCa^3 z<}i6uk2B=0?G+z$_F!^z15~kYi24hF8gt<3M=|E!p)ol1S@>vWo!#QQLPGj_&G6^9 zRK|d*7vC%vPsnRse37eW>kRCW6za9(plwd7W959OrS?3HsomFhiV2eC@?Ze$@xYV$ z)6$w=$|o{CDTevYvpEGm3CW)MU(aY}0bj4<-+Sn!=)!FqY*uy#H`DDU=pkJ0uy#MM zxvJvPx%FZ^A*Z#5ck30(ZgV@f774sJPGvErc6yxC(P%DDK;Ktx7BX#4CK`M~2|J65 z+ObNP8=V`JSVF6%AyS_TfPKeWdW+t1slXHEwK^<7(aCU|O?Q*CU;Hh6i3iKx&rAC} zf6`6*D5U?Z2QPvhXUvH#axN`sk8Z!74^@T#aQ z0vs^gDQgh&+lhTyj12TO3#G!2SU}TQW=C$xl)P<5GK*9LegqQ-Y0&w!neC>i=t>7& zk(ig%*)=_D{4Mcp*su3GmkbZ!aHoj#!p)5h< zy87(g1vMSI=OL^Qt2It{eraR-^zc-(8hpiLb73jy$!g;kF88=fDYDs5;Su2t{mXXS z_He125gg_s6cgNe(=yvT>IyyKH^4NG8JWKIAQ#O}7b~%H-|E$w`QxpS@v1kRe!>z? zHy;e`4LdOhK0WQAybdyS9qA%VsVI7zgr2g47_7J4t~tWT_D9am4|2D0F=rtdjzd!w z7w;U--_tuVopPf5O!oL)>x2jnHW}ep(ZW=JPoX%O(ed5-$+MWBX&Km@j$b7hQ~e$1 zFBu;Hs=tH&%J2AhV18-g{MJNTOelXxIhpVAcN}cS$FHLPC!BxffBYTk z-$4)h1JcQSkiX;nJ3bA6!1Code structure as of 7/20/2012 + +written by Jim. +

+Ok -- once again, I have messed with the structure. My goal is for it to be flexible and efficient. +It's similar to the stuff before, but better because it makes things like Euclid's +method much cleaner. +

+I think we're ready to hack. +

+

+


+

Files

+
    +
  • GNUmakefile: Makefile +
  • README: Empty readme +
  • explanation.html: This file. +
  • gf.c: Main gf routines +
  • gf.h: Main gf prototypes and typedefs +
  • gf_int.h: Prototypes and typedefs for common routines for the + internal gf implementations. +
  • gf_method.c: Code to help parse argc/argv to define the method. + This way, various programs can be consistent with how they handle the command line. +
  • gf_method.h: Prototypes for ibid. +
  • gf_methods.c: This program prints out how to define + the various methods on the command line. My idea is to beef this up so that you can + give it a method spec on the command line, and it will tell you whether it's valid, or + why it's invalid. I haven't written that part yet. +
  • gf_mult.c: Program to do single multiplication. +
  • gf_div.c: Program to do single divisions -- it's created + in the makefile with a sed script on gf_mult.c. +
  • gf_time.c: Time tester +
  • gf_unit.c: Unit tester +
  • gf_54.c: A simple example program that multiplies + 5 and 4 in GF(2^4). +
  • gf_w4.c: Implementation of code for w = 4. +(For now, only SHIFT and LOG, plus EUCLID & MATRIX). +
  • gf_w8.c: Implementation of code for w = 8. +(For now, only SHIFT plus EUCLID & MATRIX). +
  • gf_w16.c: Implementation of code for w = 16. +(For now, only SHIFT plus EUCLID & MATRIX). +
  • gf_w32.c: Implementation of code for w = 32. +(For now, only SHIFT plus EUCLID & MATRIX). +
  • gf_w64.c: Implementation of code for w = 64. +(For now, only SHIFT and EUCLID). +
  • I don't have gf_w128.c or gf_gen.c yet. +
+ +
+

Prototypes and typedefs in gf.h

+ +The main structure that users will see is in gf.h, and it is of type +gf_t: + +

+typedef struct gf {
+  gf_func_a_b    multiply;
+  gf_func_a_b    divide;
+  gf_func_a      inverse;
+  gf_region      multiply_region;
+  void           *scratch;
+} gf_t;
+

+ +We can beef it up later with buf-buf or buf-acc. The problem is that the paper is +already bloated, so right now, I want to keep it lean. +

+The types of the procedures are big unions, so that they work with the following +types of arguments: + +

+typedef uint8_t     gf_val_4_t;
+typedef uint8_t     gf_val_8_t;
+typedef uint16_t    gf_val_16_t;
+typedef uint32_t    gf_val_32_t;
+typedef uint64_t    gf_val_64_t;
+typedef uint64_t    *gf_val_128_t;
+typedef uint32_t    gf_val_gen_t;   /* The intent here is for general values <= 32 */
+

+ +To use one of these, you need to create one with gf_init_easy() or +gf_init_hard(). Let's concentrate on the former: + +

+extern int gf_init_easy(gf_t *gf, int w, int mult_type);
+

+ +You pass it memory for a gf_t, a value of w and +a variable that says how to do multiplication. The valid values of mult_type +are enumerated in gf.h: + +

+typedef enum {GF_MULT_DEFAULT,
+              GF_MULT_SHIFT,
+              GF_MULT_GROUP,
+              GF_MULT_BYTWO_p,
+              GF_MULT_BYTWO_b,
+              GF_MULT_TABLE,
+              GF_MULT_LOG_TABLE,
+              GF_MULT_SPLIT_TABLE,
+              GF_MULT_COMPOSITE } gf_mult_type_t;
+

+ +After creating the gf_t, you use its multiply method +to multiply, using the union's fields to work with the various types. +It looks easier than my explanation. For example, suppose you wanted to multiply 5 and 4 in GF(2^4). +You can do it as in +gf_54.c + +

+#include "gf.h"
+
+main()
+{
+  gf_t gf;
+
+  gf_init_easy(&gf, 4, GF_MULT_DEFAULT);
+  printf("%d\n", gf.multiply.w4(&gf, 5, 4));
+  exit(0);
+}
+

+ + +If you wanted to multiply in GF(2^8), then you'd have to use 8 as a parameter +to gf_init_easy, and call the multiplier as gf.multiply.w8(). +

+When you're done with your gf_t, you should call gf_free() on it so +that it can free memory that it has allocated. We'll talk more about memory later, but if you +create your gf_t with gf_init_easy, then it calls malloc(), and +if you care about freeing memory, you'll have to call gf_free(). +

+ +


+

Memory allocation

+ +Each implementation of a multiplication technique keeps around its +own data. For example, GF_MULT_TABLE keeps around +multiplication and division tables, and GF_MULT_LOG_TABLE maintains log and +antilog tables. This data is stored in the pointer scratch. My intent +is that the memory that is there is all that's required. In other +words, the multiply(), divide(), inverse() and +multiply_region() calls don't do any memory allocation. +Moreover, gf_init_easy() only allocates one chunk of memory -- +the one in scratch. +

+If you don't want to have the initialization call allocate memory, you can use gf_init_hard(): + +

+extern int gf_init_hard(gf_t *gf,
+                        int w,
+                        int mult_type,
+                        int region_type,
+                        int divide_type,
+                        uint64_t prim_poly,
+                        int arg1,
+                        int arg2,
+                        gf_t *base_gf,
+                        void *scratch_memory);
+

+ +The first three parameters are the same as gf_init_easy(). +You can add additional arguments for performing multiply_region, and +for performing division in the region_type and divide_type +arguments. Their values are also defined in gf.h. You can +mix the region_type values (e.g. "DOUBLE" and "SSE"): + +

+#define GF_REGION_DEFAULT      (0x0)
+#define GF_REGION_SINGLE_TABLE (0x1)
+#define GF_REGION_DOUBLE_TABLE (0x2)
+#define GF_REGION_QUAD_TABLE   (0x4)
+#define GF_REGION_LAZY         (0x8)
+#define GF_REGION_SSE          (0x10)
+#define GF_REGION_NOSSE        (0x20)
+#define GF_REGION_STDMAP       (0x40)
+#define GF_REGION_ALTMAP       (0x80)
+#define GF_REGION_CAUCHY       (0x100)
+
+typedef uint32_t gf_region_type_t;
+
+typedef enum { GF_DIVIDE_DEFAULT,
+               GF_DIVIDE_MATRIX,
+               GF_DIVIDE_EUCLID } gf_division_type_t;
+

+You can change +the primitive polynomial with prim_poly, give additional arguments with +arg1 and arg2 and give a base Galois Field for composite fields. +Finally, you can pass it a pointer to memory in scratch_memory. That +way, you can avoid having gf_init_hard() call malloc(). +

+There is a procedure called gf_scratch_size() that lets you know the minimum +size for scratch_memory, depending on w, the multiplication type +and the arguments: + +

+extern int gf_scratch_size(int w,
+                           int mult_type,
+                           int region_type,
+                           int divide_type,
+                           int arg1,
+                           int arg2);
+

+ +You can specify default arguments in gf_init_hard(): +

    +
  • region_type = GF_REGION_DEFAULT +
  • divide_type = GF_DIVIDE_DEFAULT +
  • prim_poly = 0 +
  • arg1 = 0 +
  • arg2 = 0 +
  • base_gf = NULL +
  • scratch_memory = NULL +
+If any argument is equal to its default, then default actions are taken (e.g. a +standard primitive polynomial is used, or memory is allocated for scratch_memory). +In fact, gf_init_easy() simply calls gf_init_hard() with the default +parameters. +

+gf_free() frees memory that was allocated with gf_init_easy() +or gf_init_hard(). The recursive parameter is in case you +use composite fields, and want to recursively free the base fields. +If you pass scratch_memory to gf_init_hard(), then you typically +don't need to call gf_free(). It won't hurt to call it, though. + +


+

gf_mult and gf_div

+ +For the moment, I have few things completely implemented, but that's because I want +to be able to explain the structure, and how to specify methods. In particular, for +w=4, I have implemented SHIFT and LOG. For w=8, 16, 32, 64 +I have implemented SHIFT. For all w ≤ 32, I have implemented both +Euclid's algorithm for inversion, and the matrix method for inversion. For +w=64, it's just Euclid. You can +test these all with gf_mult and gf_div. Here are a few calls: + +
+UNIX> gf_mult 7 11 4                - Default
+4
+UNIX> gf_mult 7 11 4 SHIFT - -      - Use shift
+4
+UNIX> gf_mult 7 11 4 LOG - -        - Use logs
+4
+UNIX> gf_div 4 7 4                  - Default
+11
+UNIX> gf_div 4 7 4 LOG - -          - Use logs
+11
+UNIX> gf_div 4 7 4 LOG - EUCLID     - Use Euclid instead of logs
+11
+UNIX> gf_div 4 7 4 LOG - MATRIX     - Use Matrix inversion instead of logs
+11
+UNIX> gf_div 4 7 4 SHIFT - -        - Default
+11
+UNIX> gf_div 4 7 4 SHIFT - EUCLID   - Use Euclid (which is the default)
+11
+UNIX> gf_div 4 7 4 SHIFT - MATRIX   - Use Matrix inversion instead of Euclid
+11
+UNIX> gf_mult 200 211 8        - The remainder are shift/Euclid
+201
+UNIX> gf_div 201 211 8
+200
+UNIX> gf_mult 60000 65111 16
+63515
+UNIX> gf_div 63515 65111 16
+60000
+UNIX> gf_mult abcd0001 9afbf788 32h
+b0359681
+UNIX> gf_div b0359681 9afbf788 32h
+abcd0001
+UNIX> gf_mult abcd00018c8b8c8a 9afbf7887f6d8e5b 64h
+3a7def35185bd571
+UNIX> gf_mult abcd00018c8b8c8a 9afbf7887f6d8e5b 64h
+3a7def35185bd571
+UNIX> gf_div 3a7def35185bd571 9afbf7887f6d8e5b 64h
+abcd00018c8b8c8a
+UNIX> 
+
+ +You can see all the methods with gf_methods. We have a lot of implementing to do: + +
+UNIX> gf_methods
+To specify the methods, do one of the following: 
+       - leave empty to use defaults
+       - use a single dash to use defaults
+       - specify MULTIPLY REGION DIVIDE
+
+Legal values of MULTIPLY:
+       SHIFT: shift
+       GROUP g_mult g_reduce: the Group technique - see the paper
+       BYTWO_p: BYTWO doubling the product.
+       BYTWO_b: BYTWO doubling b (more efficient thatn BYTWO_p)
+       TABLE: Full multiplication table
+       LOG:   Discrete logs
+       LOG_ZERO: Discrete logs with a large table for zeros
+       SPLIT g_a g_b: Split tables defined by g_a and g_b
+       COMPOSITE k l [METHOD]: Composite field, recursively specify the
+                               method of the base field in GF(2^l)
+
+Legal values of REGION: Specify multiples with commas e.g. 'DOUBLE,LAZY'
+       -: Use defaults
+       SINGLE/DOUBLE/QUAD: Expand tables
+       LAZY: Lazily create table (only applies to TABLE and SPLIT)
+       SSE/NOSSE: Use 128-bit SSE instructions if you can
+       CAUCHY/ALTMAP/STDMAP: Use different memory mappings
+
+Legal values of DIVIDE:
+       -: Use defaults
+       MATRIX: Use matrix inversion
+       EUCLID: Use the extended Euclidian algorithm.
+
+See the user's manual for more information.
+There are many restrictions, so it is better to simply use defaults in most cases.
+UNIX> 
+
+ +
+

gf_unit and gf_time

+ +gf_unit.c is a unit tester, and +gf_time.c is a time tester. + +They are called as follows: + +

+UNIX> gf_unit w tests seed [METHOD] 
+UNIX> gf_time w tests seed size(bytes) iterations [METHOD] 
+

+ +The tests parameter is one or more of the following characters: + +

    +
  • A: Do all tests +
  • S: Test only single operations (multiplication/division) +
  • R: Test only region operations +
  • V: Verbose Output +
+ +seed is a seed for srand48() -- using -1 defaults to the current time. +

+For example, testing the defaults with w=4: + +

+UNIX> gf_unit 4 AV 1 LOG - -
+Seed: 1
+Testing single multiplications/divisions.
+Testing Inversions.
+Testing buffer-constant, src != dest, xor = 0
+Testing buffer-constant, src != dest, xor = 1
+Testing buffer-constant, src == dest, xor = 0
+Testing buffer-constant, src == dest, xor = 1
+UNIX> gf_unit 4 AV 1 SHIFT - -
+Seed: 1
+Testing single multiplications/divisions.
+Testing Inversions.
+No multiply_region.
+UNIX> 
+
+ +There is no multiply_region() method defined for SHIFT. +Thus, the procedures are NULL and the unit tester ignores them. +

+At the moment, I only have the unit tester working for w=4. +

+gf_time takes the size of an array (in bytes) and a number of iterations, and +tests the speed of both single and region operations. The tests are: + +

    +
  • A: All +
  • S: All Single Operations +
  • R: All Region Operations +
  • M: Single: Multiplications +
  • D: Single: Divisions +
  • I: Single: Inverses +
  • B: Region: Multiply_Region +
+ +Here are some examples with SHIFT and LOG on my mac. + +
+UNIX> gf_time 4 A 1 102400 1024 LOG - -
+Seed: 1
+Multiply:   0.538126 s      185.830 Mega-ops/s
+Divide:     0.520825 s      192.003 Mega-ops/s
+Inverse:    0.631198 s      158.429 Mega-ops/s
+Buffer-Const,s!=d,xor=0:    0.478395 s      209.032 MB/s
+Buffer-Const,s!=d,xor=1:    0.524245 s      190.751 MB/s
+Buffer-Const,s==d,xor=0:    0.471851 s      211.931 MB/s
+Buffer-Const,s==d,xor=1:    0.528275 s      189.295 MB/s
+UNIX> gf_time 4 A 1 102400 1024 LOG - EUCLID
+Seed: 1
+Multiply:   0.555512 s      180.014 Mega-ops/s
+Divide:     5.359434 s       18.659 Mega-ops/s
+Inverse:    4.911719 s       20.359 Mega-ops/s
+Buffer-Const,s!=d,xor=0:    0.496097 s      201.573 MB/s
+Buffer-Const,s!=d,xor=1:    0.538536 s      185.689 MB/s
+Buffer-Const,s==d,xor=0:    0.485564 s      205.946 MB/s
+Buffer-Const,s==d,xor=1:    0.540227 s      185.107 MB/s
+UNIX> gf_time 4 A 1 102400 1024 LOG - MATRIX
+Seed: 1
+Multiply:   0.544005 s      183.822 Mega-ops/s
+Divide:     7.602822 s       13.153 Mega-ops/s
+Inverse:    7.000564 s       14.285 Mega-ops/s
+Buffer-Const,s!=d,xor=0:    0.474868 s      210.585 MB/s
+Buffer-Const,s!=d,xor=1:    0.527588 s      189.542 MB/s
+Buffer-Const,s==d,xor=0:    0.473130 s      211.358 MB/s
+Buffer-Const,s==d,xor=1:    0.529877 s      188.723 MB/s
+UNIX> gf_time 4 A 1 102400 1024 SHIFT - -
+Seed: 1
+Multiply:   2.708842 s       36.916 Mega-ops/s
+Divide:     8.756882 s       11.420 Mega-ops/s
+Inverse:    5.695511 s       17.558 Mega-ops/s
+UNIX> 
+
+ +At the moment, I only have the timer working for w=4. + +
+

Walking you through LOG

+ +To see how scratch is used to store data, let's look at what happens when +you call gf_init_easy(&gf, 4, GF_MULT_LOG_TABLE); +First, gf_init_easy() calls gf_init_hard() with default parameters. +This is in gf.c. +

+gf_init_hard()'s first job is to set up the scratch. +The scratch's type is gf_internal_t, defined in +gf_int.h: + +

+typedef struct {
+  int mult_type;
+  int region_type;
+  int divide_type;
+  int w;
+  uint64_t prim_poly;
+  int free_me;
+  int arg1;
+  int arg2;
+  gf_t *base_gf;
+  void *private;
+} gf_internal_t;
+

+ +All the fields are straightforward, with the exception of private. That is +a (void *) which points to the implementation's private data. +

+Here's the code for +gf_init_hard(): + +

+int gf_init_hard(gf_t *gf, int w, int mult_type, 
+                        int region_type,
+                        int divide_type,
+                        uint64_t prim_poly,
+                        int arg1, int arg2,
+                        gf_t *base_gf,
+                        void *scratch_memory) 
+{
+  int sz;
+  gf_internal_t *h;
+
+
+  if (scratch_memory == NULL) {
+    sz = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
+    if (sz <= 0) return 0;
+    h = (gf_internal_t *) malloc(sz);
+    h->free_me = 1;
+  } else {
+    h = scratch_memory;
+    h->free_me = 0;
+  }
+  gf->scratch = (void *) h;
+  h->mult_type = mult_type;
+  h->region_type = region_type;
+  h->divide_type = divide_type;
+  h->w = w;
+  h->prim_poly = prim_poly;
+  h->arg1 = arg1;
+  h->arg2 = arg2;
+  h->base_gf = base_gf;
+  h->private = (void *) gf->scratch;
+  h->private += (sizeof(gf_internal_t));
+
+  switch(w) {
+    case 4: return gf_w4_init(gf);
+    case 8: return gf_w8_init(gf);
+    case 16: return gf_w16_init(gf);
+    case 32: return gf_w32_init(gf);
+    case 64: return gf_w64_init(gf);
+    case 128: return gf_dummy_init(gf);
+    default: return 0;
+  }
+}
+

+ +The first thing it does is determine if it has to allocate space for scratch. +If it must, it uses gf_scratch_size() to figure out how big the space must be. +It then sets gf->scratch to this space, and sets all of the fields of the +scratch to the arguments in gf_init_hard(). The private pointer is +set to be the space just after the gf_internal_t at the start of gf->scratch. Again, it is up to +gf_scratch_size() to make sure there is enough space for the scratch, and +for all of the private data needed by the implementation. +

+Once the scratch is set up, gf_init_hard() calls gf_w4_init(). This is +in gf_w4.c, and it is a +simple dispatcher to the various initialization routines, plus it +sets EUCLID and MATRIX if need be: + +

+int gf_w4_init(gf_t *gf)
+{
+  gf_internal_t *h;
+
+  h = (gf_internal_t *) gf->scratch;
+  if (h->prim_poly == 0) h->prim_poly = 0x13;
+
+  gf->multiply.w4 = NULL;
+  gf->divide.w4 = NULL;
+  gf->inverse.w4 = NULL;
+  gf->multiply_region.w4 = NULL;
+
+  switch(h->mult_type) {
+    case GF_MULT_SHIFT:     if (gf_w4_shift_init(gf) == 0) return 0; break;
+    case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break;
+    case GF_MULT_DEFAULT:   if (gf_w4_log_init(gf) == 0) return 0; break;
+    default: return 0;
+  }
+  if (h->divide_type == GF_DIVIDE_EUCLID) {
+    gf->divide.w4 = gf_w4_divide_from_inverse;
+    gf->inverse.w4 = gf_w4_euclid;
+  } else if (h->divide_type == GF_DIVIDE_MATRIX) {
+    gf->divide.w4 = gf_w4_divide_from_inverse;
+    gf->inverse.w4 = gf_w4_matrix;
+  }
+
+  if (gf->inverse.w4 != NULL && gf->divide.w4 == NULL) {
+    gf->divide.w4 = gf_w4_divide_from_inverse;
+  }
+  if (gf->inverse.w4 == NULL && gf->divide.w4 != NULL) {
+    gf->inverse.w4 = gf_w4_inverse_from_divide;
+  }
+  return 1;
+}
+

+ +The code in gf_w4_log_init() sets up the log and antilog tables, and sets +the multiply.w4, divide.w4 etc routines to be the ones for logs. The +tables are put into gf->scratch->private, which is typecast to a struct +gf_logtable_data *: + +

+struct gf_logtable_data {
+    gf_val_4_t      log_tbl[GF_FIELD_SIZE];
+    gf_val_4_t      antilog_tbl[GF_FIELD_SIZE * 2];
+    gf_val_4_t      *antilog_tbl_div;
+};
+.......
+
+static 
+int gf_w4_log_init(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_logtable_data *ltd;
+  int i, b;
+
+  h = (gf_internal_t *) gf->scratch;
+  ltd = h->private;
+
+  ltd->log_tbl[0] = 0;
+
+  ltd->antilog_tbl_div = ltd->antilog_tbl + (GF_FIELD_SIZE-1);
+  b = 1;
+  for (i = 0; i < GF_FIELD_SIZE-1; i++) {
+      ltd->log_tbl[b] = (gf_val_8_t)i;
+      ltd->antilog_tbl[i] = (gf_val_8_t)b;
+      ltd->antilog_tbl[i+GF_FIELD_SIZE-1] = (gf_val_8_t)b;
+      b <<= 1;
+      if (b & GF_FIELD_SIZE) {
+          b = b ^ h->prim_poly;
+      }
+  }
+    
+  gf->inverse.w4 = gf_w4_inverse_from_divide;
+  gf->divide.w4 = gf_w4_log_divide;
+  gf->multiply.w4 = gf_w4_log_multiply;
+  gf->multiply_region.w4 = gf_w4_log_multiply_region;
+  return 1;
+}
+

+ +And of course the individual routines use h->private to access the tables: + +

+static
+inline
+gf_val_8_t gf_w4_log_multiply (gf_t *gf, gf_val_8_t a, gf_val_8_t b)
+{
+  struct gf_logtable_data *ltd;
+    
+  ltd = (struct gf_logtable_data *) ((gf_internal_t *) (gf->scratch))->private;
+  return (a == 0 || b == 0) ? 0 : ltd->antilog_tbl[(unsigned)(ltd->log_tbl[a] + ltd->log_tbl[b])];
+}
+

+ +Finally, it's important that the proper sizes are put into +gf_w4_scratch_size() for each implementation: + +

+int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
+{
+  int region_tbl_size;
+  switch(mult_type)
+  {
+    case GF_MULT_DEFAULT:
+    case GF_MULT_LOG_TABLE:
+      return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64;
+      break;
+    case GF_MULT_SHIFT:
+      return sizeof(gf_internal_t);
+      break;
+    default:
+      return -1;
+   }
+}
+

+I hope that's enough explanation for y'all to start implementing. Let me know if you have +problems -- thanks -- Jim + +


+The initial structure has been set for w=4, 8, 16, 32 and 64, with implementations of SHIFT and EUCLID, and for w <= 32, MATRIX. There are some weird caveats: + +
    +
  • For w=32 and w=64, the primitive polynomial does not have the leading one. +
  • I'd like for naming to be: +

    +

      + gf_ww_technique_functionality(). +
    +

    +For example, the log techniques for w=4 are: +

    +gf_w4_log_multiply()
    +gf_w4_log_divide()
    +gf_w4_log_multiply_region()
    +gf_w4_log_init()
    +
    +

    +

  • I'd also like a header block on implementations that says who wrote it. +
+ +
+

Things we need to Implement: w=4

+ +

+ + + + + + + + + + + +
SHIFT Done - Jim
BYTWO_p Done - Jim
BYTWO_b Done - Jim
BYTWO_p, SSE Done - Jim
BYTWO_b, SSE Done - Jim
Single TABLE Done - Jim
Double TABLE Done - Jim
Double TABLE, SSE Done - Jim
Quad TABLE Done - Jim
Lazy Quad TABLE Done - Jim
LOG Done - Jim

+ +


+

Things we need to Implement: w=8

+ +

+ + + + + + + + + + + + +
SHIFT Done - Jim
BYTWO_p Done - Jim
BYTWO_b Done - Jim
BYTWO_p, SSE Done - Jim
BYTWO_b, SSE Done - Jim
Single TABLE Done - Kevin
Double TABLE Done - Jim
Lazy Double TABLE Done - Jim
Split 2 1 (Half) SSE Done - Jim
Composite, k=2 Done - Kevin (alt mapping not passing unit test)
LOG Done - Kevin
LOG ZERO Done - Jim

+ +


+

Things we need to Implement: w=16

+ +

+ + + + + + + + + + + + + + + + +
SHIFT Done - Jim
BYTWO_p Done - Jim
BYTWO_b Done - Jim
BYTWO_p, SSE Done - Jim
BYTWO_b, SSE Done - Jim
Lazy TABLE Done - Jim
Split 4 16 No-SSE, lazy Done - Jim
Split 4 16 SSE, lazy Done - Jim
Split 4 16 SSE, lazy, alternate mapping Done - Jim
Split 8 16, lazy Done - Jim
Composite, k=2, stdmap recursive Done - Kevin
Composite, k=2, altmap recursive Done - Kevin
Composite, k=2, stdmap inline Done - Kevin
LOG Done - Kevin
LOG ZERO Done - Kevin
Group 4 4 Done - Jim: I don't see a reason to implement others, although 4-8 will be faster, and 8 8 will have faster region ops. They'll never beat SPLIT.

+ +


+

Things we need to Implement: w=32

+ +

+ + + + + + + + + + + + + + + + +
SHIFT Done - Jim
BYTWO_p Done - Jim
BYTWO_b Done - Jim
BYTWO_p, SSE Done - Jim
BYTWO_b, SSE Done - Jim
Split 2 32,lazy Done - Jim
Split 2 32, SSE, lazy Done - Jim
Split 4 32, lazy Done - Jim
Split 4 32, SSE,ALTMAP lazy Done - Jim
Split 4 32, SSE, lazy Done - Jim
Split 8 8 Done - Jim
Group, g_s == g_r Done - Jim
Group, any g_s and g_r Done - Jim
Composite, k=2, stdmap recursive Done - Kevin
Composite, k=2, altmap recursive Done - Kevin
Composite, k=2, stdmap inline Done - Kevin

+


+

Things we need to Implement: w=64

+ +

+ + + + + + + + + + + +
SHIFT Done - Jim
BYTWO_p -
BYTWO_b -
BYTWO_p, SSE -
BYTWO_b, SSE -
Split 16 1 SSE, maybe lazy -
Split 8 1 lazy -
Split 8 8 -
Split 8 8 lazy -
Group -
Composite, k=2, alternate mapping -

+


+

Things we need to Implement: w=128

+ +

+ + + + + + + + + + + +
SHIFT Done - Will
BYTWO_p -
BYTWO_b -
BYTWO_p, SSE -
BYTWO_b, SSE -
Split 32 1 SSE, maybe lazy -
Split 16 1 lazy -
Split 16 16 - Maybe that's insanity -
Split 16 16 lazy -
Group (SSE) -
Composite, k=?, alternate mapping -

+


+

Things we need to Implement: w=general between 1 & 32

+ +

+ + + + + + + + + + + + +
CAUCHY Region (SSE XOR) Done - Jim
SHIFT Done - Jim
TABLE Done - Jim
LOG Done - Jim
BYTWO_p Done - Jim
BYTWO_b Done - Jim
Group, g_s == g_r Done - Jim
Group, any g_s and g_r Done - Jim
Split - do we need it? Done - Jim
Composite - do we need it? -
Split - do we need it? -
Logzero? -

diff --git a/gf.c b/gf.c new file mode 100644 index 0000000..6487f74 --- /dev/null +++ b/gf.c @@ -0,0 +1,478 @@ +/* + * gf.c + * + * Generic routines for Galois fields + */ + +#include "gf_int.h" +#include +#include + +int gf_scratch_size(int w, + int mult_type, + int region_type, + int divide_type, + int arg1, + int arg2) +{ + switch(w) { + case 4: return gf_w4_scratch_size(mult_type, region_type, divide_type, arg1, arg2); + case 8: return gf_w8_scratch_size(mult_type, region_type, divide_type, arg1, arg2); + case 16: return gf_w16_scratch_size(mult_type, region_type, divide_type, arg1, arg2); + case 32: return gf_w32_scratch_size(mult_type, region_type, divide_type, arg1, arg2); + case 64: return gf_w64_scratch_size(mult_type, region_type, divide_type, arg1, arg2); + case 128: return gf_w128_scratch_size(mult_type, region_type, divide_type, arg1, arg2); + default: return gf_wgen_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2); + } +} + +int gf_dummy_init(gf_t *gf) +{ + return 0; +} + +int gf_init_easy(gf_t *gf, int w, int mult_type) +{ + return gf_init_hard(gf, w, mult_type, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT, 0, 0, 0, NULL, NULL); +} + +int gf_init_hard(gf_t *gf, int w, int mult_type, + int region_type, + int divide_type, + uint64_t prim_poly, + int arg1, int arg2, + gf_t *base_gf, + void *scratch_memory) +{ + int sz; + gf_internal_t *h; + + sz = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2); + + if (sz <= 0) return 0; + + if (scratch_memory == NULL) { + h = (gf_internal_t *) malloc(sz); + h->free_me = 1; + } else { + h = scratch_memory; + h->free_me = 0; + } + gf->scratch = (void *) h; + h->mult_type = mult_type; + h->region_type = region_type; + h->divide_type = divide_type; + h->w = w; + h->prim_poly = prim_poly; + h->arg1 = arg1; + h->arg2 = arg2; + h->base_gf = base_gf; + h->private = (void *) gf->scratch; + h->private += (sizeof(gf_internal_t)); + gf->extract_word.w32 = NULL; + + //printf("Created w=%d, with 
mult_type=%d and region_type=%d\n", w, mult_type, region_type); + + switch(w) { + case 4: return gf_w4_init(gf); + case 8: return gf_w8_init(gf); + case 16: return gf_w16_init(gf); + case 32: return gf_w32_init(gf); + case 64: return gf_w64_init(gf); + case 128: return gf_w128_init(gf); + default: return gf_wgen_init(gf); + } +} + +int gf_free(gf_t *gf, int recursive) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (recursive && h->base_gf != NULL) { + gf_free(h->base_gf, 1); + free(h->base_gf); + } + if (h->free_me) free(h); +} + +void gf_alignment_error(char *s, int a) +{ + fprintf(stderr, "Alignment error in %s:\n", s); + fprintf(stderr, " The source and destination buffers must be aligned to each other,\n"); + fprintf(stderr, " and they must be aligned to a %d-byte address.\n", a); + exit(1); +} + +/* Lifted this code from Jens Gregor -- thanks, Jens */ + +int gf_is_sse2() +{ + unsigned int cpeinfo; + unsigned int cpsse; + asm ( "mov $0x1, %%eax\n\t" + "cpuid\n\t" + "mov %%edx, %0\n\t" + "mov %%ecx, %1\n" : "=m" (cpeinfo), "=m" (cpsse)); + if ((cpeinfo >> 26) & 0x1 ) return 1; + return 0; +} + +static +void gf_invert_binary_matrix(int *mat, int *inv, int rows) { + int cols, i, j, k; + int tmp; + + cols = rows; + + for (i = 0; i < rows; i++) inv[i] = (1 << i); + + /* First -- convert into upper triangular */ + + for (i = 0; i < cols; i++) { + + /* Swap rows if we ave a zero i,i element. If we can't swap, then the + matrix was not invertible */ + + if ((mat[i] & (1 << i)) == 0) { + for (j = i+1; j < rows && (mat[j] & (1 << i)) == 0; j++) ; + if (j == rows) { + fprintf(stderr, "galois_invert_matrix: Matrix not invertible!!\n"); + exit(1); + } + tmp = mat[i]; mat[i] = mat[j]; mat[j] = tmp; + tmp = inv[i]; inv[i] = inv[j]; inv[j] = tmp; + } + + /* Now for each j>i, add A_ji*Ai to Aj */ + for (j = i+1; j != rows; j++) { + if ((mat[j] & (1 << i)) != 0) { + mat[j] ^= mat[i]; + inv[j] ^= inv[i]; + } + } + } + + /* Now the matrix is upper triangular. 
Start at the top and multiply down */ + + for (i = rows-1; i >= 0; i--) { + for (j = 0; j < i; j++) { + if (mat[j] & (1 << i)) { + /* mat[j] ^= mat[i]; */ + inv[j] ^= inv[i]; + } + } + } +} + +uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp) +{ + uint32_t mat[32], inv[32], mask; + int i; + + mask = (w == 32) ? 0xffffffff : (1 << w) - 1; + for (i = 0; i < w; i++) { + mat[i] = y; + + if (y & (1 << (w-1))) { + y = y << 1; + y = ((y ^ pp) & mask); + } else { + y = y << 1; + } + } + + gf_invert_binary_matrix(mat, inv, w); + return inv[0]; +} + +/* +void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base) +{ + uint64_t p, ta, shift, tb; + uint64_t *s64, *d64 + + s64 = rd->s_start; + d64 = rd->d_start; + + while (s64 < (uint64_t *) rd->s_top) { + p = (rd->xor) ? *d64 : 0; + ta = *s64; + + shift = 0; + while (ta != 0) { + tb = base[ta&0xffff]; + p ^= (tb << shift); + ta >>= 16; + shift += 16; + } + + *d64 = p; + d64++; + s64++; + } +} +*/ + +void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base) +{ + uint64_t a, prod; + int j, xor; + uint64_t *s64, *d64, *top; + + s64 = rd->s_start; + d64 = rd->d_start; + top = rd->d_top; + xor = rd->xor; + + if (xor) { + while (d64 != top) { + a = *s64; + prod = base[a >> 48]; + a <<= 16; + prod <<= 16; + prod ^= base[a >> 48]; + a <<= 16; + prod <<= 16; + prod ^= base[a >> 48]; + a <<= 16; + prod <<= 16; + prod ^= base[a >> 48]; + prod ^= *d64; + *d64 = prod; + *s64++; + *d64++; + } + } else { + while (d64 != top) { + a = *s64; + prod = base[a >> 48]; + a <<= 16; + prod <<= 16; + prod ^= base[a >> 48]; + a <<= 16; + prod <<= 16; + prod ^= base[a >> 48]; + a <<= 16; + prod <<= 16; + prod ^= base[a >> 48]; + *d64 = prod; + *s64++; + *d64++; + } + } +} + +static void gf_slow_multiply_region(gf_region_data *rd, void *src, void *dest, void *s_top) +{ + uint8_t *s8, *d8; + uint16_t *s16, *d16; + uint32_t *s32, *d32; + gf_internal_t *h; + int wb; + uint32_t p, a; + + h = rd->gf->scratch; + wb = 
(h->w)/8; + if (wb == 0) wb = 1; + + while (src < s_top) { + switch (h->w) { + case 8: + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + *d8 = (rd->xor) ? (*d8 ^ rd->gf->multiply.w32(rd->gf, rd->val, *s8)) : + rd->gf->multiply.w32(rd->gf, rd->val, *s8); + break; + case 4: + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + a = *s8; + p = rd->gf->multiply.w32(rd->gf, rd->val, a&0xf); + p |= (rd->gf->multiply.w32(rd->gf, rd->val, a >> 4) << 4); + if (rd->xor) p ^= *d8; + *d8 = p; + break; + case 16: + s16 = (uint16_t *) src; + d16 = (uint16_t *) dest; + *d16 = (rd->xor) ? (*d16 ^ rd->gf->multiply.w32(rd->gf, rd->val, *s16)) : + rd->gf->multiply.w32(rd->gf, rd->val, *s16); + break; + case 32: + s32 = (uint32_t *) src; + d32 = (uint32_t *) dest; + *d32 = (rd->xor) ? (*d32 ^ rd->gf->multiply.w32(rd->gf, rd->val, *s32)) : + rd->gf->multiply.w32(rd->gf, rd->val, *s32); + break; + default: + fprintf(stderr, "Error: gf_slow_multiply_region: w=%d not implemented.\n", h->w); + exit(1); + } + src += wb; + dest += wb; + } +} + +/* If align>16, you align to 16 bytes, but make sure that within the aligned region bytes is a multiple of align. However, you make sure that the region itself is a multiple of align. + + If align = -1, then this is cauchy. You need to make sure that bytes is a multiple of w. */ + +void gf_set_region_data(gf_region_data *rd, + gf_t *gf, + void *src, + void *dest, + int bytes, + uint32_t val, + int xor, + int align) +{ + uint8_t *s8, *d8; + gf_internal_t *h; + int wb; + uint32_t a; + unsigned long uls, uld; + + h = gf->scratch; + wb = (h->w)/8; + if (wb == 0) wb = 1; + + rd->gf = gf; + rd->src = src; + rd->dest = dest; + rd->bytes = bytes; + rd->val = val; + rd->xor = xor; + rd->align = align; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + + a = (align <= 16) ? align : 16; + + if (align == -1) { /* This is cauchy. Error check bytes, then set up the pointers + so that there is no alignment regions. 
*/ + if (bytes % h->w != 0) { + fprintf(stderr, "Error in region multiply operation.\n"); + fprintf(stderr, "The size must be a multiple of %d bytes.\n", h->w); + exit(1); + } + + rd->s_start = src; + rd->d_start = dest; + rd->s_top = src + bytes; + rd->d_top = src + bytes; + return; + } + + if (uls % a != uld % a) { + fprintf(stderr, "Error in region multiply operation.\n"); + fprintf(stderr, "The source & destination pointers must be aligned with respect\n"); + fprintf(stderr, "to each other along a %d byte boundary.\n", a); + fprintf(stderr, "Src = 0x%lx. Dest = 0x%lx\n", (unsigned long) src, + (unsigned long) dest); + exit(1); + } + + if (uls % wb != 0) { + fprintf(stderr, "Error in region multiply operation.\n"); + fprintf(stderr, "The pointers must be aligned along a %d byte boundary.\n", wb); + fprintf(stderr, "Src = 0x%lx. Dest = 0x%lx\n", (unsigned long) src, + (unsigned long) dest); + exit(1); + } + + if (bytes % wb != 0) { + fprintf(stderr, "Error in region multiply operation.\n"); + fprintf(stderr, "The size must be a multiple of %d bytes.\n", wb); + exit(1); + } + + uls %= a; + if (uls != 0) uls = (align-uls); + rd->s_start = rd->src + uls; + rd->d_start = rd->dest + uls; + bytes -= uls; + + bytes -= (bytes % align); + rd->s_top = rd->s_start + bytes; + rd->d_top = rd->d_start + bytes; +} + +void gf_do_initial_region_alignment(gf_region_data *rd) +{ + gf_slow_multiply_region(rd, rd->src, rd->dest, rd->s_start); +} + +void gf_do_final_region_alignment(gf_region_data *rd) +{ + gf_slow_multiply_region(rd, rd->s_top, rd->d_top, rd->src+rd->bytes); +} + +void gf_multby_zero(void *dest, int bytes, int xor) +{ + if (xor) return; + bzero(dest, bytes); + return; +} + +void gf_multby_one(gf_t *gf, void *src, void *dest, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + __m128i ms, md; +#endif + uint8_t *s8, *d8, *dtop8; + uint64_t *s64, *d64, *dtop64; + int abytes; + + gf_region_data rd; + if (!xor) { + memcpy(dest, src, bytes); + return; + } + +#ifdef INTEL_SSE4 + s8 
= (uint8_t *) src; + d8 = (uint8_t *) dest; + abytes = bytes & 0xfffffff0; + + while (d8 < (uint8_t *) dest + abytes) { + ms = _mm_loadu_si128 ((__m128i *)(s8)); + md = _mm_loadu_si128 ((__m128i *)(d8)); + md = _mm_xor_si128(md, ms); + _mm_storeu_si128((__m128i *)(d8), md); + s8 += 16; + d8 += 16; + } + while (d8 != (uint8_t *) dest+bytes) { + *d8 ^= *s8; + d8++; + s8++; + } + return; +#endif + + /* If you don't have SSE, you'd better be aligned..... */ + + gf_set_region_data(&rd, gf, src, dest, bytes, 1, xor, 8); + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + while (d8 != rd.d_start) { + *d8 ^= *s8; + d8++; + s8++; + } + dtop64 = (uint64_t *) rd.d_top; + + while (d64 < dtop64) { + *d64 ^= *s64; + d64++; + s64++; + } + while (d8 != (uint8_t *) dest+bytes) { + *d8 ^= *s8; + d8++; + s8++; + } + return; +} diff --git a/gf.h b/gf.h new file mode 100644 index 0000000..4822b1e --- /dev/null +++ b/gf.h @@ -0,0 +1,123 @@ +/* gf.h + * External include file for Galois field arithmetic. */ + +#pragma once +#include + +#ifdef INTEL_SSE4 +#include +#include +#endif + +#define GF_W128_IS_ZERO(val) (val[0] == 0 && val[1] == 0) +#define GF_W128_EQUAL(val1, val2) ((val1[0] == val2[0]) && (val1[1] == val2[1])) + +/* These are the different ways to perform multiplication. + Not all are implemented for all values of w. + See the paper for an explanation of how they work. */ + +typedef enum {GF_MULT_DEFAULT, + GF_MULT_SHIFT, + GF_MULT_GROUP, + GF_MULT_BYTWO_p, + GF_MULT_BYTWO_b, + GF_MULT_TABLE, + GF_MULT_LOG_TABLE, + GF_MULT_SPLIT_TABLE, + GF_MULT_COMPOSITE } gf_mult_type_t; + +/* These are the different ways to optimize region + operations. They are bits because you can compose them: + You can mix SINGLE/DOUBLE/QUAD, LAZY, SSE/NOSSE, STDMAP/ALTMAP/CAUCHY. + Certain optimizations only apply to certain gf_mult_type_t's. 
+ Again, please see documentation for how to use these */ + +#define GF_REGION_DEFAULT (0x0) +#define GF_REGION_SINGLE_TABLE (0x1) +#define GF_REGION_DOUBLE_TABLE (0x2) +#define GF_REGION_QUAD_TABLE (0x4) +#define GF_REGION_LAZY (0x8) +#define GF_REGION_SSE (0x10) +#define GF_REGION_NOSSE (0x20) +#define GF_REGION_STDMAP (0x40) +#define GF_REGION_ALTMAP (0x80) +#define GF_REGION_CAUCHY (0x100) + +typedef uint32_t gf_region_type_t; + +/* These are different ways to implement division. + Once again, it's best to use "DEFAULT". However, + there are times when you may want to experiment + with the others. */ + +typedef enum { GF_DIVIDE_DEFAULT, + GF_DIVIDE_MATRIX, + GF_DIVIDE_EUCLID } gf_division_type_t; + +/* We support w=4,8,16,32,64 and 128 with their own data types and + operations for multiplication, division, etc. We also support + a "gen" type so that you can do general gf arithmetic for any + value of w from 1 to 32. You can perform a "region" operation + on these if you use "CAUCHY" as the mapping. 
+ */ + +typedef uint32_t gf_val_32_t; +typedef uint64_t gf_val_64_t; +typedef uint64_t *gf_val_128_t; + +typedef struct gf *GFP; + +typedef union gf_func_a_b { + gf_val_32_t (*w32) (GFP gf, gf_val_32_t a, gf_val_32_t b); + gf_val_64_t (*w64) (GFP gf, gf_val_64_t a, gf_val_64_t b); + void (*w128)(GFP gf, gf_val_128_t a, gf_val_128_t b, gf_val_128_t c); +} gf_func_a_b; + +typedef union { + gf_val_32_t (*w32) (GFP gf, gf_val_32_t a); + gf_val_64_t (*w64) (GFP gf, gf_val_64_t a); + void (*w128)(GFP gf, gf_val_128_t a, gf_val_128_t b); +} gf_func_a; + +typedef union { + void (*w32) (GFP gf, void *src, void *dest, gf_val_32_t val, int bytes, int add); + void (*w64) (GFP gf, void *src, void *dest, gf_val_64_t val, int bytes, int add); + void (*w128)(GFP gf, void *src, void *dest, gf_val_128_t val, int bytes, int add); +} gf_region; + +typedef union { + gf_val_32_t (*w32) (GFP gf, void *start, int bytes, int index); + gf_val_64_t (*w64) (GFP gf, void *start, int bytes, int index); + void (*w128)(GFP gf, void *start, int bytes, int index, gf_val_128_t rv); +} gf_extract; + +typedef struct gf { + gf_func_a_b multiply; + gf_func_a_b divide; + gf_func_a inverse; + gf_region multiply_region; + gf_extract extract_word; + void *scratch; +} gf_t; + +extern int gf_init_easy(gf_t *gf, int w, int mult_type); + +extern int gf_init_hard(gf_t *gf, + int w, + int mult_type, + int region_type, + int divide_type, + uint64_t prim_poly, + int arg1, + int arg2, + gf_t *base_gf, + void *scratch_memory); + +extern int gf_scratch_size(int w, + int mult_type, + int region_type, + int divide_type, + int arg1, + int arg2); + +extern int gf_free(gf_t *gf, int recursive); diff --git a/gf_54 b/gf_54 new file mode 100755 index 0000000000000000000000000000000000000000..d338c84e7cfd64a893bf26c6d0d57c3e3b1278ef GIT binary patch literal 113032 zcmeFa3w%`7wLd_wJJzv1Zpvf zGlSVZj;8&C+TQloYioa;P5-l|2eXE#oJFW$)~xC7Tc)RH;osM0l%j>vHcK8J`lY{FvwU+G z`NX>jXOxEyP)Sd_wDJ(PrLt3?)*PHQt9tRhcg(wIR<+N6hqHhCp1#q_%ezwhNDu2} 
z*tPNiGX6Jf)|_hJf*;*?rx8wX-!`qhM+w;YqcC7^{Atlf80F2ouR7J?O!&?Ju4v!W z+7}c~>#tFgctL;B(OGxRy=VU11&GQl&oNDucenN(h134}!Owr+!XMrD!&yJNuWDYF z@@lp6dbDCFeERmyy=zw0g4^$%D{9Sb-~39^zWT3J+r-cdMs9e3Rvu4d-Fz-&ZYt{p|FQ`6sc_pG0iv{Ho-k$1j)~x%0 za{~3|`xf+TUy0VfBCP-ln{k8iE#Fl5tXcQme!B7$tvs5`^haTHdDP$Yc#8NpYt|2M z_uZa~$b?@*M*Api9*5MI;w?o-yq+7TxLqEd1~h2nP^x`_F#Xk!7GY|aHZnZo%Ydw& zl}8|dIe!uV$`P)|Yb$=&ph48fLOiw0td_m_?R&;*X}D0Ma|qMlpS0)e_}zl10MDuZ zp2qk1yx-sXh$+HD-+s$qz2IB-{OFEv{c!O;KSY@RF2zIb_{C2@oP6>7Wi!ToHTt*C z#VDkGPotk_Y610g93JWu!FX`a68yKO`i}ljTU_m%d+#?Vd=qf^ z(H%(s-@pHb!2d$veGT_gG8p$6zl%Zopq9KA)xp4vtg8Gb7HRGu@^)+AjP#CH}8h1bvmC_Td}KV=nts{7Mu?2a22ga#1Ma`(Z`dHs8z!k2*$SJxVlIDod*iWd7dmP@;SA z@VH^J;@l{riKt!|^B2SE=@uP^af6^D1I z(`RpM64femwm5s+;>ihCv;fT@c*!&5o8?M*I*m(-hkfQ@y&kqzVHGPCb)ce_mnh7= zMx9y^o-i=fDR~;BeYr0ah(i&{-KvcDuapA6L|B>AhHqM_JJue25+Qg8j@zZ+lgM22 z?Z9!H6#NTbmGHNhqkcur+qHmx#$kM?BI0!_G**tcs&DG!8946njSd_y@^6&Y$|6)B z8irgR)oqbg{|H&VP+=`ehs*}Xe}Ru>wW280CWYR{JI^DnzL4_qOjXF0n5V&AFJD%d z6oryfXsPkVs?-;9{lLVvvb5?;R2JJ2*ee-@$!tAEMiVxQVmo*ZtTDRR=E~oTsG?A> z{~L1sMc!P0tD*u-R(Axy5#?)mFLSSa;BIg35=tS!#_y2+V13E0@(<*h@@?`g9T7A*(D*BJZY)%CZ?r4w zjB-U?IIbdZo~`ap@SPAUjb9T6^lfVUnU>@l;R}BIops2<94f}PPQvnVDjV=z{P>~ zl)BiK(OcMI*26zJF?tIo60(&jR!q*x;}k1s0znUuPyq=QkWc{$bxEHNZ~;%xoHvxw zAF+*ze-mL-J02>ah8#hg0fZ8&)H5gIwW94PpG_`L{D-2Be10Cyn89mDA=^5dFCdY% z*I6yGOYx5qt}`XPP6;{q=VFI3>3VpU@tAFUueG9*V_x&q#N%z4-cTMxkNj>m}lAOO~(SMpA5^@%6L#I(`?h7nOC~*S~AeQucEE zlhvCG5)Fc$rRY`NK|7i&v!1}oA}M$k-b208V`cc|IK@8d>Q1)GVI{eadeybx+2#%Z zc`B+)oNrA!w-&c3$*5e``p^Y3`)Sw`I_ZyFlOn3-18?}*c1#{G+v{xOrFC_)oMco9 zG}`b<&A|%xp(p%Gi8VXa`_R7t zdBu7wtF|Yo2dHp>I#71nmdVM8l5CXAdi?JtHz|Qbw!q1xzgZ0tFqOLXG+-uT1VF^k zM@3i+I_{zIx3B%C34>qz4vqgcI$Dmxn?b;9k&mxm3Y!URFR$a0d0pE85$s*rvGKz{s-{NMz5!O*r)7> z%(lpEvx~Xk^sEL;9YnDiIH2}wClBa*z{2)E;h!+eqvMIZ?Xw?_Wc6BCb9_*l*J~1+OJ^~ ziV(L_VG~L`s%un<5{Q8MO(^xWxJDIGF(CU=@EyE7G&XRe+_#|aL@ffoiogkwfQRtv zWe2j-;dncE%)kkoZ*ky+-M{^m+1TvUl<_Wz%qhQ|_l?se9>yr0^2>jWr{Qs(CP&wL 
z$^lk5Yu}obIeoD&f$%A;v8B8afVadS#dvW35A&i(0r>bR&7c3^!^=cg66)eR1L29UkGM|nyK+j2atT`6mSps3-9H1GT)vNX?9B^`?5rN*KJ znjK(y9p19Y!ad5Is1hDF&{Nj#yO8%!S~Wl}wRng|G$Yo3s0~f;Xp7&)FGN46kuEMHbOFWK9FUSh`@YB#3Dpsb*3By4ULNhLkn(}B0oTvb=KC1 zT$hRjn6;d0DiZt}kM!NNP}Is+bplTl?Ihk2X-<(AajAC~x-r8-jl69% zHIaIONX5Lo1F8G`i}*aElw()&GFQ6P?;~ZWf4WDlDT%#@ShqR@F`fPe9`&J;SO<@} z0Wod%(`thg6`k=5ASR&Y#h5fED%-f$qzY#cCsIVmw zzFHC{Zs70-o~ctE>Y{=*7)T2{*3~Vu&q6)^A#(C99_3>3uI^B??+_0KPFkfPVU^6@ zrq=Yze5Gk|6T510_PWXv)#oK-vEImKe_wbd+wK}2U&{+`@9K6oyI2qEaIRzK>m**+r78uiHQIMsG&a%|wprys_dt1c#F+%XJT#rPbJqBncg{A? zoKA=XyrXx7<$>6aDXr0CxttP=a5#G_*bdLp9f^F=Kd-gRnv_=MVlnONgDhdA6)ckb zc(U15w#mPj_1&to&!+tfRvWE7Qw!u*2rMqv;puwMJNOny#hi`6XWtz}%Yb_r4GtNl zt6RYU*x8Wlp=NtYcvb@(Vb2P$r=K`K-e=y2H!1=^t%G zz6W+<=g`)K8kCksAZaqOF&$n4R^<+WE7``!w(EeiP4t(wI@`;8o6*PCjt_l>p{>4= z)=rnRF)iL7W35Dju^w1E6K{$y#ecCM<6|MpmwAh2paq5@;`=rLnHZ63k2UFXZpNOd zJ+w)3_jSa5!{ksOvAL!=|3PN#Qqu69aFcaFMuMH&t=Mc=7gVS}F0D}AcDaRc6?ARd z2n}MuJdUBs)`wo)ox z3~d07UB}^mmY|*S85sX;;{%FZit+E-53!g!P+Dbk zq~uS0-)K&sY}YdgewJ1)bNWCCl%}5kdo4rS_t?rhNH%6}A4cV%qDrhCHCowWY1ObH z*ug%9?LD<2hbpP1k=6N09p9)urz@|Am)A+01=>gD zby9go`%YCJ>(ECCJwBg9(2#ckejh5>Wwo&$)%I|%?Pmlzyy;LrYk09v)mHa4+s`@d z9lQjcKL;ja_wc|#zRa{W)o!xZ9j8M*|A&E-0~W?1o(vDiP67fq+ryAQB)dVk9!8j+ z8QL$s(|OYfJg)qx{{`$ZYVHy92@90+RM=I1OIuR|r9DY$cN=Nae?`JUO9MUxkpt86 zm=8D(5-&aWW4wrLjaYajpd^Wr*8k$WZ%_*IhP?g`W3wuqVOqa{Nxk z7{7?>WOZ@@K0ytsA)Ww$EJ7nfcpP{};`u6`tMIt++=S;gJU_-$jzi!IJm1H2CmtW3 zIy{fz`8PZ(@Vtbl5zl5k@8XH$>BWiz|OE7VFX1V1SNx=4=? 
zL!&+Ff>I?s2|UpNS-rkMa420}?6I};8;x)7lm#4^4JLu7A-!89YWw3_@wJm4TRt709U;YLcvBwon)^sfGBd3qrMWk=h5`iY|F7g(BVloy2iU({ci=9v|6R5 zbbub7k`z3N-1VLde7v08s4daFeiu86G7n%~YH`0x9Zy`9hR@gsTO;yV?9p7s?bv#| zLrp%<#IVi6;{r=wwfbB#R=MJKwer>c<7k@zOa!HN4|c!mQE#%B*I$w23e9}g|2JNm zQILN%6+~tDUzNdOjrU{0wNbZOPkasr8OVIiQI5llWv8 z+U*G}aag3_hvL7IfASa?g^YV`=WI<3f~-U@`7HcDaB?AmZZe9Olp3|hu= zi6J1eR}(mPnJ=$ve|S=oO?1c{Qeg7ru6=UYH7bemV-pK0Gck7jby|VOfSwF>%93jf zn^ttRQ_eM_kIT}3HL{6CikjO3u!GSCP21=D5%#^#PGVLnoV{WXoHibUfT4+@7T?7l zbsB`h#6il=DAhjtBcw;Q6;QH|^JDZtnQ2k|=x{7-^t{I(Z_ zv}_CJ_9vKXisS|kx(({0!#VU6_BR*``3N5hoU)C4fWOYfzIq8j9a_#a>_AM~BWTM*@E*EsdnM@QKWWrTU+$5(H{{R|uI>D}KpncCI zJ1b})DuExO5^oTdcni?ji7^0H6EuoAuDk8=OVCOrm8X&_`74r=pQ1}AuwM1zr2^Y{ zkd*l#DW#mG7<)1eg(rDfInk88tny6=^MhBQRnA@~KwznAv0;G(3(F}A1*Z}LEw_sV z?G16sW+l*)Gv5EEzb&xjO;D9X047ltUYU%ASFLzcPPXy+Bb$ken;p@KN~zAjndwO@ z=!qOU?3*06U6~>$6IipWc#}p=ELH4^sbHmbVlvfU-!J;^(W7L*a`3Wuvf8briyW|Vg*xa;OUn}zLiFP zqo-xEBM)m>yo6S31g2pnk6)?O3Zi#Qv0{?2U*yC+l0Ihb!U)2J6(DjP{m+?() zdHfj!FylTHbd4tCSWcxdk^(Fc;wtsIjsV=7vhzbk<00Xa42n@{#1Qazy*+@z$ zt2Y*~9c&w9Cr6LTteGS;7k3`*WXl)P*I>H0qeU&DpCW`8FofW%!gMs_cJ6aKKS*3+ z5d2z7my_$=)=%7JcNRQ!gC-i;)Ae5HH&nZCuV>CXN-pn?OAS#4a`84Sz-YEOgX8^< z+22nl^MAvWjnZoVwiftdQD35h6qRR8Uc^EaO!C$d*E*iRJv3TTms1lJ>vAd=dx*zx zz;EJJNT1j)d`$q2-M$++>eds)&6{f0&9>uv%97Ep`Eiu-R(gMF{77#B!pRe_UxT7z zgz|h zCqrMwj^e+B?_)0)8^vF$m)F%@ye&&%?=?>vVJeKvbJFmQPFt|oTnNe9Z6jRjeMK(y zp#mI~L&O5sxrN$mQE$#7VVR~&SVo>&zJd?c8#1IT$59}p7Ot^3^pmdqnV;kIiQ{d~ zLUp;>r}F5xsVxjxvPVcquBmlle7X)m+VUP9INs(c-r+8LZ^^ga?6`aM4mac`N4L92 zzsuUeuW166TE*Fdue$aY@4%OC>zm+1N62MOKiz>Ax<+q@Af{bbuRykwZuPbzh^QC2 zD%6_`E7bE{?9kEOZnn)mIyNypY`m9>MfDgz$a|Eo7{oPoT8y(-TIsT5`sMp|_TNE%EOT8b$OauGINroLD$X3I3y!&(3wv8;pS%JZ zZRsaFK2kj-;ZDi_9{O7K+^S*1hrO9DQ`*_Ua^R@%4Ox^HIzc* z38{@;=!BJ9z}VwJnB-|nDeEVsx~WtMsZ#Lb_`vg*m$FXQt3>zP6zdjb8$j(eoJXcHfoZf}zf1n$X$?VRmoS?!VI7rp4rl7~eSXUd}#eV_-i7-i>WdtUX*{N&m{X z(b{H|qc z9I)_C)`xB2AY?YAcnv>Cs16UQ`roQOB)_A0Ilt9EHoVjV@`aV=0e`61cRz1!@L{Av 
z0jAX%=yUoVf&I?FNvFS3=l|E>0|?{j^N;IJ(h&QGQtr|~U#Wj@V1H@gWT|h*8tglO zzLaYdW={#1<%#e*l7X&S$2Y zMdPB~wuxhG4k!jvRby;dPlS{zRrniiO*DVFZlwo`DGU+<(J3j|fX?!rD`A%2Io66& z2T~XBC4p<`aiqDOJ0X*jR<&~aNX_^%ZwC4vjQB1B6{T;`Afm3Wfey5Z1Vqj@){aj5 zhMF_Dq*dO54!5(_U2nUDRDwuR=ZODLe38USJliea;v3%4z@e}mUbbJ0J+zx(g66cL zIc{my*Dt2#6rWXdI-NX=^z7Oiz%(#aV1#+? zPwfIo~o}tt-iz)Df*bw=A8{`xWbM?X2sq>vDms0 zY+^cWUdM|!>~EmG47S5*usQB!AJSp{rI;A>djuFk8V@*Pht~AHYrJY@Byd8KmafHz z0ErZ=26)+a@A%4gDX<)%t59=1svkqNIZ{B%O^T8T`W*~&Q(S?dM>{}vG#f{ zueK{rsBcCTXHR)=i`6SZ>7nD0e^Uj^AuVce8OmJ>1ucjkH`o);Xk>%WS%sY>h)w-F zMlrB{w#M@D^&0SmYHsY4&VET^{Ah58(?;4L!&MDqfD#`e>LOIO7tg&mi)9lipC9|! zI}rXDzmp-^*@5RO#0Owpwi|g0u{SQ)q45VmL%@Qz+ysaUb)wy)J_re7AGp^FbuiFM z147hW+yq?PHIVL!DqvkYAWcY~Z<8 zV#%UzRj;C43X6Im7=KZ(-xO7U@e0T7kwhp4|t zz|zA8nc=YTxAEf;V)FVg!?24HIzVzw-W^~giFl(_tmqVt2RH$!-J{TU&*iy@$Skxe z{;Xc;N5BD5=r4Go{|AQsY{z_IUHp8#Sxu0~h-RJO5?Q|nWQ-jl8?doIkgwE%+(G!J zL6|U28{t+CVKb)DXnM=q0p#(R797Y;#x!`CSG+`<2HNQMWt#@q>C<4NQVhLi1Nem5 z);2GMzD~5c*)#TPj1kYgP?m%^02^@Ftmuhk(GFWP(Ek6jLca2~w#58rBF#Dt+>C zs?cxp{UQDtB=o$-w+$e?kEnAbPiPW!YfbGjk4p=uPR;tzz=r4aq-o*9@jLZa9Rb=H zB;h8+!XD?`V8I$aV}}Ze5P`@*R-CsC)zLyn*5AY*)T^IvOx|~npDSkc!&Qt?n^JsC z;Hr6yGy-)v`xvqK_fR(f(m{NeaF(FC34_%d$}=QqGKCB7o~p)bg9yw!(362I}R#&4V}KGGsiE7Mt_;xl|H z{2uBw_@(sv&P=KWQ^2b{Eve?i`b_)-$os`;zDJUO^_<9aKVq21rEF20Kz>@9Iija8 z%^=bJR?)inETc7*yft7$GT4Vid^}QU>c+Wo24^-)c;<5<<5?yvF)lPv?O0iK`q2tf zfBM;4AOBCBWC#fU2j2eq5Ik$}1>;3VQ!YIBrt|}enWi^mMb!!V*{ros;9OY%v)6qqnTd- z`_YeunF1{f&O3pV&kELzWx)$gQZu4$%)&R<$0wk2V{P1|E!u)rCN7RD>dz)nu^Luu zd;;+dDJH$&8lCA7ICiHqi1)PVKVj^2QzsqY8)T|$C^<4ZJlr3s`iq~>3^r#;tZPb|u3@g#`p&XwG z-e;mmm!k3vdAeCYZjlhKY2tvlNE~n{wzOPS0>fJ_iGn>tsSBL&{V=WLsYwa9@3b`PC$?zef^ z208GL9ECLn-m@kK>+5Tv346b*NqGyuzEO?})i%o6RuM`_Q6I8Ie}vYU>%caJ3%-`bC8H$XeaWtc$~S! 
zHe%6SBR%pxY*qC>zqTIlf%S_tkpr36YEr~qvFMxq(jbk1v@v!k)W$QuA~;n)cX}8N zggngi-xEG813vMc*m>&b?~1S=E|t~|sFLpMu99Z&6VLp87HQoiJ0fF<_&_}KKd>m{ zXOzRH97uy5SCusH+r2nKM{2Kl=J(QJk&xdYwGhu6RZ{gw)+%Y|TL?hmd1o_z+Y#BB zi+?>xInjvN^{`SxfrqvuHL4}-+<=5li0II>Q$Z&-A{LtWhmh?M3arV2jiv*}Xbz=p zM)|_}bbViLAN>XTcj4*!htAHs1g+SHUo_t6=jr=+F8xEvHJ6-i|B!k1PVvm&sgGa! zpo;P1f^9K=SLU5&{7TR-44dc|Kt7KKB>VVbAZYwjeZzpBV(fqseC$wI#@IcIu`~3! z#rf9f7{BTT8g72y*1vBqL+=2MrSThz)&L#*j~}2}-DTh?`e)!M(6MG{+Mv7ueeKWR z-utxn{weni?JYz*&v8B>d(EJeCZh>Fl zxZc`-toqG|vmdWRgZkmtE1dWW`v1A{tGY^S@2_yo19FyzTVJ5P8S~(EeWB3!8)|Qg zAFDgK2s{!U(x`bGVR@ja;N&Y?;f8{iOcuTitEduK3`+*P(jq%Z$L~}&35YL5GO5zl z?*f<0V63e!rvd_34_H{LLNuRw&XGXfui_IrV}@Zpf6#La#gm=!c~|i!RkkW)he)Ac zA{Ode)L;|0BlPZFqikeUdk@5S-=R}8DR?_xVaXS?;@1Pc{L*IkUoFk<25YnXw^jvb zPR;JW;r-`1&F-gjxZ@vg1^}zzWHfM0l7fGOfKr*$SnthSS%0&&{$_`@v$xUV+a-^k zKFX2U3f*J5o4Sb&T{!QFcX^Lt7QlO~P7n&~-++Xl6xPY%hftCZmZMlrg_)RAu4SXB z9ME-(ZM^NF9Afaod3(0OBasxp#YSgw?h+4FV48tr zVfiwJ$)(VivNX#MYqNZr6?}EGoP*aPIn8oW4%bBD_ApS3gMxJulX~cQ)KPM8Nb7~Qyx3tk$4A9+Ps4|iFC$aJP&Xto$<$kfh%AT`bs$5Kr@AfXX9rrjiR#`=Z>*LD7+cL-LZTGz2TWFE7&&PI~-w8waV<6 zGKaiu*nkK0y<&|zdvVU{`jXW}wr6!-Nr(SGmGCRnfVCE2fKs=gG&W)l$562vqSA4G?wQj>rFfy_hzO6h5eMqL*zh%l zu>FEM1AG9DA;g%|!FuFDmipk;R3z;0TXD+I4$Fh4TI-jM)8lfqxExblt`?VTinD2P zHd9=l7MEv=8=%DvFvShj;s%=H>{^`N6qm2X<(uLLX>o&0af7wE!KSzZEv~>6cb*n^ zo+<8pE$)0RE&vnbIc@Tw`BHiPi)92Au7c4h2aY+U(4PQf?)@aXbAy5Wyy6n zM_Mv;7=E3=Urdqnf_w^A6vm%}7-R(;S0x~VUtyPmV{z#FIS88X($nCvfifI%V|N@) z!`f*Go(^lnkLFUJzW~nu*#wRUx6>Tv;Ar5lko$zsn~NiEOy|P*uK)&(en#*Lpsn3P zAAk<6MA$Uhe%(tHNt+9zV~nEME+?GK%v^~XHvKYd_(mwwxc{1(XOo7|RH%P67i@$D zNq;jX;S`o9fm@=a7feYPXh~phDd|O1(l9Lvd^#n)WJ(&YC1HC)Nvljr7ivkQyp5#Q zrlgCsq=6!7jVY;6OCm2f`08a-(#2X5x&J}ZT2s;`TGAkq^ol9zQY~q)NP5+jG(t-% z5J|6@l16Gt=ZU0srlc=vN#~2C*G);6X-N{2U{^zqJQSk+p_|fYOdlU{jv{Y~!N)-7 zJuG3~^@0R!;b0LDKn>_y% zlCP2WmXHsC;_Ln#{z~v9Rd&R}-cVu7>*PDqitzK? 
zt9AuDggJ~zn!`7QZFZH1Fad=m9D_Abn~lg+!4A6Nt%V(p9Tx=KSUH(ZV55iQ;-wxk_c(0`a&u@iW%HU08FiEPin1D!WptUc5BT|;hdI}~W zrO?usk%9?GDYWusq+kM43N3sYDVTtiLhD{e3ML?>(6X12f(b||wCZJ~U;xp&|!WhOTlqb;BsB1C1jQ7%;=u*wk_)Si%<77PBPS)T2V{rY*t z!tas~zfi=b*RtnCluSu=>uH9VHsmnuq8e0gBZl%sv0~<^d>+M0UW8`p1{c>f#X$z; zh`o(M6Mq%4R5it;W92wzmy*+U5$S810{0RdK&%G{iKZe z%70SFj>i5U`c*5TSL%)k$8YNV(!hy<(o)XSo$b*9&J!FsPv{XR1$1J7(*k}P)eXYn z9q)#6YAa1Pi0S5i``ePQ8%-Sg-F+PlwpPlb$LrgNt4ns7Lb0997 zCn-M8hKy5~d33Hsrxk$5DJ1zI=zd4A6Yc$uU;(nchBz9?HLP%HjHO&! z2Q<&1tW|F55Ds(aeP=rIf|3F^>6xFjRJo*i&G@%7f`5Gx4D1#w#ciUvR*EBx5pi26 zuASmAyC_cD2?E~NlgMj;6M10Eyo*uMymx69?CX9`UC&^Oh$bAd8Wz!16I*ueNIEcvs<7csiEdwLd_#@n`HQb@cX|5 zR}|f60~*5l=deP+Y%gH_u?;UH?>#^`O%5q5aex44@MW#lQ_=qqXz1?b94ItRa{J_7B$m=OSpHX}l(LlX@qJM_{wLP1CH?LpVe;fAEya)|WS=%G>XD#oa(H>Cp zzyS%qPwDJ1T#ys}&;|o_>tS?1XG~C{11D}%Z{`mLJ<96uyPWyg!~ZKmgFG0^Y^z*% zpa(FPqlawlNZot%jtef}7=l)U19Ea0Je@#kZ@;9v*Tc4yxnae4n5-3f8eE3sKv7fJs%L59N4+jGE#%+0oJX9Hm)Klm?|oCw|2rHLWH*>ZmrUcI%^oyxuw3+H zQ3>n$3U7MM#tze+9qer>2r;lFh$Cw9u`8|GMlHnNeg^!-H>1(}Tx>2_G;BzmUKQ6ayT*5C90wm&Cmci%g{HC~=2P z&RylwDu|BW7YUS=t(R6EL6IT`6iLos5BbFUGr&VyCBC4ZtcTeHY~pLt3d4V=vwE!SPJDEwLUWxq* ztqFEYtO37fuL0%o5fE&X>M7oE98jc1l9vl3(xF8zGe_FBNOHYzJ?YU}tJvM?(OSFMUFp$U%j&BTO)WG0 zBxH<-2G9Qf4lW9IE{6Z$Om_!yavv;4C7b_X_-|Y|Ig9^+@qk(to-6RYs67-%PWAWW z8HML)^R;NK6x@S%((hb@knUd~<(^;|+!Hv2dxD{$?aoMYlYhsWMUasRrvhzAw`^B=?*)$8rBFE{k6yL#VuC zIS|BCVdM%ye0&TuBToMa9*9E8`w>d~CDjzV-$yjX|ND3w0^}}13cijvK*gE^mkCmE zl?cH@WA^0EnFmU5o&p~{Zz#?b{6_BzCdGZ3^-Rfz9%^1W?Et83r zqhE*it0@N!f2Eh?@iU47=8t93dps6xDtlUm1=bQ4iytG^thvY zv5M}fe-y2U#>$)y5zz?klS{0mB8QZwddpWJ?_v;7gA$ zuz}HmKk}BubLdqk*?+K$a3rzjV|tqmZJ|oI*%zupWUxxALYLzk?mJcrj^e69HRRj! 
zTBK^KLY1v`KhgArA{ZhR_woz8xI(;c!p00G7m2*-pe3LC7`akKT7|2WPNOfh?j&IQ zaxMOGeWCf#7g9)H$PQg0JbUayU+6mM3++N{(Y3A6Q4n1lc`1YoHT($nZ@>U?E;G51 zJJa{Tk2iezk077EAwRzKk~8n{OMtMYBK$1zS#q-*(d;8R*E9OLv1&tsAT;QrY*N2A{O{mAEz$n9O zjZCP&qKLaR-*&Wxx&v3|ocwPCZh-n*ds&Zjn?M`M-Qy~2^RI@A2W zNBA5eRC$Yl5?Ep@LTp`83ZN#`32gvzLe?+C@B(8`dL%RW>kNpi2zx?}B7;@SAkZuH zJdYjz#z*wo3@hQp7#8nIq|t zXf0X}la18pBu3Y{FQZI48d?iHPs>TCLTf>6(<13WXe|*JbD4A&w3eV|j-+FtwIrxE zN74z zPjZ7NupVRb84hip!PLc-QZ`~tyhz0O{o+F8!L4H$zZOm)X#79mn7{}1VNO3VP#ZAY zkTj;p81F|eqEC_Z7EMHr=AxX(kMOajv(z8*SMmh?6VMW5E{rchW|}inupBLrn{A`; z&o#Xsk{O+|hGrT0El1E+I-%ZGoI{#tb;pQQ_|E4u?;TtYH@(7^=t6C) zfNJW4BVaTW9=;Lp8XchX*jp*7dIYRw!ozn1_p~HxqlJ>Dj0nLM#qecD655jjyAyIA#wIaNZj^Nf3q;$2r zi#rLfOW~W-Vu}A7D%bq^r8^jsf<%nDNVb(erqvbR?%{yP_g!+ubG_LOkP;qIX6ASM z`5OW+k6mja5hn5nRBHAI)Cre@RAOJh5)FFKJ*iF z-)e#Do1v*&zy6LslG7Ug*^#XI>ofA36_ph~bzf0!zUkCKFPuc7gJ`2@p4QLF(yD?&>?4O2 z!d+CSZbzo-J|%roJ?$Sy4~*} z=+u6U`xBS|Lds262Xg;K@HYYfMGIgMo85mAY~z|>JBV_Z(vpQs_u#)s7crCLq9>Pu zYG%m!=?wU~pJl}NKLegbJUO0az}j*=n^D0GJmjH{;z#Md8;sszhX8LoO;TVfVg;Ib z)FB@ACor{K0Lue-6M^>=SPF2HTXYV7iOk+lV2rB!V?km&x0USgGLvE~eh z20^QHWA;J#W1Muzc!gVm4x++)d4;eCfFq>u0>W^FG#KFXTs{HVtvf;@KzlCF;Vv8o z>;2#ZyFUJPQKru=${VSd2R~65F;S9dthsfb%ZGA%_jDB57~cziLKH$8HXapsH^Fcp zjY^F+HG&NHuSerxxSw9aTk)rj5}x2C4B-asXa-6UJ!&^WDSJC`AEQyqR9=d+0((Y} zO87@u0Apnn7I6FZe#YZ8@ae|m=}rAp{1dqS5uHtN2S8^;^3xgiqq_cMuH*Oba7DLH z25L}O27Nw9Y|C_SVxV}osr5Tfdj_G>)cnV|3V3Str!@;ah0CXOe*mZm2Hx5H0U!l* z%jeu6P*XS$10lmSDoH`L3%%uN+o*#dkxOSzog_jbq8q) zOEbym9{MJN^6(YzYj`1!2KNn?o~23t-Ak!=!X*D%yr)d^;iK;;CoN=>{{r6FHexhu z>z9)N&BLJPwy#`R=NAcEec(7Uz_&K{7yIL!~Vu=@$I^h_g?#;73l5(wP?gWb5BfzSqgtKaI#5M*NT_ zkyik|(K(AUl>k?gqsE?1!N^X~;Cqp?E1lHsMCpd(0Ycfca6Irdh^aoZt{~p)))V@e z&Pg?%e^<;64l}VEC)K1b&G!2LE{kV!vu z{{-pdPqqDz8Gq{dIgCGn<{ZYKfOq!eZ+5Ie;~yvgeWv|VIz0&-gnth>=6GolD)Hbz zkGm#4V1s(~Ex zqD38bdwOU*t;euIeeq%fJWd>&N!cB{_JJh!n=mFggyK;jKTx(FN6aM@gG>{~yf~Cz z^Lrq{C=xq5H{2yZozZf%MtbaZ>B?fl#u2f=dmAlZgTBJf{W3H6 zGPUwWI?AWhY&zwaR(*^ZJ-lA6T&en(i6C>sCiV!IC5s4YRpnDI=U!(c6cb#|ME(vJ 
zTqlW!)z%NQCf&a#62L0fsA1}AS;enm-GY}4a6u(rKmxa({0+2@Lt_Pp#`D_kxPV54 zcblMb7ZssaM9`fx_n$KB{g?Sky7$ z*}+XWpvJKsmyy^nE4pjJn)PWs5A} zyimTD#rPJr(CJC);|le@LbijO8t}XqvZ#?cm9v5+vZ_MRZOo_puW{q2=gktP8&;y9FE*AHdgqmj4@{a=HTU z04UKyP5DedbTmT-=tY`bQ;;@pwIUo>K!B6dLQQjfG7x%>hRY*}9uGkzcqqe-23$J7 zLe2hkF`tGhpV=w7)PY!1tgER+P4)4QVCR9Zsdx+%5frEwQ}5E-ta*5X0;b8%LhD^O zBKL_0zMz!x6%f5uVVe~F z&O^}^Rv#3C4{ht{^Wy^Mnu|%DSSdp_OG$T}ltmLy3Q9$~(O-0Eq`-4jD)(7)Zo^~t z8iJvDordOiFhB9xC@>ShQ}WUIV(XZu-1`FLW*mDPxB!g~GtR7E{%zDnb)JXcv(hh5 zX`4g6+@yQ%NDCFEg~}1q_0rwJeUj(g)zYPP6QSOlse}%V@bA0_6$7EH9>7AZDXaGZ zw*WqqO8RCfl0(niE%j}NB6&?kE#}Hru9&`+Qs`gbRwHgV#nHdM-A3F2ilcvh2aLEw z6i5I14jFMrDUSa29VM0Z&Cmd^?yw>Z{d2sw(On$9WGa$|0B5ej>USR4&aUBK_*%jUmw`cTShTAiIM+O*K72csqon^)u_2K1n z)kj_VJoTYDpR2w$00+n*6Eb|Rns<`D3AGsCYg3r3_FsuvOVjD!v9$o9oUd)sagHPo z<#xq>4^Ag7Mcb=;7i7d6?4CG(>9>CMvoAT0WrxrIH}E1(!_H#6bf5gClrGB-xu>Fy07+=|1eS?9&NXJSuk1&9+G|^ADA1js6_>Ti@M594; zYX%5k18}fcR@o{Rra9Fd5UDlB0Nsxs%t$fQM-7a5_h;nc&HQ6k18S#Lh1N@~ilTjJ z33ou(ZbK|uZdfE70z%fDpVqe5@erCyy`9XHsVN(^IVZW6H1tK z`*2Mi1ZqN`GC9qmYg2mFOH&G!20N2gUI_g!>^387(1ax2S-im*;;9aK9fpVW{lVd% zt-e2f@jpY~f3@ufI!iR}-{SW_aScQFX|8KR29vr!UFKxy`%}PB_@{uO@gFS@lA=GX z0tPyxnywiS+IbMGi?9vI=Runu9K?fQI7RAU9t40zuz&}P^x%0sSgr@pcSgy&0ConF zvm?|d%nT&tiDH1LVK6g59v&4?YIU`0<;(Iyq~#Crb!dH#PH{f}ci z#kI%d-`8Kp>n|hqmrqd^&rD8o`O8}U<(K*kxpm=5Gw_muPlTVG{*ipC)!B{?l9pq~ zhrU7#{Ni)J|1P_Hp&S-ru$|CYh!O}lAyJZ9iG%J*!YptaooFD+*!$u~*l; zem{;}pc-19+CVd*3AJeFbUR|nZE86J_rOY{`RqX&E#3-7;j`JT*%oDOJ*{esB3CSmPg&XB!kk1hS*5|jQ zbJ+n)-}BtUprkXg*&xuh=n=Yv?(_DebA0slCS!cr4pfl%jhP?Q;Fpd6kyhg_SMq3) zodHVGKQEkNR>0RH>wsopc@fB)%%*-BxM7VvM=OcXCXbaJh%6t)HalDJvglvXL7Yt= zZPe3mqV%a4s*SR{3FNrto7rk8b&|uD@-a}h z>|gyABJoNT-LGlmMB3H(u&H0-HCX>;rd-R8vcr&}vYx7FtN>j=W>~me~5H=Hd#G=eztTNkWtzqQyS_?Ur%5kB0cgu40aY9 zkqDwm{|a6d2ypx`&_UUkB>y{|NRO{)1#v0m)m$cK~l_PO{a%EdiI+v??Ro{{|N1mNm7O^J$;tYs6iW zFC*h1tg87Rabpx$e$@Ym#B*q)|F>&M`pz4 zP@xpMoIaV0f9dm&=95`};3;JGu;aL0Qr8%KLnlk_>Ln_NlPirB;)sHuS75Q>b-0|d{Kl%W%c&Af 
z#S4`HZxJr9YtHeOarZgWBR3*3{vyW7J6^;D{)m9qTT&)5ddp8+2;VY$rY`47Cr1Bj z_*du@KL8b0iFE!VBGNm>ztpnuPSMAw>J+7kPMxYZw2(Uf0tfHn$b}-+%w+A?hYK!22nk+mXZe^bE}``;zo*JM5bjuD@!yi;DLc0CPZ?b>RVoFDZAR;g265s%?L(@VWObWAVO>2v2 zq~84$Y2~AJQc<}MQEVkL>F+_jgVkL@P#`j!*%4(<6KtTyK^cb#hbQdH!%*_;TEO5n zvk=h=oTpMtWa#`vy<}^Al3QI*A64jOFdE+i<8PUt=uwUi?->QB#)sDtqpul#>*sr@Hp@rv=peX#78RO}`!|g@Uk|gK=W)#! z>`>kyjTiW_5+YGjq(9Sv-d9mAi9&Pm%MbLtxD8m>7?%T|!R+o5Su|uv-vy@ z)S2|hlecEXqq#QU68z7TLQ^ni)adPKG^w6eSEuQIrfWD;L(f9P8QF|mL9=K$@55?I zRZaz5sLBPm6QCE*%xEQL`Ao}F{dbRg26bdqdMb6~#BbC3l&(EZ-*K%wlfHO{_B68e z*QDi|&VP){9Dcf@^H_v>Pp!Cp zYPh!bQ~y4tPBlT85aDz>)l^YGJ$eEM*$n(HPB#d1s+7r}j{_d}*YACMphl2(;zjW5 zXRY7+Gf0*|B5_cgjNOL!H2vN;apo(o-1fW=|l5TI^K1(^ZrCEkk`XYL3 zfxizr(}7-R-CjsJ@xoUV5w`I#U&?qo(KbW%7x~dKgo$m}l!%GX$ESh4<%n1GKoJ7v zdH}4OraY>t6_@HMq*h#_2S~@X2mvUsU{Nc?Tf&xAvABv~&y}uJtdC~qwP5@DDHZEu zdS<0!FNEv)^OO0ySi(m_sFgFa>H;1>Nrb6g-Ev6OxM34P9;kv7at?;5EWw%9wtWTnVU~K9#E> z9>5lz=!I!B#g(?-{T%byfeOv@*lvWyJhm8NF^_2x(&q6|BJ}z^j_CoK$K85>=J7T? zU`L=`570br)dMt-BM69jyasRo^XD-E_8bt3;C>DWMZC-g(jstHcpWGMAll(qYJQhiM~fl8S79H|Y-p;+_xG7^pQwVlaa zh|_fUmP$RkqvakI194;%1({w&~oDf&tMJ%TY!!I#t> zD`8Bd76_pJ1|^a9*@6oaZqX2}{Xmi&m$dp4x3oIP8L1+v^f1VfBI*c$_+oy3J&De* zUCwP)jP97sfeK9fLY(Uyjm<`tX+ScAcn;R^v_w&@2H(6JGE#Zt1|(Kcdr*H4hQ5m0 zVrmMS8RTuLa+zAPDGpT{0`6JKd${$cDyhFW&II4Fo09V>ZM_0!q+6J``Jm zai-r&{2JQFRV_;`IL;vcUzt;|RGBjg?gDLAV|dxp93*pBGVt%?dLkn=crzvA<}ao0 zIH;Y)zf-AerUI#=SFaZpuCL$-CIr82O(bpjIV1|eNIgn}Cm2|g7{y+wOiMN6lSyA2 zyAiIhJshZ?eLkBZrg>B*D+*38_i>2%ImD7v_y>px_0!pGp*B5+B}_pb}p*p`q__vv7wLxFJsLoVMJ2msfQF#|ffw5DpN&fPGkm5Ujr|0_|3_7_HC1k| zk!@&lA6-uye3SY%B0#FsBW#%wq#Bgc2FE9uh3OGRnGxkY0>=(?$_t(75cI;^$!-M0 z4qVN}+=3SO?La*^6_;M0Xcy|=2J&)!@D%a_)xOgAC@schOK>rqUP!MuLjU@{R%lw^ z-iCkk+bsAGKi}{<{x<#{qklD(wNL}r^s~29(z}#I|7w0SpD^D@-9xGSD3$)zJUd&Y za%c8X>XvHwg&sh65h&-=Mhldh14R^oW_UN1{k9eVNo`ik=`iP{T><4JWp1sFP=&yR z;_Bo(=(UT#9-&wGPe=bw(knFE@d}LyUM$+Zp4z=WP2ap3x&?E(6)D_M4Y{3!jxI{zS9EhP>on;MrSr{14)AH1!!wIE_F2@5 
zMFV(3^yV0z&sDn^F1mK$80iT2(?&mwR)|6VLapGa!7o4fe60|u^o5!r5bz5Df}`yh z7!LFT)()S)3HY8YA3ira;Cqb!7pkA|{fpF(S@A{c*X$E;_W)_K=Nr~fkK|!1N?g!q z6R|edt*3RXzm0;0_>PQ3a2K@0eHN!5nMwWlwYr1dDA(A6=~hach|8sWfjQ5%jhL0e?=zg zHVZiOVT(w+*@heu&218Px}#eUi>2pSKweL{YnJPuwJITzoXg0q+kuHb2 z7Y)QI0l(D!L$Mb;XnRImG*-r~U=dWf1S<*L6+7{qF9i7M*rZ@aj16Z_d$L-i% z@?C%%_)GXd62EYq<_%Xot%Uh0yhrtoDq-98`n~ETc;eg%-)b0ds8hAe>acn_cUUd1 z0>-EXrxJ4E?nMYpF?#mN^&D;~{=NO2 zzlS5#>BDQ?tPAcM3g~NRrxSWKJAEVJry*PQ)asa>zG{b8$>9wi_<-Y=58BX(Bj6oI z6OZ#}T904?1k1~smq^Zc1f&uAl2XlUrIZ5JWU_-YhI%LwpLvQX7^c;fQfKhsa#|25 z^?Rk1`Wr%EO1+Z@pLHMzeE<@%L2HUcaxhybxgWTKZww1)n=`so47ZBlLTO+%QofAm zyTroKP6Icz&@CV1EM#Ig@9*>m*I`7(@9(rB9alTe+fA`Mz-`c-EOcv=2!U0h55V^! z7VOD5OBeFc)lKs{^$=E*``@N={z1`5A7^=6&x98dLZ(A#SPh;-3c(1@djpXo>}85@ zdx;dH;}Jr=hp**WPbh6n&eViC5PNu1|8#A_{OdLSlK>On3iM$pp6dx7;Lx}U7OtNe zXQAt~=KX`-u&ukFU$O#tfipx5xUNIQgVKdMA_Sml;6yBvO?l8eBaaSKWCHZYS*Yi{ zAgp*Igo0`c@yt#K0Ym{we(WvuX;S;AQ_k-~oM!Unf^Kv?i}^kGI|9hx!7t{w5?=DO zH_TbfFE+p3^&_r^kHbO#|CryHdsop+e2LF*45JfcB__BuuO>CWF=e4&ZOm_ccB5&2 zBlRZJ{6^|5)BHy2?PoE+uc*On#;4zyf)_4@Dc2`DNp*mbgxKFF#HM-6HxY0&%{L-< zV#{%Vmz^d#{@sbWZ6N;^!bG||Y~Fr)I~(M0$LB;#IC(e)@*~gvT|@}}PDOL%r>#y3 z`6)yNVJ6&#X)${*!sx`I+2{aKhRhi;LL&-n`htm@wGE5V)nnZ?$WrCRW0K$gI z5CU{il!ik9dib@5qc}a?o|sV8FhR8~&9yz266{Z5wkT96PsJ}@&uqBi7rv7EEWmf) zmtCxpRMgi@X|Y9t_qaMCJ%@i6v6GOGM4&1Q<&$P9NOEGxU-_gVn_oEi3$#Gp_A&+X zUvRSu+X2>O92WXe2Z7;Y*(CQ?Q8dx^+=&<8rnZ_mB3vLm$4T9 z_wEEs??n7c^hBG8?27)KG0)-Ww1wZ?SwnuBu0I5SUWPNTEpD~}3fQ!H`VgqXu{VdX z7aqJk>i0`!xSeR3SOi+hrocERYBDmJ;HY8~;ae*z!_LITru4Ip0RzRRmAG-Qjk60G zZv84)+o%g}U}a&e>9e`Vf~ml#U#C+Y=cX!Fonaos>nL^MScy#}>lm|<3s1X3q+Llx zXSQveNV^&rUTH~Eh~@*zx<*EkE;jL6Z<_A-C|pI<=4Pn3{s2lC%j2_ASI8T;E2<)Jx~jPC${lx zWBTO+a&f0R!h_lM#2jW6Bdz|w-a)6n1(lXwh7J;(<379?<^iemM6v=QMx$Rqgy-t+X0~SnlLnMDqP~`H8#>~IgEgYFXXQj`R8sPSxBJOuSyw# zj?z^r#Q>RyuAq^Elf)}sm2w$UDR(VyNs-vM5uzapO2~ndzp(flAd5;EI2!44y=@en6XK3K3o7P8dYfT^o;WQ*^GC{Bk!O390Ge*<;INVly@@s3$^=O;6D%h$` z07>u}zz2w}sUl_!O0)_AthxXH-s}73l}W;(_x^tOUcdY@-(GvIwfEYuwIAQ!YjcM) 
zs3tyXQ(d5(Ww>`@3GV{=n<6;wttV8Ha4E3Lzhh9b>`ZHUMhD{fvkD|jUe)0v(z1g2 zyzj@m$-9Nu9_GfbcJtmtoS3+TwwAr?c=zEE3hL_1A(&@_x!!eHqZo1rSIsx`zc$>( zyHM65Rk4kX)t%nTkf=HPl{{19d$8Rlgq9J+narST4Qi@xnP{l0WwOB{zYW||6aGBz zIKq`bMiaov?%?DldBMqbyv+vxZ{vTLTi<3=gl+ZTLR*9cpH+2|wQu-u*gLjV-DZ=I zI=~H_sG+=Q=dH(;Ebe$EWC4%PPx3q?7)sqX#iM&l(Kvk@n+`^v*5U{u2WcU3bfG5Q2eyrw{3sa^_r zPG!&!PKg9#5n`Z<3T)?H6F(Woy`om1h%3IXobHAlNpy=VAPH3S`uW`vn!{kH7&!tC zbc&auQ|t=HR7BXz=9_OurT~-NFW#y8MWULoATFs-{0q`=Dh;Bl&R4WDVbSF(C#z1; zHp!2-hxC5F|C9EAFh0VL!Nwlgee5(#ALsohHKmn83;O!IQs*_50fwrXYFCp_a454~1j^(!}0%U9#o6euJqb>R?_7i27FMa-#YVU~sjNU&%dq<}L zitb%SLLscr3xZVc~H5H5prHDg$X)p>Qdv4>7I5EEOX-o5^!zc(B^5K7J; z-fvuJZuif4U)311cWa+_Bd#Ug+UN6b7=FgwzV!`X^u@+)!_TNl0QdQ#8^VL&TZ~co z0dBh-K4E46w~5~TER1C!C=(_y{_cx>>vb@ z5|3vVtMirSUflVLT_d5#NTG%@M+V(kfbb5tnfsL-!L5Bkq#So7^uig!Sa`TQ7#a37 zzWzt%QsVq->!mWD02RJU99aT`WScFPr-P9L!;P=Of+!}m4T0c`4OV?IzUjw0#PcB>%5**Y4h?Q}y@M0xuO`)3zqz^f8xEYA+|T2} zIQPm|iQ@w-@T#>F#}Hp7j(%TkR+De$thQEPP70zAo=raQ-y>bQ>TW&<0>Tx2j_rY& z(^~ZB6lZv%uPdH6a3?;(*vlKq`D=a;wneHimt6!q8I5}WgBz$^YhLYo$Bk{78@t|f zqfq>@S+|;l{@CfpMB}>^ ztVrD53D5Ac0T?EUJ^)OjK16p|d=o&@Sd5h*zjreT!zYmCGqYR$=b*{Ktb;ag%~|jK(_A2&nG$^*+o6)qW2^c((s^9iXYrxoY-MFDK_{gu17-|9>=EtSql~ZV zldS4eW7nD;*rB^uW(SGujMuzNuLNN?EZ3?DgeF=4$B$v8S=nfj>x>WbI%D^k*BRUS zHJbcdblbj+o93OAeHrZBbv6GU<>*?Ic9Zd%6azDB`bIY}vnJWV3@q+EJO7-I?{%%Q zExfFOotPvOGv>y~UI%h1)xOM{6#Fu6EbqX+Oty2okT})6OmEtHKyvKcRPI`49K^ni zd))SA#&2KG+ys*+i}NvUrVS5W@kB0V0_8|#wY`imzpVry{~PfCp!{P@FwB42MwyO( zF-YTIV;rwxXNaw6)LC+v<$ALjv7@Abf30B=dgUqY#XBKdu9A`+nR! z7;jP5TJBm$g~3w@06oR7ySkwlpkIbv(HE8Heh)}t(HV4(p%TdcPwhoyk)2GHR_E6_6o5->JakpaN zA2Rc&2V0N233CQgV2C`S0JD&g37ILTtQJN8CjBrtTHGcsr1#u@XqodfaC| zqA!dSfO;anO@v^827wMJZv^U#`KBooLN{Qhf$E5wg0&A-!8SrH<@0^zgTlBO4qs%c z$K#4lMI0flRCG6CHBLRg*f;c4ZaSzC#gfnShH{87>;(!a>&UXUkdlrI9kFk? 
zBX10ky3~Qto1R3C8dggkytsD>mpbs(WEcM5_9T`%T2!ta_cN6f%*YArKZi8vwT_;A ziovxGh=JVbm%MYwc~xmCOUxImpXQ6*=;6YDuywX4XwJ^_#V(zuFH^=xq^)lRE1xcD zoz_xfo`j);xqoZlFfaBXH+fU$dd8%5no?$d9In1Q zaKWY1T+Qbrc;v;iLDooBGrn;doya1)!Jr5qS2JWx6bDWN^^+>+6!k9@f|axNxL8=N*cf5ZQ7Pt_D_-`BoaEmUGf zc_IhNXj1-2YrH~x+#w!lj7ME@k`~4tr&>eUIU#yrwG;Q-LYb(h7VkoB1#Z5QjJOH0 z3+;N?cYIPkj5i>E&;~uKq1}%ZtDDYgxe&26CNi3Lk?3NHEiLH2)quX`gOfqJig1!z z2_u4?sEwb&RfCiC;(D<48%~w#Zq-V0eL`fN&g{l_Es7ObTF^83ZWShus7#>OiUVq; z_ML$F8z(>s8?_%dvb_2j!z+QILVV2`ILqvb4C10soQIi+_>YMBC*j75e)AS5G%jz{ zU8llRH`Rm-v8dYkyxFsPD3t-4u!T3MVB`Eqe2qOmHFqqb;h8t{0v7dyG;vCgTl&k3JJp|&T40u|+9t%W?P z#En~dr{{|GOp{M%l1o{J!3}3e2ORGXcPfP%0WZEDFMStYftd^418>LbJ($@~jucLG zHUCUEyxGV`)fL1CvG{^MPRr})r0u=<-&Hj@?mBcEtXR4Q#IFIRZa%JP393(gEh@A< z;A>)ng?~2csE=#z8`XZg$nYdr^krS)rp2eCyRqU6Lg8thl)BW^f7W-{2C?j8FEgn^ zz}om;vv-UYCQPKf{rb}jZv9GKVv0ou8*&U2>r6*;mFb_p+%(~UYP8Pj@!w!MN&2jL z>`8T>h1}=k+v{_>!*kGh-}$W0~8q*Ox1S~>zqKnm`_MkKr zZCea=9bH?R@w%%{KmfEYW~$Q}aCX3X4K-t`4Lja_j+r2)QObNBCxw1}omQ|&+o+2) zfXhw~b}BOQ{us5q9NWNrbptk!9BpfLHi(X~cVn^8`IN3e1i zruQRy`=B!Csb`-to=xNjWLWsbdxR`vt$xM6`KC%{!26zWGB(6W!U{%9*cAvZz7{GaRsX2^ zn8m(YF=|p4`)U(VA+?<{EM2GASBn*5SYw+>YZLowT|+sTHP7G`v0U0WB7L2>K$}-@ zIdC8IjRjqA!^qmjp2^+X;TA{E0t~L-?!duD4WpC}Gd^;U9=!o^+ zBztQyjLSwkDXS+q`nLp^2G79Gp`jcYki&Sd1eb3n8(cH*V=;!f;-}57!5A>juEE41 zJ>>=@*j+nY;We_W54Xwn53I24y^plM^G~q1R(vd5g|@wx5BGo9-rAZm?XAK7q1$Jf zTEjB+|7KfjT$DOATWcR3!>kyou;A0c{dg9zd3FWd3%DZLak1$c(AXl5qO3c6Ze|>5OPL)@}J|3>0(Do**!#K}zKiMvh=J(@s z$$!CN*~d%mM{$4}7qxQ((h-oC?>AhmGPC(*fHo)mMZe-8+|@h@Va#sLpKkVghw#;7 z*mZd?&%d%$p6@iT&cj(&Y@E7e3NB$rcJ3cu=eoR$$3HiF_eX|6y}A1mHCiz*$v3Z_ zO44bgka`%hdC7G1>N7}s<|w2Ensk-oqgo*|-imN^BiK^D$R)qGH~&7QbYB9Pd%sI)Ub6 zuH3-idwTFowrBAkg{>E8KIY07`Fqbc{F3c8yhmZ{1DcPyawmW9>BleGcH@nU6!2AQ zh_$tc_W*Q9_M@(Lcdr}-l8<@&hx}bRgkNuux$;vk(`QE6lISU`Xe_~*@=jvJvst4hCs_2uOj>o6PC5}xF-gbxZm;cbax9F3b z-$8dVhwQ~C>j?~GefFN=oog*I?x{Hy1U&Mtw$d|a7SBO|6^~)AKN~w2ZAQ%aR~z+|ITPpx98%m zcH@_1*k@(wCfjG_VN)6v9YWWH#%SATy#;im+GjoGQ0=o8AVlPS-av|d)-}3|udqM< 
zN;52T%eBr39hVa&Q(l zWFDzZMWPT`)HKi8rItW(3eu-SyJHkPtrRnxoz^Q+7`hPn2|B}0t0lrHc3MAUhmfa2 zI0lx?L#JJer3>e0LZ@AvhrtF`1}6u*;v>porDvluF8mY<31w1>4=te>!oYX=QwKw_ z^AUo9vl$2>i>X&&#xV8zZwOkav_eM`irpxYj9l=eSx#kAvRh8IOeOEb%p9o^YqDq9 z{ZXntStg3baoQW=+=itBA)!T5RP29cQKL=WQY(I7Gd0|y@#;u@b0c^jdY zHXu`~)-GNQt5|shcO}Q(FgHugSiksh#b+kPhdv1L{mjaSf6rA-eMx2~REDq6Py)#q zyE_S^TTjEOTuCiIYdvva2&T===adC9Wt^Nhdb2E#helInc_JL_JYg@O@WO!?t+7|u<~dU)*a5>yHQWPFkD zojubTgF&!6Q4FEsF8>9Jy74b6XdEYZwR{i31sq#8tMxfMGEKMqUa4aIwquQbayK`HCOk9N)Gnjo2gPZsQG5o3_4#eRFSP=a0$}dguVK!OHn# zpj3W%HPZ3dR_Dpl&gC0pPjFcf?dAwXHC*|={81(Ng6~Acd#_Pum7jmRm*n+-rDv>uz=>un%98= zWs8}`Rh_VjDU)Qb$P}tCToj~jc!9q5b&e@< zjJ5Ve+^sxI$-lWj^?qfZgvB9NZR_V080@i)mf@UzlKrlJb-(h7wAe)hhOU)AqjfLN9%RaL43Ilptzw{67tB%>?z#Qmg1k5)7iy?@wZ<=G>Cr) z;-#A!Qy+9Q>JzTiKoQzQXkPn{D_PVE&X4fnP3g5VIIJq=LAH)Gf(|0ltWqAl)d(8P zbC@p495v9}ntu`&Zw4a=xpo5=PVw)74Fnp~sevhlxu`-lu(ZHi%;y5G2E05;Pb!ArV0P;ReugxB<}Q#9<|Y#wrdgfWE^GVDNAQfR;JD zBw(s@cma40H-Lh}4WRgN11LM(04feQ0QTo;HPQRBYKt#tX^Sv){spYDxkE)m;Th~H;uj(#6CK#w z{kzdXFnFLMwrBIyr4J9G!$|X&zGUg%Oq88J>EqXa=$C;R?pe@uB61m`vWy>Jv2YHo(Y{V=?-Hmy6kgm%!i^aU*3YWi(CM{f=*ayS!B|59rbZa-aW(Dk3e2kz zLmn{bZ94P~NW3bY8r&?)?40jJa#igNj!kR{baZ_w zFLsHuCGfkh8Yf(NErH*6)#Sl7r6ur(u9_)uxmyAcch$Jz%5Mog(p8fW*VLB4qg^#q z;hNSGSkqNA4K7bhU~N~82d?QYfycUPro(kcOW==PHD|zeW=r5tT{UOIRnQW6ysM@F zu30UCC%S58!ByB2c(SXe5UvYb0)OtRxiF6gk|v^C#DQN{qsTCp^tfs;ydH~m#gT(~ zuBb9yfo678(fKHzC`*}JET6#|CkEtd&r9=d08nS6%x-qFo!A!wxV28ht-TtKHVM8@ z@NI%06g;QFX zBlsr47YM#h@Wp~}7kruEI|N@L_)fvs3BFhGO@i+ee4F401>Y`sL&;s++97zihLy1? 
z%I^_;uiy&=-zWUVf*%xo8DW&+6VM9E@d&#=`Y4lGbllJGh>rV;#c?;g2kOOASfz8- zUWI!{UA5oy@VBY}QrD^oA}=)h;j`z=>A*drOTEw-q*r)4RAKECy5tLu zA^P1k-NP88s`&j-cQ8Ee+Jnh1G#+d}A!yiDtd-iJ)o4FG+$y`Y#~?jCNghM=D6l>J zu05UbQ8suuwredG6i3@}dXy!3yibpcB#-^{sIxr~8Um?@kuC^_uBbNJBsgknC0tvQ zhZ3$m$wLX(VS9*hFa)l1v9(iBws5^k9!j{rBo8IrpzR^T!Qc;qqdl>xu1AFiXBRr> zRK?I9I_Hu+bk5lxBHRXBI9;m>4O=)>tx#eR8e%=sMv91I4dFH(TIr(brAUC) zNo;(d9ma$cD%tH^)f1cvQ-?HkfF6(?SklstsIo9bOQ^AqG&RY36RhyQP^Mg7*l15W1T6w?OcH8jcnV zzE|*Ng6|Z3h2T2`Unlr>!8ZxMP4I1kZxVdF;Ohk6A@~ZxcM85t@V$aB7JQ%J3j{wX zcn@JJtqe8xgh!OtVW~0dq7LSv)4>TzgdWcPDu$1z{w4K))sHszH^X993Gv z_1QimTyK(x60S4JgTf_rRfk{_!nNBzB3xUNhZ3$S$%Dcr>8d(Gt+$1%uzjqYOY+b; zm*hd=61u8DFbUy2whx7aZo$AkNp*>`Qhh>wlCDyw2W<_MP*zGkL9ZsME>TugYtTzh zrK_+JUaKtjo6D=ybX64^cKquoFd2gHedm+m_+^CkK#OBwd=!1T|8pM!aW)YB>(AmI z9(CR|VUK3T?x5QDs>kE9p8PPj++sX_;&oJh_IUhVEG{3VzT06R%`~_&$K&l_OpM1F zj2e%}lgHzv>Hqa^H6B;)AHt3A*vaGZwMqaPkH?e8r0)R@#QmLKr1>9<> z)OMvUFjAoY#K^Kk!>ww32+OFQf>-(#_+G)Q(IW7Df-e^SLBXrBBk+bAouEqYq20>QTlzF6?>f-e(%hu|v&-zoSy!S@QjN$^&e+9r6bOKlgtA^aVJcME@~ z;5~xx6?}o<`vhMs_(8#!3Eog$cVbB4*04P@@(8|3y_ZJ|1mC9N)?&f83%-nRl=%}K z;&`M}91r!<0lhYnKi5(1)Yhoc`sElPZ7e1i$JaDBU;8GoJKm(%bM(UMC~V2X3$|jl zh05MMSg70vZ<^}Az7LL%f!Yk;AG}1=bM)o_SoH@CayR)07$k2mK#*{4p^ITE#+S(m zJ*&)L{@@ituYjo*Lsdit#~X0K_^X~(U<_FMh_0@jLZHjlb(K>L1fAAV>w2m>YUz%# z)?O>sHEE`x>ub_nLRVONSbd_QicpiV!&cofrD=5wrbmr;;o(X0P;-d_+XJCxOgGSB zOLq(_ych@wF{ciyJF7_Y=qE*;?O_SmWc%noQDwgpu1zquaP3JRO1KW&!xFC3_E8Rv%{>Y#+{OY%^{4cZ=-a0Y82dPMRt=TvvBb51a}aGoR&opZK_C0w!XqlMF* z@=ld=fxV05v8jKGy<>Q?Un*3Z@XXc|k8c;7Q}m z3qV=%5s-WG*^}=nQ9U_kr)*Q0&KBUcT%ne06lNl-lLtyBBuWKG7a*T7AyK{LId7>6 z$;zvfCL|qD(U_312Tz@lkR1~eRQ8AZs7~N`WKbM>CXaFwMICJu#Vd-swMi7SDC%gP z;6+i}a}iP0_FSYw6j!nE*J(IfApA{&_XxgC@NU7k3*HcXhu{a*@T<19Q}BHnj`j+^ zSMYtRXwmTqeo({F4#6A3-!6E!@V5!xBlsr47YM#i@Wq0!5PX^7%LHE`@fQoePU0^R ze3Rfkf^QSNTk!3IHw51yczqfT=@)#T?uetkg6}1an*9Ha_C{JhVmnmr^F6vaEJjPU zCs(@4X6S5PbCYzouDVG&yF%5iWSw1R`)JLR(AmX;u{BRYl84ef9^1pxJZ{@ZYaYGa 
zqja_*C|hR_s&dvV_5IKs(LUS5Qj5K|PrAo~GU!{y2yuYZD*L%oKVl#?A~=1op7(08)~ zzZlC|S4CB*DT3S^rMn#gbe|-E?yv;Vy_o>+A?r&9dXs_9WS}D%Xio;( zl7XgVpe`AxAb>u(7eBbM!S6co(!dmJe-*!`KpXrEvTF}45xg5#_f<`$f>0T%_JJHg zedex}J)7{)(IT3fx@n4E7kLL&#&#AAu{%MFt5ejvDL0eYfq+@%wz|LyLmYl(q>bTLZCo(T|PWH zbQ3r{Idsc8JUMiOJ3KjbJ3Tx(bn`wuIR;g0Jv=%3bQu5rO3GgBpCCu)KS7R;e}WwC z{{%VO{t0q4{S)M zgq>%KI>%Cp4n*BYsBRe1>Rd~bR!u*ZB(s0{zc^2LEz|-0@4!0Qo@dd6<7y~tw#*?p7`xscY!D1cp7#cPxKY7zv(X^c+VW9bxj;#;MRC|bjy;9Xyj@t z&m{P8Z`rzHAPv(%G~g z$@I^>+#P4f39$z5l>4SK^Lt#GU20|a7ZUa^$IfqVgdQQk|KMB842P*M?1M_na(dz% z5{@L`v~<7D@z{ltz>ICtEz5#dj+e%G4uClg3%7w8ubD7f+O-wis*uT;APo`Hd4J1H zCv1;m%e}Wd;CL2UKMfn|d(EM=Z2C5`DSba059;Cy5)S9E--`xBtef3ic^-;qD)!+h zg_?dHgo6hy0%99h5+h31rg>}yMAAw;#mH3mE-0*mD~T|*@pmf>!CUj)65%= zg0%8HPf6_C*-n8R&?c-P=~Y@%&Opvep0Bd#CL`3e!ykd7ew_d3nG$vI<2Iz1XA>On z+A{i!`j`Kg9QmM)m&B0|<}*$>1kC+7@_{SySP0k&r;9`#{kQ=mA*_Uwt^YFRrs0A` zIQmhFqaUpc;2$b_&K3Exnm%_+Hhl4Jrrf+cj9=Vf=$o;_XTIj!_Zph*-85ShFhB6^ zd({^^0dX>4g0ZDI?%{g0l?<#|e%PKew}-a&!H+?0eWU0;;+Fku>=6dQ^Qk-^vYo`B zg;`>_-vEQ(u?wd<(ER&|eaGyL<=kSupJ#{or_Ci!Ko!N!`$bevCrNwNqfaG zA_pzd+j_ZZfhO(aUFV}25yva2`i{Y+FU6wFIn<&_zF5G3`tg}p;S|(UI4ANDvt%(& zQFlAzC&6i-6XD4>8+aH?h*q|~_`Fi#Yz|3_$w3}-dAk2Uo|Qu*j1a=aR6_~IRI}-S z>}6orx9NxNi#B1*51wS5XWQ818-CMHpT>U=XPc{FMrJpT^&JeE9|p~rhp&Je->i4x zuy;x9q#z7;mte&~aVPDkO>MuC-O2XCgJ$Vkn(V97CcJZPgW;dUQHNxM;Ys0Xd*QiQ z{80A7Lt-yH2!r9gq+2Y82fgb<=CtV{F&Pd!(kbCde74E(qXurb=JR4N{L)x{UvhYe z4)YR^}q_`#nhxKDE+>bpUjUd^cvVTG!c$dYTjY*j?f<|Ii98gET1IX zE3t+F`z0GDhP>NC-pxVB4&Dd`V|k(I791zsrVWZ0H$Gni@6G)su<>h$iR$Uygtjo? zYy&D39fW-oZzn_9{9bVLJF9UuNzi+M>uAM|up!d(-GseX$~*jYctT{@vG~}vEH%pj zuNHIi`^o~Ye{xrNmqHJ`o+3Z_gSY`J7`f4HG~%wHi`_7MXaspL3SVF2$xObv4#@XZ zzP-37$Z^Bj!PW_Ax#-v?!S#E#cJpi4KRsd1$I3Z1a${aj7>#2&>_48NN{)gzF$62W zK^@Z6*hvn$Mx4!@a9+X(LT!W(?^|Zz5{M`RywdA$p|$Im-I5d3L zI*aXjwbgcU=qluOo{Y`jJ!*~9>pT|U*>HRC$MrBgxS=qqvXxf$g}Qb_anSVt&c+D? 
zud|Z0&Qf7t+B)UjU~VINokZA?^*N!U*KXLJ7EWUPl#+hbZ;-O+ezFtQ98 zdSXcLE)R8IqRKK!oFGm+N3j{>e?XN6W8cJq@5OoA=(+b@T7d7=7W0wtrh&=2+61M( zOKqQAieATiFywuy)X~Yq#HzyYFX3Tg2TBY&m@Qy4TEI|{h5=E7A7>OhajEE&%uzf| z15*%qvvPw=m9c8z>%)^n-WPQgh?v>|!IeZeBDmf}gu&=1DncuK7{+p=9R+zfm`_v4 z-0$6sBY{kjd2rz9B>jg|zNOJ;!&5@uk3&q#){yt3(B?QA-rjX+b|v1wN&_tMzKSq; zQkuTo^wHqr#y^MHrSX9o{!+7R;267tH_;b7rO|avhaGr-BJk(!z@Mg>{^G{oka=>c zS(BGiNGKTI+jJe8T3T7gzpQ5VN%|2kly;l@<77*G$6|E#_5^t1F#2dU!Jw z`%Ex)BiilddBLKuxSH$KFmwTKC4vKb95;bkSB8;tV$i(Ut#}-8P32k2+y4MMpE490 zqD2}$O*l%AzJ2onT=WbZzRqAQd;%(%^8{S1gbiE4$omfT6}b83@~%+jCfNLUhHwJk zbh;QH4rH5Q2ae3=lticJgeR1EtMl^Ec9;2}jKyG-u8$BT6vGWiGo5FH`3TAtSqH|1 z7f%p$@iH*CzvzX=7dWRWdd79f_n<06OiW;PDXKm04d{5R7;>9Wn>~6uf!im%d%eTv zy5Xl?EdewUbVc9m#D|j^J5&{QJzDxWtpb~Y4W+TOL#@l8NN(=I9R-+g1zrr8fAPD1 z3zMpG*!ksE6R7h8`LV0+WuCv`8{Xj0EqyUi^l|w6y#54TTz{-|!>L`~&1UJnP$jD| zDTLo?Xh*A*DJ&d21`%f`K^zPAz7m>w;lv$o*cX5WE}EyASc-c-&nby5 z%cCOyoF1dpxd+Ze44uf9mY|7iTA-6nWJc<#crsb^2*uSh3pM>QHMSYvlu$p|vo?fc zFKPWjadJPxapz-A_`xk9-GMeZ*u8MbW1s`+fOJi|Be`3R`ditbsHp-4J|cvk+SPmw zhgaQ!cE!{kSj~5+=y}(z_abym9AE|wgQt+WL5)A4_-Gd}GW1o~%3pxe-0YvRITA0n zL=Adj4|s?w69cB!@8IO^!LKe3v|iR}5tKyx8m|K8lY!6M>Phr%>lMREgcwvco*Bfg z4+x?0P3-4XA+XM2y!BM$F&KfV!22zI&GFk6L0S>AMZA?w%Y4~CgVrU~)B<9a6nzkO zs|Gd$hr%IiKRk8doQ{ON7{%!nJQXhje+fHp^JXZn(k9FYR8Gbo;Vh{H<99R42}p@~ zA186X;bT$UtAaCC-lO{rv|%8)mc6>3z?H7T8}*>H4P^u6gF#|U0zjYjNvYXa$_wq@ z55YiNOa&SL28T76oJR*x74@(;J_IL1ct#gYd@iGYayg14{#RfmU)6Xj{XUY&?gBLr zp*DtnfRfg#UJOn#BPlWCYLp7Z>r6$)y&$Uoa~{KM9Wi^wdQEp=khKNxA=boTYa_hZ z`G(8#F!EFM(cvYhqLwW`HZk~Xbc!DLWf{Z~PF9xqZ&gymgb;=k8>;*hQ%HhKyPb-e*<9e=WH-Pj;E{tpTZb^fM^$(=l#8BTwSu5G2L>;S zx*GeGUB&fzzM}UV|A4fAZU~wYo3Uv#{70yD$P(Th@G?IABTLZBG3`+pN+AKa2q^-U zInnnN+k^*{?`%Dk))_*s#+~u!Krnpz&kNAgo&HPp`z!UkTKzttejil7zfr#(>i75f zHH_2eVebH*Kz)5}{rR5j=a)BB*WOURsIfY{u&#P(d41LO3u|l6_uO#N>GM1_wPDY~ zMRnCxi>hkERr9_CR>kL8SmV)z9*faaTUS*-Ck$Rv8OD_hsyqz~Z>$1AL)bI7$}`6z zt@6~)_aN}OOT$$Sn%JVRZLF`X@_gPiuc{%uuttUEsjFRxxa%7d0aY)&zNTuP=Z1yh 
z1(2eluBtM8gb}XwRL!Ye;HeESsH*qOsjjWL9#Kj(p1HM+HS^}wFGX}$)K_{g@)RsN zy?V))JRUz{vOrtztWk~dxPIZ1^F5{Nt@BY~7S64%I`z~e z3?CE0T=i73Aef=2H=GX%JRXnAI-M1BYUZV6#@xpF^GC{yl+>TegdiL;BR7_24L+E-vbvNI__zBb9!G$4?KKxsK5CdTJJU-#~3+|&3R4-a>B9M;30MW zl;Oqy{mZo%-BjmOj4GpDdrd4p(r~sNX*hbP7)I~WhQp294BC!G@cA%RpKll)#~Y4{ z6X5q*!_o0sBWLilhB0`e;cPm|aP&bi&&h_P?Nrcx5n+50bY~gPVjLsr{j%X0{IZc# z0Buv?H5>)!8WZZyGmQ2^Bex?6zsn3q@iz^lZ!WU8(#Z8x1Fkl5nyN7sS%kRifU7rh zJPn4UZn0r>-fTEKmqC^$!&$My$SrO`Sa(9kJB?iTy$JtaBd6|O!>GFta{L@}{K9Y! z{sQsb5C8iON7HKH9yD^?4*~aE#P_ga6hDsa_%q&}#)Psi#M5QuR%|m2_fCYj6MQ=j zqhc4*_qLJS76;#ek<&W>+HfsX?aOf(1rr>(y_1otM>`yzqaC?r$2*L= zFXMf_V?y7B4x_l(;ixEf}&9(jOk4t;tZifi#YGugr0>Xpfr6+k%Zq?pWchm2mR1 zg(Loqw*>#Igq|{}Cp}*ACjCwLZxMQAm3~H{e-ISGM|HYR8iRfo@UP*&Q0V{8%GLatg?WBBv(tK5b)sqq56nNj3 z8*yqh(=`ooobkBkdt7^G$|?Sgrv&tCg?R8K}cQ{kOR7vpILU)_^B9!qy+(!_l-lIUna>j9{LwSZ{^Q;=-E@OJp_4Xfqt9N zpFa-y=YhUo=r0@x{av6R68h6ncIwGezN|+ZNWVeqgEbOOryoK8e+<4peAncIVT^v# z`NW&?oC%%TEA+q5CYLi+FSEYR1K&0HEX@apX7!UEkMz@|zYFv&LVwv9^eX+JUoG^_ zS>;M?NBI(NHoe!sS<5?Ky|+#14^{8=3E!dWz17%K#OL&E`6K?!&)xX9=r0%tJ@wvN z3IE)2&{OaA2>p2U3-z8A{>kGApW^{b{_)Vi2KrTpAkP%&i)~NyqcAS@}5PPK?{g zN4>Win>hGPpnBtzo*pMma2b(_w@qku-i~<4N4?jyOY_}?JXcQ!U*@>xUhwtwYra>7 zEh9ehXZ}44`X%x4(|-*5Gq?zc=jCyPeIOwP0#OP|F|HT;eEawu?pTX5hJozk6J{jel+Aor? z8GP=8ns1q^rDOB4ydMYOHL%^uXFT)6-Jqw*XFVvGEUuON~^{53fY|CeH*7}>t zcP;pqOwfEk%-YUU`SL}qZ2j?Sn9Aof-u`%%&>w1l+%9~F+8-}~VOu^{@n@93q>tsd z8~;I}e?$C7qi27dm#4%3{5a^@A6xV@#zD{i*rNZ+IOy3QPZfE_GoE07e1_0-R;-@% z`X}DZANI#q_+!;?(z8FdF^;@KZt;dcvpMtVnBx8XdbLdyASl(~A+;~9aE^N<$Y=D{bc ze9kEdF0pRFa-W5MtWEeX$rhiMLd}EOA9vvL5?2i}V{y9D$3tNU25pV%Mw2>p1*6YP(d z;DSRwW68sGvp)_Vuj#j=uTf7%x>NN%`91{S;55w_Kpu?Dm)UQgi8%63)qL6OS!zBr zzIos)_!rIhbhh$Ln5n#+SKHI0bd5@C%x6 zEd59ME(HA$(G|1kJEX6Sg{M;jb3--F;=a+c;>nKfOh@#RbK+4}djU((@? 
zw|`$P^oQENcL?91_U}u+tmPYT|9*|oZ_buKk{@iJs(<(D@W->B#QuG%(Bl)5e$wls zcr$$V?`H`8c=}(}zYG285>AFbN(!I-yCu(fx59sG9Qj!d`fX?H{Pd3@ zziQv09}@aC+33^yE#@oN8JlM6cq+2;QQ$}}1PJsJ6usvjxmbo7UvKF#N`K162lW#%XQ_tnan-SF^tvgT)Ie0PDbS6QVV z9{$!L@I4E@q06+KGqdt#<_qr-H%c{MURFM}PHV5*u$^59zHP!cp8l|!{k71q%33co z^W&G`D=5?Q9m#6MCnNulhG*(J&uiez`oN(=(S!>Ss&dEKH~>E-RIKHC!@Vl z-`$?u>bz}21nrFRO-CKf<8=*q9wiT-48F{M^jh$>3E!dgzjuLeRfCRiy!D9X#C3pn z;kyEE^`y%w-b@$Q0eTyCJd>yf`J~g2&@aw{?pVD{^WBmyp0w|BjIWwHXNBf_ka6+J zh!2f8tv}^BVTtg)tC&XO%hdm`fv?~e9p8$q<(|Q`7+yM33X#s^L>GC zJ{jrD><5}bJJh22zJ3V#`8fF8G0nH?5aRm~v`g;LeCk-sh;nA8kLv)&k2K%HamvYh zIS+h=cWS=z^rsvroFVj=WsN5@A307azFWuh3T5S!k^h7&vOWT z)6f(v)XCQ2;cv6;@);w2`I;(ge_FR*%W;CxWrQdGl)D7~6+(YbHu|)A2>F`vzd-oj z%F35nzuBL9HfZ^qV}N=+Z91Gwp3?DT)$bAv!)Jfm zBJ^4NAED2d|IC+Zq}izB9q)Ps`L4u&kMQMYZI7w@9msbZ{@Z(We7xFMJsJ6u$+rQ7 zh0kcd|CyC9wHz29*A6?L(|nh!$ncX9Up_n&{X6rQeRGfSeJLBC_>+EK68%`?AYv>Jmx66CAyp!oia@Sp<`91RfS9#Bp_gCZ{ zl=nB~eXYFb%X_4-KlS@G2R_Y#PjleY9QZT`KFxtobKuh)_%sJT&4EvI;L{xVGzUJ- zfy2vzV~Wem%NxRTD!*S|x#0Wd*Ei0opJ#ld{){&5KM%*&>x{PIc}uJ3fvj3r7tnn}J&u+WssprW zPIYx{C7~sRsuoq&EhW_C&|pKA3bTcv9VQM;mm5=%j{30i9b;m-an7PS3u}yW98Wjq z&0QF7Fz$0+evPmGdgI5Fs%jQ5tgo#xR!=f=7A=5~Rh5l6*}kA^4ik1Y0&v@Rk(Aeb&-%wl5Q}tp-p*%bX=l$)o@y0(KG?y@UX}NJr zPJ{7s4g}Da0<_(LhAMm-nPXW7C|8yNj1{>mOCHF*!T4>ik<++n(Nc9d-}sxOw%oV} zr`p@FrkT@F-q28WwlUz3gy$Ihaw=i{p>jbvZ9W()5lh?2%%Rip z)Aj|r&H&Q_R-&KgR?TTxYP5reH5#3se!K3G(Di{eI*w( z9sI16O0!GT>?X}hWQ@^vD^}qrG#C#}yuQA+u`W^bwoa^StgK!*&)7b3-onKT=T)Jh z)h@ED-Rl$4DBhZg5?VMP)+@vqi1FOS! 
z@QtQPD2Cg>((w~Trhk7*;!5M*NsEo21J|}e}_t+!2G66xLshLb0^#scZr6h0uKuO zYk_-9HNIQmrpq-P7r5Z-8lH@X!tmW?8vdfdeF9%5aN9RDev!Zx)UA4wb&o3k+Kw3ib(1yB*AruUqgU4Oa@>Ch$@VU!(Cq z61YR)UkltP@SgLtI)Q&DaEHLp2;B4o9o|lXVar72&&LA8u8D&4^R#?*VGW-tFzlTue8_^A zX?VWCP0KYL6}X~F!|fKlLc>o9OnWITpLYaqyG_GKn@lV0`XT}fvEebwEVA^t_yysc)eHtzi zxJlqjfh+z~xb zp}anUVSh%!a|Na?8scvf7kEw!mkjR_HRi4Yk^@4N5MS;w?CobodVNN4(aob((x5{YWRx+mvw3Q ze1Yo(zD{8G293W(V9zree!!xCPQ!l@xNNhACm*fjYkx_@UV&++hw@w@Fl_cHxK?1; z?@@3}VA}E_{ckP)w>12u1@F@ER*PQXcLW|3IM*fVe_PW}6BssvRQTr!T-LAQ?^^h{ zhMO$>I~x9pg&)xH9|SJmr{R|bhP@#b{-D6LJ;e05+*+QxgBm_ZVB;Se{+__FQ>5sl z0@G#@`5zJ(_KOs}RbbjOB7VQXZAWVOUyjl7u@MmO71$&2RRY(!HGYM_w1-6chXwA# zK#6dlMGss3gq@$!;lpl{f=?2-^8yWDBrt6%k^Z{^!@iP&Zx9%^mK1!az_hzW`ZWT# z`82%I!e6Z6zX(j*Or*~_)-X<Tw zj1hmCz?~9ajli&rrSLI<3oh00PX(^{nuh;i(U)qt%c2+fuNJ+)9|~L+(DYM|)AE&(0va07) z1uIeo&q{;8nnoW;gD+2muS|olO@rs9!PRMSI1Ro@!3p_ZhLqd!|0@1bgvvJ9U-14b z{$Iy`AO2s#Ke`J(#rz)+k4LQyjbsy3A;dOf=5&`UN(wpB^x8(sB;1A@Qs>*YKh}`A zl9ZTdg{$W%Vmka@yYJ?F-y zY0{)))8$XXXAL%;M_1_dYCDUIr6W}IS>+=&4ETGBT=R_XicRI6XOGeQYwky(!58~B!!|WQez&mEm@(bAV=X-%Ev15 z5t%d86Y{OD8lO1QxiT#gTj3=zX^BlBQbn{{cX||7b4~Y86(~hjSb1l$q#@G_I}>fE zGYw_QGKvN&6y;in#x_dzPARJ_awc*rjX+n8%$!am8)6e#;@*f+O>(v@Yzl1zj#20A?onmbqZ8-iH*I!d#&sYY$Q$)%kZ zp&eOrB}>ndG#*{Sb+>5q>mDIRQD+3BjNIECR`Rko$E3uKMx%>i7Uuw?MxRc{Fp=Gh z<|Ia&=@O5`l9m~Eax`t>m|`UAroI^=t%_o1l2ygnvz8H5Owv?GQju9qph;Fk1T)kS zJV#PP@V3g!$^e~m8Ut!@JFVp$5x9Z^^ro9q8(B&$=V?W zVaKhDB2DlSGbbVV80fUH6FnCsu<8|@*{iHgcQ&W1{q=h0ZrIt$C_SyrcJXW3{*S}H&^ zS#!uPk5tB#jKdz5ta<=AU8P6R1Hh$;nW+arldJ~-j@0D6*pf0@P#$jQ@+_#T#)U2o zMtR-hIee*2QkC*vY+!Sky&bB&Jae}cwu51BmHOotn{sP!75hMYw~_2{qF?4tDEiK; zEzhz~Nw8TrD=BN|4l-*sBWZ^4N{SJy3&+OT%A>=@U^aP6 zkn%*6NKmjDW>B!sTS1^wX5dj%69lZb85GHh8%cWZ8cW^rr9`85raq@{B0fT?5vnbM zde30WCP=k~F~vu3i&S2EM +#include +#include + +#include "gf.h" + +main() +{ + gf_t gf; + + gf_init_easy(&gf, 4, GF_MULT_DEFAULT); + printf("%d\n", gf.multiply.w32(&gf, 5, 4)); + exit(0); +} diff --git a/gf_div b/gf_div new file mode 100755 index 
0000000000000000000000000000000000000000..b4c2a6635055c7d06942d725154abbdbfa276420 GIT binary patch literal 121856 zcmeEv4SZC^)%R|)fel3Oq99SBgc{q>uO|zlL}<;0UAU{eiiN1HMN}kyRE5H>pcYBo zU7G22k@kVsem%9;wrFi1Y!wg{c9WP7L2v+! zU4%2si#pI40NN&t@+fReC#OJ-IXHFd?1vw?bH@EsXZr(pI(y=`Xu6e`SFL5z!+IHZ ztvry7|C>5>+HC)r`oR1T<;{3-cDljY^y`=@@OxUz zMB$9~nhV1_+KYxxy=VIUGpEnu;Y|E$ejv(QrRAe=#=jo)3p_aI-UsiRdhdhPGjfzy zIZKph(;7zM)8jY&o~hNd?s#Cjs5QI2ieHqs?5hG!VKe=}zqbpB_)C0OhbBy&>XIkR zCJ6Y8w+#Hj{~^r(X<_0wDp$vK>eQLDX50wtL+4Q&i}Bp^vej}3znzU%i({ZB=R_9E^Z2C)!{4$UPk%h8`gaTR ze<=s>e8iRDDZ=v~f!VYEEESz2oFo$v_IeS%+uD}*UXDv_2u%e!?XR-U;x( z|NWZ+|E9pdDe!L!{F?&*rog``@NWwIn*#qN1@5yb?0qG;w^LE)*_BAZ!Hx&(t+Czt ziuJIPAKYCO1R5EMu~j$ zAAmcf@#pcQsIyVwto=~^3Y*g5Wz*wc_CTkX1-gMwiHCjaVV}f($PnxL8^`Y5;1QUhWA3v~z%# z{qI^T+UDu027_|(7Xa{u_xk%P>Ie`cz6jwpgkIc_H-i8*h_UeY+$`TDSD{r(?ZKxI zC$p32t6(Z$TKE^RmoGBCScz1v1ARJygh!oMj6V84iV8=i(D#u-ow6>xU0V1)>Ij}F zk)CYE%j$bjl1Cj=8lQlaZs%Sl9F?Udjo!%X6G2mXCG8NsEa75@*$yvD#oO+sBc~H*br&q1f)`{8r47x5J@e$1^BTh!HRQ{T=bS7Z_ZTQ zi67JW2zFOWp)JTCjMfA?eCl){us>dnM5y*@g)LD~7-7Ef22@Dn4%CP*|16b4TA;K) zel^m1BY{OUEWVCm0XkAj42}C~Xxv0Y;|r9A9YVuCp2!;(J|s$GRN6f#O^NtwyzH=| zzDyMw%}~@p4P<~6nkdr0gX}2f8TRNJ>e4Mbc>5yy@rf*3^Opju=`8Q0?{@d{;Zc0UcAh!*+)&=9-X zsPOA}OD;mnMU;}*U*q&Ej3j;VMhJb85bB3NXuuSU+ytRWj2;&^S?Y@eJz_YClRL3#F_!^&k)IJJpNtaFUR9{BHZ3Tl@%89eUr4+|bI z39OaXsuENm?vJWGs@o!~fkCo*k;0l&4w>~%{1he2s;?y6Duv(1J5M96xQNp6MAb-@ zoRKA8WOZIiI3%74H@9We?rw&7VgQB5Yza1~=;I?W|9}iQgdt zcM~Sgt%(^Jf#_)sW@;B;LkXq|kcljS>?S!9Y%IG~{+>KVzD=I0GlE73>VIY04aG|S z4R%Gnxf1KYQLTx67=D6HIb6M=3@`_+4Ox>Xbo8I zVefd@1~;pBsZ)x*>O*C+I=(0}u{Rb5tkESYyXDxY`Iy!Dcy&px4uxIiWiudVZ*aKS zgkl%FVUX%}un8qDHnmg^o=8cdb=1qN>5v0qhg-V-hzp~c#hvZS(T^0i*~_}cYsF?M zOo=_y-Ue7{+6kiAc7F`&03WAC{8l&w_#Ch2zAg%$YaJjFiR)RAhDBT9;sYSTVvt2go&T8<{qx2Bw1 zhBhmym|W5F*o89tVZ;(X8Aw=DB5M8z-pC)?FnGLdx3iU(wyA^VrDB-cY{)YIppSj% ziF~ir=8O33oM@3bDYtrKw~Mu*-fo$lSZWR2DOYq$3kMRCs{HAu?xj{KT*y&I!E2M! 
zA~lZ^Q=v$0ms8E|ithOb+!f7ID8ae68A3kK6Y&;U)P6Ff;`^!Bed>wWY^sAz`jpvG zga13e$ivugZIji3qV}a$|9SF=8-c|@b=;@2Gfz%6c!JF~%ttmi=Az9?u)&sU_5@?L zgbVaoi6@n4r}k>ij=W5CTI6C)sr7SZWm=ObGA879MxE{9uE)Lv<`wHRS+)I)T7U|B zsl8>VZLyq+Dye$8qBF23wO$DxvIS430!?a|h^f>zQiqv@k<3zw^HC8dgF9-7YwWAO zVdCHcy#L)gTRw|7Lx9&H9iPv!|Afq$PBI5G!N=gdi>N4e91NC^0t9w+nJ;B7Z+aj`9}iEZ~{Kcu}sI5xYz zI>a>!i&QPu$_^@4l&8ln9zt=IMw(}TCBmnX*AubHWk+PTS!Nqt%)QRbd{Gj=?$=az zF;)PWtV%GgFN1I^bE&?SE)@{pIu|?cVqI)!d>1+xOM->pK@a=FA4s8l@B$GdH@oSS z&EjE`OUqnrxQjhv!}gAiwX^RQ@i~!=b)YOZ7Sett8(V_7WeOWx=22axWlAs#={L6A z)9flOp<*ERrO-Qgd29qWEB;wxqvH3`PBiow;<3rfDTfnnI9v~&u=yVjp0EerJ!P`O z`%Y8F_hgcsr>Nl2m8SvD+I5;@-ycog11>vd*^|~&3M{;Hwyi;#)*XKy+@i4h=E`Ca z-ja9%{lV9Nq>oWRKHdxTLer-Ozjpp>E2ST|vz117YkV8#_ssRQ!nU}vLcSTy-qEQ% zr9^Cb9@eH*v^`X^=!i7q{3Eh71L~tgF}%!&VyVf2C9lI<5uLM3nHEzb{d;>V+WZ&s z_DRco$>kOg$%rPz1`f3X^^L8Gk3m;`eU9;*YdtYhQKy#C-uC-wo}!K`gN8v0-9}8N z&IGfJ7ZIa*#9TyRcBT|W4Q&;R1ua&y6!`&~tS@bi$kpjckXcKqrX#Ul!9mY*EK@n81_Pj)1)`U{RNPU$Gk_EL_iVqoE1tZv_SJZ`DLm$S#Iy!(Qq)Jw;@2U@rH(^P)NjZ3 zHGVZOD%UCpToNAyX!03)y8v%onU*E*uuqM5U4v9llMd)0y@e?KZoDO&G!Y>14h4E4 z03ArYg#!PenF-31S1Ir+F}C(k$g^&Cn3LVB-ePZ+Q+s->PmTEjYEn!QY;$UogI0k1 z&?c{XsiAh{nFXT`sR41b^ku<=#R{7j<+CMm;v1a);4^iiL!Dc+5*=w_$2N7y?6YuZ zpr4$2i$}RwVpB)B$$yB4f+wv~h`35-Zxc3M@*2w5?55?a#o6VmNX|YlDU0=?T=9=N zSF(3q!xF1_;cc5boJ}s)i8`E(?A=YfUE$qmoy?k15ZmoyNjdnb)!*06KIYQ?t>llfMJP7S%9-?=B(ru}>$+5>h_%GJhs5H8 zT@jmA4tDgGhee$!@XKS9SsPy(AM;Mz>Y3IK^|g1{wusyt+cBjjb}XMufvmyd1p z9Nm^I6z%g`H(68C@_bCDo4TRR7*v-0c)tAqv{R(Cq4WEfcGFLsh z*hbH$J>I^zIDFIAf}j0&k}QKR3BAE7qipI>&;f-CWM> zt-NnH?RU0{_OcddTV+=h+Ss!3Lw|92i+`}S-Q}#$h!4bB3yEON2iErFI+0WS<@_%C zLXZI_#gS0TYB4bmtE&f^@KYI*EtY@~opyT7D*5td1X z?N}zsBWx>WW?O^>bgJ1^u`#fZ^2+LUAQ3NNQ->?uB88TsRJXO&J>oit+uClF?W$;- z|D3k{I(j19&US)H*x_Nv-B_WJ78XN-ZQK-RQMWoKB(sm?Vef|9AG?88CdW4Ia}90s zsZb_ig?w}y+cIn?Ey!9o9l!u07z2h_ShrGsENFv3b}@Tb-n3tKHoJ#5yR2Ixwjtr@ z{8l70jS}FQxEcMQtA8Mo%hCUv_Td;u?JX_0Inw$kzHcikCdoUXi1 zUS2zuH%yf0_@~NiLz89T2St~G-wNB@HmP6;b`KBr=F?1TL(O_??QsjF 
zf8fL5$zF33P)|k%U?%~Eo9%JvA5tBVTaP15&&}E|y^q2(2#>2U7Wg~%81wHJ;|UX# z@>IlCc}r`A$0qkcAYr5OP?wEcAoNA>yTl-^YtcR*#7X61&6N7O{n& z=1Eslbtp~>(}uFO0UM+FBaA}%vavGavY^sn>U`T^%33fKk2WfqP&|iA7y72_(J1l{3$-((WCDMZDkr6x0)DLKFWnWc-AYU&)#r-JO zi}CxQI^cpjJ{E5)3!#Dcun7+1u(8?Am&yq9r8AD98j;4s+&CyZ@7-=J^_Z{38I^7B%c>H*3@hrsiD?Cf_yo{$F&jvj2;Yr}>!jq5u zzs2(co>%ZR;kh4m{}jJIwF(o#BSl~r>B;^;+M~`YS0dxFCh8@t*A|Ht%BD?w6ezh! zeKy`s(M*epxa?Tnjdw(DNMTlyH|>IC-W_=)#dXP;O_Kv7wnBNtSO<#@J%$}Z@EF!f z*PRRMVuEQJv+`d;@guR{gB<1p{-qLbU z^v+%g!yw|LLmyB6%Xq#H~TH$fv zCI}NnsU3aYuX)rP?Ui*`ZB5ndMg-aQmMiw*|qL~`j+=c1hS0f(}IA+-h7(_T5^ zDovsP*tlX!Oba{yI;luofSwGu%aUs|n^bbNUCuY6kIT}3*RydYikjaHvSXnQnYPz| zFZR98c3Q0XoLyoMoY5abfC0sDv;Sg`Itj{PT0zRr7}Y-PBW2S8+1ZwAk}D1eHep?e z-ENplM>Vd?t^>ywIcUAJKk#!1jo)@bkXCHQ*!~1VO_AK-LAM1`1%rzZdrs%@Q`q02 zE94`*D{#U$_#yr}1^eoGAa!^NPf$?8W8FAlqP_^zp=$E`S@SnCw|>m~5nWpF4Uxl< zC=(_8m3Oflpz|&iwK)>!2~fjf%H>Aw=h4wU1e_yr0YCzU&?3YEG>@n9IXIaTc{n;G zeb_=Wsm2s0JsE{EK}d<4ASH0Ja;>&WY7kN)(*MT_yGe)%J<9vzPZf5(5EC&WCOTY^ zN+BnlsbsMsCR{^lHOb|~`(R@(C)k)pwC@>jXGM!hO5lg2#G51~-U2nYqYuEu!7E5?~2XWg1k)Uja(toH%#_^HmpKeAvcAq|Ag!Dd!@^*pq22Jl?}9Nv7;(RqLS4 z4_yJQoLx?kz*608!vu*XESD%0oJaz+(k=kn8{$$;O0YR^bYNYe702k1Ds=QuQiWG0 zW8zhP>*Q1`A3w5LsJPh?U8t1n>o>DJX@NYE!-xG7BDO2j;$$pqat&RtNfS#oyJ8}i z(z-C2Xs>p$L1SIvs@DQ9Y0^Y1Ch!W1;lgCK9C^qJgr$%RsAAKQpoPNeyZzdp-G1%K zbMJQIJKpWpY|KQ_@71gbnxgRz`lX(4p`KssX`bLHz%Wac(QJ*tB+TTAOSQnIB?z3a z1;)7$uxf$)8U#KjMnRsw4rm|tYk@*Ad19LusKo_{#9Lb6%NDyO5jB!IEQ!BsfdWg3 zCGkfskZ-B9Bz~a{vU$$86iB>HDtR0Ehv_OHyQj+*@BTr3b;u!=m;yx5|jRxZgE~PNyBF8xB2$w8; zoZ2-xmzX6$x1D-!_FjQzpL3p zDeU!`G#j#)`Nnn38#+o0HWWir6-`6}pq7$7I$-nwg8+kG8WVb163#-S2|n z&EX#+gcmFbv95~H(F{zi+|Cb@mly)ShTw9l(QW<2U2%8OW7lh{kzJeignvV|`*(Y$ zy`$vw=D5@_RUi-DiU}B2MB)sNw>xe>kV+N)hJ%gL=KpOK_`{;UOa&<_Pna6W!W2w# z>}b_Gn!i0Z3^pf(qGDY_**TO4omCDbN2`&1tGHsQj+CA)-nWK4|<{j+I5v@Lt+!0|T6 z&~5IDJ@ZDo*>U%L}|Ky4tLVx^`u~s4dar{jB;~l5d zA0N^6#|LHhj?C647o1jq44rX&h_z*7xAGKpLgm3-R=+(Ow@J&Pn+kSFq|Baehr>`t$ z?W{|Q?XxM?%}CaZ@HF&m1}uLj$_Nc7{fsUzs6GvDS55)PNz!S{KNI{+^`9xf3GSfw 
ztq9E&O~{g$Iz806P$hyn?f!3e)!Y47Mv5+Q9@CW8HYl^ZV)nq5)>bVhPsI2~VeRGY zcGd^?LE&BB+Q8Z(MV8E5{o8T3bj6rz-J=R6{{rB>w2h! zO1Q=u>xw!2dCp_f@|eya!Bnp~ce8G61Gyo-uEksQ6J)K$LsyG$)gIE{QM_E(5*QI# zV1f9;O!H6xcK$!$*oGbl6b3LY&S1AQ;0W$>22VNzRr>mWB{D!6N7+BFJxN{czkpJg z2fNDyyMz16gD1=V+g4)V0rsU->oI!DxGqnGSCbBOyQXOnU5?U%{* zoMc5S=k~)==VV@>E~#?L)lgm7)fP$NS%msNJ5IkYj$Up_4Ym}_&C5e+2E)~$4nDp- z?1+a{@{m(&!>#_Q*x>o6i9tNK92&{TuA>7=9P!YJHI`nnT{f^th_YM8VWbwb`se|O zmv(H%`#~dpHLUU=XK-$99o~P`xVruhITeF~Nv`-Pu%WfVOI8|yw8To+fP(uMvo;~Os3=r4D%N2eH{9lcVIWmC+;;UiXsObLzuDSA{K{?ZJupmR z&=815NufnRmhW7Nv-HNX7L?kXn)m=IT*FTS=5}s}PD)zd!lfjk@kNdX+8&Df2SSQc z9w0>0)z#YpoJc|BY-Md|wEqHg0++Ph+uPxGwz%tTmyk&i8R{GfJkKXdoW!%8;w|yU zI~q9@w$01-X|acP5>0?kE6{OE%fEIpp)>TX&}q+&&SXL-?sm2Tql-C4FvvO*cqVZ# z*hQDOx$FUI4?azVW3SUXm0PU7!`^hWMmO64%7sXe%E?QEM_;d#nk9R^PTAvEZlT2{ zUvewzSUS!vS8D5Nw8Zvc@uNhF9B!QL&jQU!S+w^t92E+F8w=tO$73H`Zu^QbeR5ZH z1mZe8x3va14GStPgn8{x?ZCQrvygINdGH9Gdyjn^vFJyw#^nN@uCFemzU0pYob#J@ zyCZn;c3AT&>?m|r>`fGltqaj6uG3}>UdR@k_A=NGXVB)jmwiZw^_OB`(C-m&1X(=b zh#gkb53Ka6Rng!HNm@XjHb4?7G#lh)?|Mg9wMoGxAe~Rm^QZxI(S~H1F+Q=j_FOj3 zf#G6-(R02stlo3kg^IPya~WIm!cK~gqjl-1cMZ>`o zA0dj&_IBfWz-F^7t|1* z{T^8#dz_r=L~Ek_oMiG1o@yDUENa&5<&;WcF%PtyiZzN(wR=Y70qH^|mH-}Wsb9$^ zJ+qwF1R=tUx)K$_LQsid_|*IiQq&VEh(55lAG+By zqL1R-2s$W3kJAf?sKh=*{hflA9@fW9hdIAZ9ETE<*FOZ^E_!G`={0$Cz>TEhjZv|p zQPdw;3Bc?gg|>Mv%SS|Zp$&;Y=!Jd+9uS58lo$Gcq1(^4&*#)8&ezdufIdc`b%JYT z0~(Rhcf@Sq#-2pJQYZ38;x~=L#A#X&w{QxZIgNVLThRt0kH)azL~byK!Q;H*dD<}0 zdbc~*Ft|n^25Xg}uv<2WPn>OS^+M_E1kO#K5m%#+c-rQSeh2?Id)U<({T@!_p?;21 zGls9A1>I-zs8yK^*T!jK8ksS@?a=TLtnx?c|38qj@1#cjUf_lL@e&(oEt4zy=TM)f+8d!vX~X;Ef?r)p)>1m5|8TD zPc{bcJGk?7Zk*ABSJ6kU%FttiSIvE-38=%l`-mmJgSvSi{nbEc{J$EV9%Rapnr1R_ z=^soqLZ8Z!x*QXo7(Fi{>fC5_7Nh4}WclNXZvX{-$?-*w4qtCz(UiPgtcU)|^~PJ> zXd|sR{-CWl&Xpf&5~rEztXT0GJ{5ikbsFoX%=*qOs>Py!S9w}d&AauP#0Sv#i{9Kr znt%12#B-1C%vhytR-9mdnwmMIXHLx!(LH9-+Qd`?n<|bCmLXZohh$B?c-NojWQ$ zNMe@hO_))2fqph~?ay$oEQs<1oiP6^r`U5^?S&IX1}1qN6Cc*BSejyym!7|9QD<$% 
zLKZuamTZn&m!R`=(U>)OgY~0%y@2IMk0s1?u(IH5C#>Xik~M2u@WPVR%xG(Ktefi+ zV^O&=H?G$vZLw6QRUB2+bD2QJYFw?Y6KKtlUZnS!gY;SPnZ!&~A)4Jn=WpmkoN@8h zJ}5`aeHtt2l5#KiTM{ZVi(+bdF}}5JbSD0F!t1)PVKp(M8S^)9bPNV6VGtLGBW zplnggQ@oaQkw-lwX}!L9K(~?{zku@-(fdsD=u%XkrB658$ITVWHBBAx7O4a7#+H_= zN^pIIYoc(UpD3A-Kny?5oeO!3OO8t|sFx{Q65B zKGjz0Z1sgxFqheoV?5C^i+0c062LmldE6K7BEuT&;gq2tnfQ}9opuYDc>ruvoZYyi zgp}YhBCyU$w4ERxqcKQAFT5RTnmx{ZS~g`MVqi?RnopLY@c3jocjFDZ%NEbk8?*&!4-KM!{_9k`6uq-mJsIgPxLj$1^9;sbdIr~lXpm5^c?p(@8tF1okh9M zt1{)=S+|cnauMyj=>JRmvc|gUZ`~EuArw4Dn%7w3x z|DT(`W>;(Y{_=uYXGg&7(%j#~j*4^Z=ws+M#{5#|klS>`REwDY7JE)o1&_xP<_8xCBt_AQG zt7C=1`q!c1Cxi7nfRZqh4ppL9&4ig2r7HnJ6FH%4AGUGa!+Es8ixlkI!V4|NYgdz? zC{NCPs^FXg$AogZqTa}lf16PbN@}j8l`{R(d71_8M*Lq|L%y|FQ@m2>PzvYTmBN|i z;m4-L_Tzzl2`=F74OZmCQ4UU5P~E_tygGV)0-1vk+Z^bWwt{5}lK;$UE%LVtXTP@C3NSvSU7SvRX6o)T*p&iEbv?ed73j^sN)X~Q;- z64{KubROtTHseo%1DE3Rz{=0Z2>#1*Fu`uY1lxlx*2K@D)>k4C?qZBZZbxmas281G zICqRcn{Txk!JY9X2znz^migFL-Z~s%PqfPHm@gS7MEv=%h%%aO>s6Y&Sr`$(BcYAalN#-UZ%L-T3l~aoL!5v zo8k(!xI$B0A1$trDXy;;*VhzRq{S7P;?C3J&NIcGuf?6O#RYN2cv`F6XQotH_fiE> zg_~g1%fVv~Df~QW%)OsP9po}d3eO{=r*CrI$C;KcIt;i*@GpkQc_BUoeZ`6ApaxkA z$5lzF;FsE^&Y zFv2Deu|{ryA&vXR^f(*ekA_0s6ZuxO38253KsbfvpgaNky9som2Est2TrZhG{WTCI zB!ONwfd*)x0s&fX0$rqm$aovMR+vBoHBfH>T4@3mYasG+gIuqeKo@Hua{mL+Dii1u z4b(?~UNwO()j)j(=rt2)kOnFepw~^H!5Zj10a|SWeL(}AFFp6EBe&69<)Zsd#ffS2ScrTs zR1LOkj=u{tSYr8$ACKk*|sN=CB`x;`9DA{z~*DQ+CASzM;aF)X^JTQiGr7 z@Ju~1!|b!aNL@k^vE4;5{ikGVgpx_it+9@xY8HMB0k8TpmEgwJHJE0{_S#*2|3GOm zUG$P!gA8M(YFFRih@=sb6n1nQF{8K&cOjJ%lZvZw2lvHb>x~FU;o)kwcpS+OZFYe! 
zi~tj@nGnn|IwUU>f|*8#;w~JrppZYd@j@U83clZN)LOz{d9Ckrxq@e&E3&AuG-bMSS7VfVqk=Bia#9cCO z#Eaz}Ad6`3D_jf6Vp)e^01||zzAOj^AVFy6%YtA45`-qcEC>c5L1^B~f?xm=gr>bL z2nHZQXx7VuU;q+?CcP{O1|UIb&dY*e01||zyetR?AVFxx%YtA45`-qaEC>c5L1@0q zf?xm=gr++aB)A?+MmTC+UPF^zVGN{x;s-=(%_x>H6kQlbxF_vNHOj%07*G8W#xTPl z4>m=w(+gs^VwCwJzoX2_0R~U?^G2Sg{`KbHiL;a`{JI!z0o5=uDzlZkRKdV1XJAPC zTM$o^1^>}*p6!hP`US+o?~)(CFvO+TiswX>OrY9E8X=|)IWBfl4JyAQhVn$QV&tfN z9L39Cf@SF&E^cXxgA7U$e;b7+UljM9()Z=~Mv!jIh5xFZD)I4YwlBq9U6Xw&KTx_F z?dAtcF_NDvC81dCVQ^StN|C4^zD%h2}qr&lygV=gLp(TZ-u)J=8QB}x!<#Kcsa*LZ3024QGY&}vr$50d>AJ`+uzZ;m~@mI z4b?o(fw*d(wD>q1GEQM;(76(wR)8L-(BwmqOY_MMH16+fZTWF>JQ@=46X*Y8AG+}> z3f@Y==P{oq%P2UOf_>u%UW}lB7zMwFyP4+slHx{XvkFD-LBixNdb@Z(-nP)&3@Ym_ zdUL;zw}x{f;q1jkzWcGB&cPEV4gBdx2n~uE7@>!tbx$AT=tpokmlHJPM{rQX8%J<$ zL__XZ(4b*Bk-`9=*8F@A)Le%VpizAYr|>k&$@4K)mdDUTGj=Ag;HTpM!tq>-3ix>b zMfQ2SSw7NL7MZ~T8?5iSbR2~N$))1}yoie*u&B>zg)h$8sfA1aC|ddeeqF31*lC4F zb8D2I!4K~k`tkrJm5Ds%^d~-6=f-k_E^vq0Is$J5ICj8kR#h>mdI(D;_;?`4K z3&qj+5^AW?YPlX1qtEpa98m9Ey%bncExiatM+) zeE>x4*Ng}%Dc9jIGSX1)aT-J{xB>Zko(73iVC17CVeM$>?CX=Z?*LD}NqDAD$>=V_ zkH~2^_3gvYW=VR(f?z()@L6*M*$;gAnXLHC^GyBg9=QKOUxnfGa|R2Z-2@Fy4WV}l zp-0{}>EZ{%2Y!!LQEaabYzXI{{fj}f-JtcyHoOde;27~VIi#$Lg6H7syk+)8wEqL@ zy1RLQ)>D6i;rKB)bFL;k!$^y^g+ALhWCfoI{)acTT3zz2^0u7ta(ihoGIjk+RNzAy#qG7+-K$MhWG}_&mAd4JkrCE0y59 zWfuP*xGNH%xGa7ZP(($E>9m%G-*K(DD|m5InAR$haRrqBiTimHd~8N_KLvlOy|M2c z`3Tw!C*{YIRJ#=Nh$JoGCrTn=S2^-#kz})${5EIYJuN{g$}%qc0Vgc z=!8UG196Lk8G--ks%NDqu0nKz7EmU7c&0Uhci|6j5G-~_F2E;Oi3c7*J#e-FvOh+_ zA0ViZM`YHJ9HbFe`;mih<M4y83HWCvuEH4i9T2cd_|Rh>DDyp%p!ZH^C39 z%8}Ews3IOEg~>Al%CjZ^3aYM*uQ>oDBGp#Uv_>KYP{hrYk$XNAHEaPCzPY3RL5TEd zCDkL6-}?q-!hh!N*WCcaNK0YgD~03b76t<^a>#5e#X*EjH|+71p5r>dt^ET!GvY6` zQeV&(D6x}(q8dkH zlBg!8m9jXa6dV+$ODPhiY(`4z&im4(lxPl-y-?O1MnZ-x3Nb++n$$p$%pk26F zEiK$m?^&&B=5RWF!)bE(6@=E51Ic&=@yg8k0ooVHoLZ_&OsQU=RA12m z{Y`+s5}=<37;6H&NPzD51&Smfb=*4$uu}t6ngB-$(4+y(D1Hg75&w$@ATtKNs4EGu zSObhP0mcyEQ4Mgl2{4TScWMB)3GibAcr?JZCcrBMDAxc!6QGj-12w>nCcu}$+VPWn 
zspT3)ck)2rRUT8S?@+3(8UUaAHX8L~0<6&h!%ToD3Ghb^FwO+{BLN~Bz}(t*2;kQM zqfI$JCBUs5prJTMOGVZtI(aXEG>wnc0A>{TQ_}}&fKpQqUT(ey7-<6ZrJ@e(G125r z9_YL3Yo=6#DOIBem}CN6L2^CFKV|p_fd5DNbDO9B3fq@b`x3?65iUULxKL=)F?B_i{GD$wonJcW>wB>TuV?y;*mxqqnn3 zTG7jr`DrtfU6+XT|fNObOBgy$)}Qa{>y|bYK^@Y9cSCh zDTzz_9!oC&H$YIwPbfVM|2Otr{&jNsr{P5y%k2<#o#Btils;#9J^b-}4t%ha4(^xW zd!B5!B1L(z4{dM|WIc@L=M4`@Wbng%8kzNToL;&1gTaH9EYJx5`$P@gxG0m^7P)qR zC)y;(4%yg|+CB7+4~$_gPCN1aa;iT(&Ee9-K1p>q!nX%=!;8dW@)p5i_+$$Vnm*7z zu&E`7yAiD9Tl6Fi+|hzHimh5vwKsNDNVG|5EoQ?y64WNsPj zyqu#Rx3RuWza;*Z2a$+wnn-*K zbMB~=mgDH|fB;ZZ)+jAMf+9r>B$8a%i1Q(AwQ}vN4SGn+MGiu`5!X)Plc)w*82;0x z<@odj$A{O>$!}^D9RGFLl8dwCtLb}2`u8g0_=-|W|6)b@a}|kSW8L3_-@sAAUsI?{ zV!s47p>~Na!mrtDQzd*fg<7RLiZ>iLm1vRVWz2|lXpxJ}k#;STK7C}QBu`LOChc5} zNOJ0gNR9|DQ=Q5ml!gzRiLITMM)$&A)8tE)yd zVP^QL%IXh|o<03d4GgtEjPEyOyPHbT=Nm+?I9b+)Irjlrb8hQwYw!lDSo7er zN(wC(A$Z6>*+&Zh9wF_%McQ5)-+%Ei?A|b+L-;DexvJ9aaD|cf z1X{|`uS5IQjI#&4+C}ydH7Ho-xr<+`ahISe6F?aPpd`|g)Qr3L(ze2aez zP|Z}ks-^Z3%}zUtE<$k?&^DsDV!U4e3;tRn(q@vDzIlY6D_5lD_>^TcbqiDyIqbDp zB%&n~;L*S-e!+B<+PnzjWh2GO*^m*Q||J~x5;8#qAN z#Y`yXA2agcJ1Y2=(jfY#LO6h&Uiz3yKYzp|2RA;(85AOgqv_zIgRFrK$N*yozuQ7S@n@95J|C{Cr#QGd zva<`*e*DSJpljr{Qd+NQ(eXzSCv%=~vGrsm#>9tfeA8maghmtrFK|=oYa{(j#@0QN zM`d`e(Xn+`l<-f=Y(3gc&B3Q;xcJ`+-hlMCcCk+9R>3xsyVF(C8dw1nU9!&&JZTyu ze9VBJKjLGASmiAuN^qX71hKUxX@Z(qC;SFieHDBW7hk~!WXF3mf4v#Uh4aZ>dc8}=zEgWoyIEC~6-g#HtIo@cBx{>h=VwKdq0K5Hpg&{1!2gD26f|L226yA0&44E^ph4q;AugoQ??823=zt0&bijK) z_6OR;aZ-dI0a2>)#YS>EK3fp_IZk*&BZWy!lWs48mkcb*2#d&S+WW2|O6k`gE^jBg zE`V=zizV?ZRId3`&UEl7g-96lBk>l>OtUM#HUtNV{%?~j#A|UX0go=i(W4R>RAFB4 z_VD+JwLEsM#Uz->A7i=MA7eXQI#Y?=JxVn0gYxollN|e&OjnP?LzhsQnGPN|qQFe~ zh~wE?TN4XHI5SCmlSIr*3&r|`_Ji5YuhW}PFXrZ7#l8PAtkxd;idGFVAeoC}dhSL$ zG4`z%xE{P9{ngcfppE1-kY_uRGkslFdeiwAcZP`@KW6k>6I~H~myrwqr8@rfqT|n9 zSx%r%97iM{LmnduErVPkB(_*Meo%|xn*`i}WvDlCQD%#_i(Z2ZRZ)bw|4)%WqlL(i zETTT%zWtA9weO;g_DxdA0jbGXXHNdqeRa0_CsPBxaMFne0!M+K*3Jpi@}gqwBl{P_ 
zU1z($zqGs_J*d0uL~9{>T+SA-RWY~y#i8!uz}iec6=e$iQIxIN*$&U2Cfc!>_|@c}Aa0wDj96X8G72Rpwh@E>WH77){L&r*jWi^!SN6d3PJ2U+0F#G^XN zedMt8evJEb8Vf)b&;Z{JD0cx(S@`HN{733X{Ny-t zpbDv(WoJP)cx!61;(OZQji)s^p2gtWN<14-!OeKyL8~afRBv5hv<^E2csp#6f(sBU z*ufP44mpje}oEv-deT1JV8A0ocZURrK^RMxOl0oduE{ z*8RJ15>Ug540C>*rYChxH@G3A`Mf)B!knWaJR5dd+W0|_2z;+Ke9i!0?h4L8$>f@j z{EWf9CpoA~`@lj%;X4qs(uLfi1rVdEycJvL{8wTdCJK(h_d!p=4mW|488r(>Sg6@x z18OO`@z5MY!e`+K-ESZH_aGlGM!oP^I3l~=rxO>V5?=2@^q;7A2T;kVmwJHr&?#H2 zIpd>F(CS>9yA%GDARB&O;TEuisPF+^A?{8%9Z?dYJ(uNi z7g~MwcJPjEOngn0>356r2J7X)PbYdzl;jy<#?Euu1>E0ICX1|1?8bUR6haox9u?ot zgySS2mF{gqf*dDZ3*_K9DYJyP5>Fc?{EU~-k2??o8r*>pwW!TRrQCS5CcbNwGLe_! z^kL8FQHlSE3*aoq#0A`cP!H!l)bW|ldooddKJf{Tnnh!icsq+OKb>PGRM)f5b^iW+ z(q@Q(`KT*PKA%O^X4j9|?&S;?&o#7u&t*>^RGJ?D=vP5cP5!jxpr>%Tndy%S6~Vzy zE`LmbK#rOHF=Y}vlBlC~CAn}T<()3^X|ajML)>E)O@3LIH~*x(A55i^ynj}sGp%3J z`kdN>G=*ijMB*Nj$3gP&8Sblip%o3j_gsFKE|CuWkXk2PBCW=I+9eWvBp>CXg0w<9?Zz8cvj;O(L&Me4}#~Why~_Nr8|(n}U&?sKNIlXIDC1cMzo;j-QBS&%*K3(-5Y5 z&o(}S_u58cAJaLhwx-$BU8&LK zqj-S(&AKpKe(3(8GW(xu`(M-l)bMlYe(g8Wz7hfB~@{6;Z8f1f^hHahk=Sm=Dn3)ALg{ zeW8E<#Dnjq!__|w>NhB))MfKp@a?VO?%rNl#&`5qD(e0HbW4@{ao`x=@+c_3J#ZO( z_)DShfF?lJY~En%Pqqy^e{uARv$;6tY8WxA4?`P)tP9aaZfwTs>k62q(y!E+)d`#IcDk8)Dbqo78?|hll!6JnG{I%GTqE zxrAbnXzcKpE}+-KPG~Sn#E#C5&s98~g*mX17JdWtI~7L%dVK0mhmU94X0Bw;L^Clc zs_%EQ`*W_s0(J0tQ$rpJ`Paj6O~Q-A44?YLQV^256>Ei>?0#L0;Sx_h%=`6H7?(?- z|DdTllL_hjFj@WSQ|~H;8T<67c^*=Ly3{3@rLhoie+>KSD9rir7yxC#(57vcI3W}uowGfv0 zgeTHhEmPFMN}2tGj2+zc1^NWG<8tbzV#D^sgX0!70}BOj{`8eHKcPWN`a)jSGDu-9 zJ=(qud`3-fp|V91th`XZmc+=5TIlp7o!O^8Sj@KZtDZdVMJ#3{PN(#-WKL5EIkiV< zGQ?jR-=_Pc&`(VuBCpMNpUa5&EUtEAH4_OxL-T3RJUz?(WfLo#fAjq@ufN z|7_Pt!RM${?o;pFy2tFb4PCQ_x@H%soVXnYX7hKNe`kuVW5(sGze8`vmv@V}0*wwc z&V0Y`Wz zbZ;@@c2XSu>E3C??WZ{U)4kt_J4A8xr~8l*ca-AjPxn!}GPnUZ@MdqcB8>Zgcx|Is z+!O@7Ie8wsSm@V{`-T_+I_F;v{&oXVerXVFOnybvBOv3xYsLt`xWLfI#X(#^&3)m} za63!+vfR!hA6a3HqdvTRuKK7cpQk>c^SSD41#!R(GBLyFs(Cxz_a%&x zUmL>wYUPy}wKSZ*fDchaP|no0=r~6bhjKgOrL(gj+FpGf)+re|jOCu-559gq?fu^4 
zn0EO1e-kg_H0&(KKMdCR_{Xnyxl+jVK$Ksy#Gha1)A=Jh{&IU%o7nzDHH@xmnSH&% zzkp+=nL!*tT$=20w-PgzxN(^P*+@o1=+5sYd_BRzaYa>&n3(q5%L$P+G2zpA@dRet zjM>L5A2l+D@6JlY(fm{OBGgW^3eA_86$O64ggXFkvmq9k8#h7^fg$IgpMl$k{r z=`Xlaf8c{!>@Z(rdDUC&t+XOB&T|EP3XCwPcPqhzP^g`d*~e^iYLY{@ru3?prVT2M zYwu4v{&(2>udrQDXNiWrHQAS* z`E!{tvoo3d)5kOodw&WT2LBW=EdFDaAu{yGt$^Onm}YCngLWRo>>}<46!M@=5BA|f zEI0+UFAsviB3Q(OC3^5Y9<0=Z=R0F`y8!nLBQ6O660~E`n4$1#KcoxKS`q1POBrT1AlBgqw($t;Vf2VRi}pQI=LUjXVWYKy%v5v8HzBl6bN`^7b>g$j`L>_H zIp&IxOttCut~B0Geg6Qt3ZQ+%^@p$a$efn1qJDn=fIdrWv2(N2Y&gE>i|_NQwih`b ziVAnpMHW~RjHNcY%=(72#wVi?A=SA2;1(%5K6X??Rb=AB?XAH>61(1tx^c&mk&_a` z;2L~fNp^89)ldP}Jp+)ik4Ah+I=0*Im}ryHce0WHeS!a7On(#52yuJx#CP?V(fZ3^{pC}X#S_y<3iu1% zH|8(D&|k={9*?;hFIoIV{K@4X>6coZZD=4_Id1&WUyP1__}srgoLj$8i5p?KJE1Kh z%AnkYMoDJPUKZysbhU%Na}V>xI`;;;MIo+#?A9&s`27%sNPPip}lpCr?xLXSAr4>*ioc8_$jgLu$q8A^umX_~O*hjUyEEwWA z4!Rh+&oLJI4zG11w4Y=+YKLCSWw~bTaGMn|UEHEJQuGB1bDO5kV~3CF1xTu}TUrr? z`HD7wZ^iuG;bjlRy)1yad>+l^m7?0YxHx{+i#Ji#GXb@dA`8$4lUNph&7+ihf z_c5S+>u|B%`eh73taNA{Fx|nn>FEa!$Nk{WYtq*_q~&cOAk-+D!j7*$oERr`uVl9# zO;<=hp-<>iDhB-=5umZK4UNl9Sjw*Ho>)99CL{UfcwO@HzzmU{t|mVaJ2Blp4AHQNZwz{(PcH*bJ`9`i9%AMpeuW~qyEqhjfg#ccOqI;k=O2Dp0#)cliF_`~lrhJPXWrv|dWu4Wr zcoCX_#IDK`n0>QYbu59H*6P>+{Irv;=fsj~c?5SZ>ufW@KLi|}f~z zydmReM2OCuU_4epw=nv^Anh7N+X5A&XFtk53hpWo9xL~MrFLHw>1t2XU;iLzs9PME zwp?vp^(9h>tW|=?OZ?ZF?w{1|y9UxV@Dc831P;=b3TWP<{{46ZI;obx)+9a|LbEcW z18-tNZeCeqIiL1P{(5}N`xPYYgITrkBYqjhRTvBWG5H*D4E%N_Y2P`Li<3XEUL@la z<*I(MBT$P=9m^)-6Xy8(FzUyB4ZtPZe58Ww?I^SempO1BgzSeB^au9Fq0b=E0p);i zA};xOC>r=ML3c!I)^^q&??i=C_%g~u4oot~AB`uo{lHU5>|w|8b#L8b@J(GTphJnkqEGd2$Pig4Sn6-P;45+|GEKm8uc zP~A(Rc)tZ%ylMdIV^aBs=>R;uT?*|ZTq>GoFA?*=U+7#-S{|kfg#Sv&_+tdRUDXqX>Pc9GwESf% zhl?vs6yk`2pI2bA;dQv2@xuD*5lg5N48@C-Ajb$F4{plyR&e))(vNNcF!2)l$vaxa z1pkD9)><+qFv5^i*2v~NDog+@^h9b?=-6cL$?;yJYoiM&yi`BaS} zm}u0gT0{4v68mtRoGq`@{1rTS4`(hMd6GFQzTZvE1;)ZD1568c#s`BHuou54gR5%5 z2ROUnKQh}Ov-NW9Lm1fI0uz?p42FA6)zl+DOqQ2m#==wpR|n;u4zQ 
z3OlQvz2OcI9+#T)JQc_0JfGDBQ>9XnmTl?%{J*GvcG~*0)_d|~^06CaccnoaQq6#IavJQ#8P2R@%Pa$DHYwbdMTsb9kV291)Wlo zT7`r@)?VGpu&v6*Zh07%XB}k97Tn#TSdYVw9iJJ%xjv`}AHfc>X3%qjWkOW%sn4fs z4?YdBK-i0GEo8lPTBP~++`gX18y|UJ=B4=B0T%e4oWp}pXBSX$6oqmaV4^LA@8Qg41*wDR6Msi@qBC|(XpslVIs?uEN8 zq5_H8?1m`Q8gK_{6zs1^aCjoF0??X&E)NU5CKe`Hf%88U~T8yL8)wIyp zE`96gd#E-?rgwA_?5d(dbFKbO{mth?vp!Ti&*PRa*rB{h7BBEAB2=PS?49vI?|D>9 zs?f*r%MbL-7UOd8GhDm76^k0_NB>Rz9sCS8H$Op`CcuAS;zVbW11oOoNZa7F75QZ} z-)7nqO6O`nPS@ADJ>gRGIo%eTnsc1f&BvK@Mm*59`RCzpffT+DeMU&X3#7^PboT5F z+s{l3XF~KWES!kcCHu}N82Qj*WKB(wkFQO{tGj7m>s zj-32$Mw>FNrzsz|x-;2}XIW1pNl!~!Zt47|gv{xu8#<3bh+~@D(3uL#ZD71M(ajeW zbnDXn(=QdFWv1~LA2*R3ah6!+&(p3TYH#)pYq}WQJ5IvbT-vgQ4c$MA_v_W_sJbe~#5bF)q zwRA%E!%S=PNhjEeO>J+o0UQgXR=W~1WVWx`6BCf1%trMU|>*&t_{L7%d8 z+Jc=!=~!j)$=TJCxJtaSz!qHM$$WQMzHv zjV|h8%N=~3c$gAqe#y5C(oG-AN4$8iZ6tYN+Dvhy?e{*%ICh{y^EkE}VKI&^Mp%qv znuN4*e3S&eK91vhfW~o$9-wi&RS(cOZqoxaj$8BqjpHZ+VjQo;+yDM?OoTlLh9bJ3 z14B`_oI~l$=TJCx{n-|daZJo{4y6EzP)TTejT(kglre(8Rzg3ngqZY#{H=0&k{Zc~OGS2iAJTX8bEfM50_ z*GjN89xbh)+su)%_!!zmtFfOShjioA{tF_uey})J;ay1_YF3YR+9trL-&fHV*iQA0 zLwy;0i}Z%%P<;HvETB<7C`30~-*;yv07lPcV5F@lPmK>tHg3mMb$wd@x}c7dOV$9yPR9A8GYkB4<;~eb8xP6G~R&=yr(4bp6j!KqFRl< zVOJKEBiAdrl<-0QdFc9T!o`FN&RaopR6IkZ^b4j}?)??=EfP=-JuGJRQiyWj5}Jv};m z*dnxLP$~XYVQ@e*0qf<9oN&s|29?6u2eh|uKzlnqItPjdQREZEcsZ2{Y5qMLU*N-3 zwN>12;ru!h=IY62Ugygh{hL`eDz;@-yfYcjNYBTAmC)!xw`#UNM%R(D;zgZUuj0iU zd0-JyiR_pSwUM9_td9@HKSMv$?>qQ4tdFak7g%tdLF<2ITG0Y!+IYANv{{YrWef5E z=B{Mm-^cZ{ENJK^0^`f8O6_q-J4;}@QrkoYQbl91?WJFrU@)QhZEc`2-ES=CIXz0F zCsu)^F^Z3%^^LA6&OC22*=u7r!tJ$}pyzXsXEVjLFIvzo3eH}8ImH5;VpNhrv5lOs zxV`q9=t&w}zsIjZvET{VYtzXsx7Su_Nu*n*uF;Hw@4x{PeQ$ovcsmSGJF2BS_99|0 zMUX`*Y@YA<01+Q>F>q!ZZ3zaKXKSOqRS-7I6i%b-_G>v^(?CrE1hKBp5FWyw4>sDo zMKH&{qYL>78e7#?XpK1@*&$kn@l>%q^=F^ZO>>lj5beapPi5D zxPA7M|6Er{!B;S6pUEaW12fHJK9^0l5A2_yuilyEp+4W#9_$1Q;yTZ1DDMqy?| zNp?gfkHE15o$|saIt;t;Ho7;0ZU?XCV{E~S`wq-N_^6AtW*m(^GxKtL@HF#+)h?gA zgeGIUOK>rqUPx=|VSoK#3oNbgXvLqItrq;ncUJJtm$&if82y=FRRc3%%|82G0=@Tt 
z+It)LD2r=<{MlrIK(V{pSh+3LwW31BnibT9+BOTjx~qvs0jsu}gb+w>LlTn>FIFX( zEatguY}%@qUvF*mxovN4t8Lo)g00#FkOZ|Fz!wl(<9lLIVzm&^n*aAZbDn44vLTfI z@8@^#&nKVkGiT13nKNf*&b&M`XNcm@+Pi95^KIf7brAL}S%uLfF*pTB|Np zLv_~xN)NmXkP{Dh2w-P;ALD+;fnRQ$6-rJsY`_poZsryqu@wRhN^K{9fzF-E`5v9P zzXtjHh)(QmhZ8#@RJ7oG3;EuXu-|+u)K|@kpwpM6sQYbZ!7KfabDp`t&fQVkbT`^` z4>0PW8mOctDcMll@uI3dPzm~$8#X$7)A`d>2f(x{d5*FhdmO1^)Cr+c-yFpMKa{#I zE?)a|kb8tvcB7Aj3th-Z#f7RG>hh_7j0;_rj*10c0gp-$s@fh!aUd5s?eLGYfbcx| z@DEo9gh%-wRr;*&N0UBU#nGfM=M!-E6L+#twogCZG67Rjj)HEShrV&$7WP}R>J)cI zp^@W`BqT-`G%0W@mf@07DdV+ud;1WtJq5E)DLutet^=Kk8nf-sN%#E}WYVui;`?7iVMK)!^l~;I6l@es&<(wJ!^&vRx0}x|q5|9YO|l`(kD{ z=-rMzT!3yY(h%+PbaB&Hb3?82;RZE)B<##6%-gi;E|d~W!7g1%@*htweOfY6$ngu*|p059Q z;SIxS1+9hc!B+JZJ`@elm}%~|(l3Z!085;EU|S94jqOyuzG!xfFEhI_SjJ6w%|)zS zxceFlrYOCv3tOg7@wH^n10kxK9T+ohkJ&kP&UM;so8P!}EF z7%+FjT*Fj`_U`avAI+Zd=U}H{wB*Tke-=aa!m8x)Z6VzMtS*0cf{FJqc9_L6&b{o9 za0Cb^FW27Bb-wqY8nIu}wf2!5Vlarg&`rX~5RnKL@(^rAnJetq6nF55z3T+JI%tAY7E{x9bchBFP^ zkjGorat$7_tNAlsFdarMe%%fy@VHvD<|X>RfYAo;#N(}*8p2S80Wj`^FD~KBHFkoA zS7+Ajupsm%*FD2{UZ*$kxyCaV5gZx<(LOM?7XSN#lo5>gHg$S1m+|2CI9CMgW=i#D z=vvPzZ8y$XgL$(*Cf%<%NbCf>1=9@C7=K2D!6oESZ7`(#tiNa}^PKJPjeDwbSrl zk|+Y~tQb+Bz4d;Y*!(GN`<{lcnX0?sjoRaA->>6)Jo~pt@J9O%w%*uY(5i-(N7uds zEz{@0#^IFz9qk)!?>x4|`&Ii!F?vx}x`DfD*2da5nk@EL+wB{{E{$v7z+M*DzJV={ zYu~_@j-!3wcMDoGf}Z&V9BLIN-)iVw)j@@1#lD&qo9$LjB4B7*O+=Ps%5mM!EH-ld zEJxe6EB|w>iM+eD<_)^WQ~7%loVrA*hcuNRG}rB?=VAs2P%5#B@;#Z$qD=mrQ8%@k-%sJ?=`ti&FUR7gK}AD~12bvKvigr&5XV zJ5TERs_X|_x3>iA52KD3co(+PCuK^X=q1C1Mb@|_!wqL?Wm1c`y<~_5bZC^eeGq!< zBW(xp^>p{({AF$Pqt5l2&f8)O`xl$eE^MJpH(tERbapFSNy7%}d-(Hya}&3yZ@jqM z*@b$Ks}m9_8verJB-Tedu~inEPqw<6xlWArSJh~E<`)M3n!2&w_M{-ze=(X3nlE5z zl8cT$HyFM3F~#znTc0J*3iS$%%fE~-x+I>asOAV2LnTmpVw5u#B`Kn}>$v9JuH*JT zj-9t|*8@+x9@rd=HcNude^Lp;!gI6Wt3u(3R%JI2_XzcgSSEdzrZawzax^IA`KFFHSifg8`JeaApAa+IV+@;Ok$& z(6-ADGq65=Rx|7j%)y`nL1*);j`yi$W_{A}7?nnjAD@-X1w0)Sf5=7Q=4#xzj5?XO zT#cKD3$Fy_im*K(uJe6}(r+&KNM~j znL>rirHtxm3ucdVaF`U0>%sr_40^38sB8U6$ROoBpbmuSa{<=;D!IPTglRdoLJq4# 
z`hJrv-f!{=w*!}%pukXma%TqKXh8~|#y9RVK`}SY(Hz5DZnOvuQPr@_!y%^9rLuw5?DMZ#uzVdmk3OQ7OCE zank3YY@{9y$p3Z3BIX?b8t_FdH%X(=AER+ z%P=P5at%4K_Ka`NWf(HY=o@-fs_dau5;Ws=#+oeLhJiV#Pc3km>zvrbr@rcKP~KD$W-Y;#m}^X zec0|2Ldyu^OlHvaAZn^^nWNrjgFB9J<&V*b4DvRc)mgaFhPT<^|4sbA;nug=(>uAO~x||LdhOSLIttV?ebvrPzL`oRr2h zdvs-{(uoGAtFx;A0VM$)({u>>>f3PhjOyg3L%1L8@7M?tEpqPPo68{}zD z)UtV;0=Pn|W)3_9!i4zZO3^R0jRv)TUsxi5RG&9NT&O#E(XBuc*~0;)<_D zGu*HviEeQnIDu;3ysW#4<}lbPHcf#8o#Msl6uW{^l@Ru_`IebYQ-O)^7r&?aMWUJ) z5*ODe{u%i zHt#RkPn4d&qgohArB}U;zjqsrn)oAI zVbjF7y?+F^0eQk%u+{RaW82_s+Gda+c}o7?=+pbiXb1#fY_RH!@l8M0A)X51P^RmF zw`g#q>n)rxe<7~E`pxaFmpO1|@*s~3D>1&+UBfmf}aIEMKuaP<44a~pkg=C-x^ zG7=Dd@ND#X-)rj1RCn_^5D~8EbLSL}2XU zj;_d#eAgXY1I^pQlWB$|!n;ywBpTnXU`67NPI!ht6@X!q$S;72)rZI)E8GN-G~{C? z$nV__!teMe|2b%KFzcZA0eib(Svv}Tqu@6RexnV;u)Sp4I|KcxiS&c_#J8_t z6g}%yV?nF)6l5&h=f@}1Te4t^A=CNqt_P?32Cz*~@BGuDKsYBM`2p;Ai#w04>gQO-zNSyIs!NSs5AMMZ-Tg8^|&u#)F>R&| z4_)zuFJ%Jd6A-n%jIgY&03iSC@qdr}V@xo@f7(WwfqyYbat{rX{A^ zKWSj)QYD{owk6UTAhYlrZ*WfsxrLexl(OwblTEbL1w01+Zyaz%-uXK#gJRB-(jIs< zJ)X1i1_-|CP>w6|xS~Oxk(E;rdH|nq63723;~9RAfJ)-xMcn$ zad`M=$P$--hFasoK&l7JDWhH7%jK@5dO8Bh@c%im>z98_dq(*GUx=UeG!$I~GxyQ? 
z_*bhhZoO{vfm(Sv<|d4-LuSE@Y0>!}>YSSeEhK8-h&gM|?{z*mzu@c*VKw$#z0q;mWZl_V(037bEMJm|HKo@}zgwGOZWY4nTi+ylI-w2&p{ zi`Gr|MX&d8;Xl~Az!NkVWci{OP1lzx<0I0}SAu1a7qm`qDKI~ap@VsFXWs}f_8{dn z-yimizkDuk*a1~ zb}^k0k=PwtfDr54Q-jrn)iX9P}2=) zd9LQKBKLH|yyYr2P4YRv%mH}#!sgC3XL6#0YqVX>86ZPy9jH!h3o^oPN*Lgyh&_rM zzu*Vvq3OW)GbicR;DP&{v$T7f62ZVUO|kJ^?Tghy#a4tTa*&Ni<&V4$lxUA{iw7Fx zF$+JdnQ;$Lts(A=5IwNk*?N38l!(!y?d+oD*9r3F2c?^bc*h{^OXzEnGv7Pt6?*Xn5w;tU$DhOohre1s%K8HnCTxpx&RL?oxoQ9>o`~ zS8Y~VisLWpyak`d%=u2Pd2Dj(>vO_qlBwM>ra(o0SZl#gB5}h`-s!n+GxOxrLUJj~ zFu39DlL5!uBb`d2HX$s&9u=DtlU!x`r!O~6IHVe_bH>20u$&}))>8JQy3d02`S|wxobGTb z8t+^GqGddd9pb>6~jXcD3 z(Xa>3Q0nHGZKB3>0`~%oSe@u%b*w!o4YHXP{;^gE7~vJ6_|$mrmp~5Oh9J}uk-JEW zmr3KzXx-ks6}N}8U-!T#z{I*OB#!%F#Ptyx7gNUTpx3o+G1PT*ZAr%Kp2c97 zUxS0DI-LP$2b@<@GbY-wdiEK; zpY+$&3ze($R)n*r}TzRB1Sqs43HjFqv4h;3jaR7#@$QS~v6eYJelq%`)`BB+qs zz8aRU6YQ%+b1|&3&7`%7eYLLP49uEm@rqb3?Hf&fowz`oS8q9RAM^D&U2nq3+I-LC zZtZZ3BV%%RTx`~QU>f^l?TFX1w}xBE#$|8qktudQXwVVsy>a%|q8OKrW`0Xgko9i? zE)AZAokPPJFd&EVUI8xOj5oMu-o;`Jam7!YU4t=Tnq7m5Lwd>$NU*zhw!&*mOW)^@6nPBmV?@Yn_wXDzxpj+u{C4?X5j{SbJ-*f9Uq94j(qpuuT2`v#m8QN*$T4 zwT3K;Vzk1xn~U!66ZN;U4~e_VcUWgT{>1G`mqUl@&2=0)u;`=s8C^x=4sa~7 zjr5-dF8H2=w?FU4oUeuJVFOcLF1N$i?feYej(3|{#V;)D3SkpY5`TFIZ#V&KBQ0A% z=-BTVX&P~@J;l2%@8FFC=CHRXxFOW)#9GewP;0h3n*BZRU(M{Mk*>iWUUAp7DIZh$ ztcE-Nrb?R~20l?)+lRuJVe1g~8{j^oOkXtD7oCjh&lz5vqUA!4@PSK;*{aKEjM9m1 zCv(nk>Kbs4Y{jR!*L-F!j%7G;EF(M-`(d1ezh!0*O@!#JgVDZgxf3F8{l~E%6Rd%` zXhYZn#FIC_#LoOAa<#dPH)A~g$rPL_ub6#2TtA`hO(Yc98yw9a#OIPYvj{4zkB6aKva;vn4BJPC0uXw9Bs_Iii$)nmkUNiWa8vQu8> zG_T0QSypVEx?n0UVTL#lj%;#W(#7MS+r0;yhC#i(`vNsuF)zqAub4*C>0^+37_)i7 z4D*VaB%M75X^tkHYhIB{((}h4&DW%U^9tM%ZC+3?25FHdU1VNyB}uAI_MawbpXxBT+fM$dFMv_GT6g-j}fdBXg=opE&RQ+2fqyV zB;I2L>jj#Rx&9gc-nk3E4E7@4V+891nvc2uHU8e&k6#Aci#IM(z*nhZ*47^01JK=c z5OuY?d;JiQe9YV4=kNMq{Ca!L^&jHxH&vT}9@)t6f-uX19lI}u%R|`Yi;pu@B>t$& z7o7q{axzv?ls#7HkIYb6Cq@c4`{ztu-!+io8|j4lc-?QhuqW0T%KIRE3Urlo@DHlt zLwE9Q+)ni5GVbtQi2bfS({Sr*KM^l{4r3` zL6~Q6S~DFBa(y6Hi*C)&!G#qwtWDi)7}GH%GndWnQH}k 
zyIjp-L~nkdkUv`KF3<~~e5QcWQmgNH4@=lor%&)5UEBf-!fhDR zw41xJWQ7BU9hXF%dwhD`z`I}dARJHSeRRX8@o90jW1C}S9XAyiYro~)pZC#?Z=t&w z!o0uiIh>nn^FEIAgN-9w2XW&oa)nD5RdzVwLfJAq%Bi!XoGLqp%Fa&_E3;!50~}r_ z^cVczIbbDyQzV2hkO6HF^J(A`Y?>z8X(hwsu+s{%vFx3mY;&p-e@h5LnbS&)KDxKyeDvr(*l|7Q?LJusC7h%ug@9%aLxUgY-`@V*!3VCJv<}B7N*ZOuw z%l7zV3UBZ`{2h1%nX=Xs6Z<=yw@B#hp5}DsTC{03I(NA*+9gZ(zT;R9%yFeZVC+~_Y1FYH)K^Pb~_^=Sy z@Oa4E0JXFQLMh+4awV){WewgQpL+e0G$~{K;vY)Sj7txF5YqdhC59=7R8xx zUYvA*5{Spx-O2mt){}5bDv9N1ttTD`!L+&gCuM<587JqB-Ym;cL!+s({0toIJ6beRgQRRovVK_G_>*3Km z3Q#5dlkr8qcflNI6b8Y*hhhkgbosxes2l#QqQ-G@SIaesF5uX)U9HdAiD|mEKaeU` zB2|oIAGtju8)tb2RJV+k1kB5n>HNPAtExuw{;kSgh^kiJfhymLOmg+^Q4iuU|BB0s z@P!Ze5I!H-;dl8zDEQJ(iFGx-0CWhespkH`tdFpr$~OzctUbO1d$43NYq#0@4N&=_ zi_r*_%+T>>Vd)&Z?)rJS-FdTKy5tS@mz&S~@y+o~o6?Bh&(LkW0czXMx3F*SP3-(p z5}}8V02{2F-vvs^!>f^wzqC3}jC8Ku8hwPzf@n8Wkkkx~)zZe$j7Ha-1$inTD?g12 z=N#aIAA}p=uF(PBt7R?qn5|1=S>NL9 zl_`PZe(Bc_egu_*R+>28!Tr3W*mvAuDU??rZ)>`Z(P5khQNtZ94Ik+LEV$+Sz5N{S z(7t0Zhvv$fHz5GZ7Bh>*ov?}IfPthTY>KP);S6kTzYz`LsA)CMuh#Y{lVq-@sZ?LM zC`jL}4EmavIHtrg)|$`YZsoa(|Ly&W_bc-xEDo`1TR$hkV2^FI4Cn0Q?05C6`;|XK zi(NEe=vx0nvWMH22ft^{QfM~rOpG(pjN>jug}tZdAE~U+reeU#$}T%AeQ{aoM^*y8 z^#L%Hf&U&cAVd=>F^fbAT=F0;$o4@fYcW9X#Hku`7?A6)Z$xn?@FOF$06u6X_$Ns^ zJ$tyI`@g^%*gpdHl1+`N54tJ!30G>M2<;&>uYJdrENaQ^JNWRX^jZ-dR+aKVtfP&f zd*0VosvU1Ng2wV32he(s8t84!{wx-6hMGR$+6`Pd#lH_W5NJ%N8m1JMpbAyP(gJTW zUjn!a@Y)D1Zs@u7yKzf8@9HHTEbf3vMS#MvBYMnIDjN0kK{t2n9% z`i?e&p`(oeldGf4g8OJA@EmOfIY%2o{?SHIbhHt$6F;ho#Qr=f0`GIGEk2i~EyCo< z5;SVnm|^-2rWhs^UBFRi&6!u~_m@lb`M&6!X~1u*EV?d{Nn+_qrrZCnPn z$3wJ_@G@7r*v*`d|Ic7VsWh8j27%8`(9lSOs|M-gI&Rl>??SoLVimQhWqU+pVSY^^ z^3l23nCo`1GhkpF?MX)OfhCB2H2;#HLljgQtTnjmcCt^%r#+cS9Yh92b~@JB+@ZYT z@GSNe1Lwmx)`7j*U!?~RRK)gHZg$Ue_o+ceg1_`72lu9;Z22UQU;ClILniKUf(2b% zC4{2~t=agB9*iFUdu%riMyCYL!XB<2Vk_`T*zf04#~e zKtN7T^3e}ORW$f49>C~@*8%O|urc}wY5E_;#UqwG3O8mbSU)RoL#Nk{p(FQS1f%si zm>OZQ$JMkuN-(cN3R%FQx9QLe{N540{j8vDXCWrGT`xPAwfXb9)dJhuj%@+#^=KM) ztv$z!k2~ErV)I$27dvb4LBSsb_tZ*T(@6K=V_X_LD1mNzBGmQDL|6w> 
zDLQs|N1_v30v%mn%!*#%Yzh3PtJ(=yR!iWwUDa7|O>GJMuB&<~T<(^@eO=XVxUyRU z_jgri!!@lX@IY7fG`Oa>1Rm_Fo(`9%C9tup+5^{&mcT<@)idCl*%J7DSM^M|X14_X z&{aJfuAG*@!(G)maLsKAJknJ?7p~lvz@uH&xp1A|68K|R_4!#ekTems2?u^zjhaTV zq{mf*kzS~_XxgG@Hv8S6MVkl+XY`F_zuCB2)xN{hu^iY6F$lY565<``GVqTdw?EAaUSo|qa@DbAU$er55$H@>R_Y` z%%Ll)i8Kn1np!c}7U!XuYmf6#%yrlv!W;~NYbmyN3d&}#H_k&b*B9rZm>aS^ggF@e zL3Fey7SZ)6*Wm0zOHNe`?V%+X=bE|Fh~ZrUM2!2zj6)C*1!`WI5fn1qKy<0#~Q+sIB|*)$G9HzAuO>3xl zW9><5tSw26H72RCTETZHnXYXu5qzhHBSnJm6@0$n`vjjO_#wf21aC+)cMIMv&D;>Y zNAN??)vUicg74FCBwz5of-e$$r{GHj-y!%~!M6*(QSfboZxei@;M)aXEBFqP0hR*&uD+NYYoJI+HX z2qVq|`Ym2p4JmnYRB18SXZr|qy>T9jxz0EbG8fZT9fFB5*KYd=b8T@Rin+!(4>A|0 zt7-+c*=DZ9_OT=v=by8a6+#0rLMjpX8s`uJRj^Nug z+?p@=cEK0%jWU0vLmc;aisRm1I-u7k^5m4Th>n3XWIcfbmy7tH2nr=6+pWIfX!%tLrMK z7>GKlqt^9Qb=1-wW39bLs%zX#LD$!~xrDB;^sxFwLlvPWVTY}{V{+5#7EF&C@4~|q z=b`2jIkpF4OPOw|x=VH%njKd7IOw`AbLcGq2yF|tR*KHn>kOM zhnAe}VKJ9)`)KBLr~I0doS+alm_yBxURq`p_~a8eYluxZ$P)(m6`;oC#OJC2J0<(z zC}9p!n=5+Z0Z$rdt^j4p-+-Xr)n!Mg?DE_g%m9fBWH z!>^jwPQmwSIMOTlUcvXNqD99e_#q8PIs|V>_;$g&C48IUJ%Vo(e2(C21)neY62TV< zzDV#Tl77D6YbE_0!8Z!tBltGKy9M7acth|Vg4d_fkblAV>5e$kEBIc*sLB6pv^Vnd zH?~97KHs5>!(y~ldvc|#Y>Lj-H8)OY>#7^4vrAOnir3jiwvX04F`b<+7+dq?#Ca&q z4+_q1O&Nc+4bv6X0 zTZEoJY00T3VQX%7Lbi`)?#Pw*xHNP#H1xn#;7_Ju`*vcswV1FTRom0BPyYpo^MU9; z-@|crY`;_deu{N_+tl}O9yp2r_{XMi7USa3BxI+@w3-pXJ-oLsDJt>wrb4VNo-y#5JJ4)yAv zRZe!0sW&-2P2cSf{9-I;T@_WLrU-IxlDg{&_g z=#2+DFUi{$32EXgjvx8Hu{Z;&$0&Vblh-)7#5xgB% z_f<`!f=~ig`#=t%K6BT~zHRvDXc0|K-899oi@bv>WjhOo*qxxo)hTM-l$%NHNDx>_ zTU}sIrIbswR+VMMEQ%}G)<{g{)~{^ftLOn5ESA^h*bI@!J_ z(Sze^C~LPUc8a=gxdrB^xXI^ISmwrx0!N|t<{x{9`sEnhCL=NK<(b_B*z1vuirY`@ z<9*OR^KU*_;*74sCZ21jN3Zo1M3?0hVuw0*q+|aM7NW0+?MK%JX0Q?cA`D$|GHge9 zhQx<^0w%)*dJlF|Z^Yj7HW)L(H;26K4R0Vk>@~Ss-b7ph?#9_+?>P78^{##3j~*b* zL%0C%&`G`nA4GNp%-w!h00-X=1!g_2IDxq+*vr(LwX+-j=I$Vk&b`A8+;0T(cDh<& zCerZFdI=$Y2X+T$y$*_wc;Zy}qS(q>AiU-rV0rOCI@JkL`Qy(--)kVD#p6*yh2IpqxG%=q~#i*7PvO*;G$$m_@Xf1W8( 
z2S08_et9;*@wUyQKd*o7FXhNb5M1y>A$C;Y$Oq+&6Al6MAdYDAB^X_U;~uUDS{Z;f%MaUA=I+qWKKL=Jov-BGMchqa zh~Cc#cs`ZqLw1uGv@lBy_bUkCcf5g99ccc2#J*+rMze1z@NSaRBG`@&v(EVi(crX# zs9_%P&pKf5m!!R77?Fb(=xx13v_PZw@ow_bjELiTRDH+LnrEXCN)EMXk}n!CpniPj zVw{3{4Ch4drASud6m_?A;InYr=R|n&%?9qn5~3y6ML2E;70%`mx0oE{F_*{tPuMJn zMwlR&iK>PYjH+hSpZ6zVH@E4B?Ta>H%)dU$I?uMT%{TI@oj;Ag24|bgVMb;zj`e*I zGT#rH&y6gE8{e$oz+vx#=x2j4++Bbb2NgPby~O^0-LyB}UU<+f+(?sswc3Ptrfo3% zb2#b{Z!kPAK5Z{N6N?|pUU*3Cg$H3UoR@Tq#qgkabI6=NBP1rnVMjV4K1t6u8Gg** zZPt8V?1f(x&F+hj56R))t5?Ke_zOw##qpP@ubTEMd*Q+EP87sQkeg)x1SxY(C>W*5 z@UId=<@!`8W;2|%3AVF!Qw*y8v>QMcGM@>VfBQo|oZ4)7ushHL04n!7kWhHbly&5N6Q`r!z9w*+Z399>>P3A5qvAoxmb#uc;S z!8w7R0`tZm3f@}xW#>NT({X(Ak_ z)Vw3!J)z%|b3BcMSU!ojS7HqV4vH8ihP=B%-t9rh9^MEBV|k&-4jd=kr45SbH#}7U z@9q5su<>ihiR$UyhPE*1Yy&D38G?NiZzp5f{!VcFTX*AXlA!kx*U|DDU_+$m>oI$+ ztg8p#n`jGU0w0+ zhk`NDt_s_Ots+?yyREj1LsucM^F(a+?on%;UgxLqoej4Ke^>{@gIjXr zDqCS$U#M#@6bDWJzt%8e@FiA~*10O~i(03i8_aA#uM>;AX>&#>@5NQSlj4bupAzzq z`dtuRJF_5qVqs)Y!^y#>n;_87g!Jz6Q1=C@EaTV-;HV3e5baUkBm1CPS({XDD_=x`{Y9OI^GXL-e(IPojgpeD*V9$9wv66#Gr%O0=A7%6jfLya3v}=u4 zl;e+HIUQHDn2R4)S2}U@@M(X zXwG-5Fb=q;@hs(SzXP35843;2A`PEL9HmF!zWop`dWH>OXD}K*0Ts-70xnjl=&;ZkuCnr!e;_`ABMll z>rc?d^+yZ0oZRKzZWjIks^o4=3gLGO+R+AO3Jb@MLB!cf5XXYO&xhumKQV~epxQn> z*Bx-|@_#AtaKjISWs83hf=z?qoVmR!L%vKj_C?2}}nJd7LILDT|8yb9#(Y>mEE0DRe?CEkP64v_L1D5Juvucm}fQ z5sRy3E^7KiYHTyIEvA03XKfh8UeJ17etbW|apyyg_`xk9-GMeZ*u8MbW1s`+0C$bL zBe_G3`ditbsHp-Oo)SV&?P|V~!>jH;Vr`Wn#e8dNoeoe(=?mf!2#VErNnbU&CTx zJ{mmDR!^dDTNjSR5n@o)Fgu7_9}q*stJu$}Vql%acHL0S>A zS-gc!%Y4p1i`FI7)BRvn z6*gi%pd=Z+pR=R_jNi=&Cm;ppU7T0@Mm~k&-VmIlWRLDM(1wBFM)vA@0#~>JZ`6ar zHk1vN4+e=*i2!}pM}=lzAuqIhHv|K1Q5EIDWgOOEavm8(Rn)`Yfnhii!xOq-29_}C zN7te_2L1w!$W@J}lJ6sF+MA>1A=JjO4^Yrr-iyI0W+VmXfEuL&@e*^9axaLg|D4D0 zT1U)Yv0l?17-DU~dzdvb*xCT^O}>$$ER6gVePm?y$*5&(KNTDNHMqqpUeicMgA?~9 zOj3ivZJVJ}{|1BPHx1$by0wEiU*=x>dj12A&^F49`N7DJhGzxv9T*0L#jvimn|8jr zk%I{?k@U6thlBRGPW3;q2Wo!6l?-igyJ51j#D9z88YYCu`~n$QE|78M0yVCrWr9fO 
z>MNRt9cv3P>^o82v6~44+3#b2L(sgKqsz%K`mU_7)37)W^1sUno4!SR<7Y!6aPl5S z+7DCp3tj29%`kHxp5*ORZW`UgY$gX6eyKU6IfSp(T^I zJcm#J5D9uY<~;%vha}(@Aw{5s6T!hmS|a|1@?D^Z(pn%4)$rN?=Gh}7r~Y;xdb(5Z zQ@;7M%SywQ zHPxP)WuPmqUJBaEYR`oi%sl(rRkN^#fsaQ&p=F4sqO=}lo(d1bge%HD<<(1Pd%|_) zrATKs0y7Hw*Vff6Z773hy{Dplb?MUbvdR^uRi2sEHJ+T+60yWvRlXdURi4s1Fzr2a z&e^j)=gw6TdC$%z3qr>id_0vZN-$8h#xt|>i{)SRoDT2PJ*z6Ksys`|$zFL~T}_?J zslrv9u2PhEE&xgk!&6@izLtp$kdHOB52EUoyogC3oSP8c$Ve-SYA&2Bet)qo^8;0J&4;Q(slHN)-}HVoh;f`EnG+JkP2x zpSsjjT@&_HuBfdlUr}BiE?@dZ2ttKp@n}M;2-kROYRl_XF_OwK7D4{?mDiVppgyc5 zVv&}!oDunwHR1AlO>EKEG}M)qdrtE#Ew2w(R_jVpTT_X&>sVPNqN>W})#VUKWw-)d zP<-JjM!3dPURs7Kfr78|lvdSLFGppPWIRi18mgC;)~!Ku3+u`}Uq<<#TDAI%9*-X> zS)k2#+9bz_7S*Swvc9;odS!WCxOhcrxUO>bJWru|YdI=R<&vuMlTV&v_?QVwRjLUD zdTRYVaDe)uM5jf;nj9~TB@N4#jV6nP+|Oo45RWv2H7Swr7X%vY zs^#U?$Vg=vwLr1Hw0v15CBCK{f|ln@V_`6K!6M_r%P(DAM8El0E&AG}#kKNYVl48_ zF9;X~mtM&K;;RFfUurBYDhMqy7F=3bbm_v-qJZJQbTMfzTkP{23qzM&NP6GZR~ZWz z28>HCqEJ!6Ri4WO7ltmq#N!WL5%LG77zIl6tSqf+ zfI>veu;@@36lWff67)PzeMRN6nEVB&#P3#C|DJZ?Z5*7a;`jo}>@20yG-F681Ix;Ew8CVpS(iV=Y^tcphxRYN9SEy z-GBiW+n&-7mDO+}K_sKdIbML;!0?NtYuK-&2h~EE?WwQvKv)>XEGZ9TEI?`3b6m0} zK~1xEt*AkNUWN*1b!hC*a*I)IbTdpI=op8=7nfFrDK7hI!I0Vp;BX{8uA~PS_9py| z7>R$3%ZVoo&s02y1D7Yo{bwdUq=|nxWsLtXX#LznU&P|w!jV&dj(1xHrz!()fa{7G zbiGK|9J+SU1&1-vLsvdskJ430*L`$dMb|IrT25CxT`TFjldcF|x6*YxT{qM9W4buG zuiz5mKs8;P=vqn_4VVsGO;;aXMRe_>>jJp&#>2c!$3H(;(X-(0%fekE@sF&xJl z8IJ8)P6s6G*ly71j(#`};s3!OX)n4n@juo0SK~hIHPJZMa5f%mWR@IfWaNAb$Z3Wn z|6dFv|FedpkzYzYMnt(6_)a+yNugvlxC~GaMb)g4Y_u zQCnkVwyiddp_>e+`(~tjGt$4=Flw6+ZoOgjwip?16Y@b?G!{{d-Xfxcvukzs5G?lB|N z{kUP|>@YHiU==*)Ib%Zp^GN4;BeUoohM`rIdhvE6GBeU})hcR@D!{MIc$jqPZFiHY=7da;M;u3_MD;$oZ zD;ybZS2&EeD;>`Ks~nE}s~tw~5=W+|%;D%Q1FqbG^GSx0%^y6D>n}GPtKAOw)Ja*~ zXEYmrtREYGz{%~%NF9mz@B%91lkW8yP8Rtw6P*+AYRa68-va#i3q7)`pA>qgcw1(x z^VSJX;DF({qSkhoPKT9PJt^UsT$A%wzVI;I!}y>1LmlpECw=jxgmc5QY0|9|TPNJ+ z{B|Z&=aVkKcmJo3Pc5^MFgZT@;h}t(-xi^#tkg3G{S6>kE%YpU^^8G(4+wI9todP+ 
zQBN9r%Hu`+&lLLiSWJA9>76(t9w!gxBOA0sKhf!YhGF<5hts|)-+s{d2|ccA(@z?D z@x5e;HceuEV?kJmuI zTIfGF4*G1=or)uf-w*mLk0Aaq=z~J<8wWo>iQ{KH^v{6alFxYP*`wWk1pLeeeUH#% zIHI5AauaXLe<|p93H|xvpHj|o^tXe)U+7Ok*{LT@{H#Y?NdFtD4{BBc&t&>h^#6wl z*ZT(@4ns8kB!?4krZXElvs37Qn~pDMqFz?{N4Ox3pyR=?Oh3u#$asSEw}XDQ&|iEQ zdX<0BZxH(Cw0tGDqil&co!;B^sOERPdap<5k5upVO1LA{dmH|!`8ze8e8ivf+>3vU z{!8Par{23;;y-sB^wfJBg?>Exg?i75|HN^`&+&l8|9I$M1bxd9@G}+qV)YTwpHF=x z^yA^D3iMYVLHs`eeNgC69*2CMjH7QJ2mObjx8(012mNg5i5}sndK~oM0DTq@I^j7E zWuczr_AcJ6pB&o{3w^Knr|2*7XM2Ab^c#4SFrF``i(l)L3AZ|1L9OQv@59x;Nr(GF z+Hf3kCyqy{_cmF=0M0NU)Uj#Gmqe67&_%ji3HQ&`*0_(?2(k_-A9P*CX`59tZt5Kp)(tZryGR+$A_V3ITwI_8VrK)$!2l# zNh#;Vev#ptITMFvLq0dDT6*|!EboUAE;y*ejc0zi7xcM8e>`NNo)rF4>o2dX_YCQD zR;I1Lso}nXa20>m;l7o&oh63L7PgZ6W6FtR+zl|0&*y5&j86*xiS3qhT7>^jT$I4) zq_py-K$;G_KCl5-C-E8Yc%Vh-k90iHCgF~BJaFaTHGfw1OOX$9$%k^;i~n4qe}$fW zlI5v=^?DG>a9HS16aTU3IUca+v5cpmG2-WVz@q=kIOsVZ$ofdpsc%W^7!1dGP?3}~mO%BBoX#lWI1gGqqU%#y{+v@0 zUu@li%OK9@iT zt0yJ@spAcf2d>0@#C$%KU@3g5c=S37#{)BQ{Ue{>r=u5t=9}YzjY2=3@eIcU6}a%4 z&*AuCzBwMqovP_~r|lmS`x}OPAL06VM;e|0WT2iDd8PK-vysN|r**jW^(;}&Om8W| zd8X-bkEbioqe`8yx<(}bRDmg-62FIE0wglj)Zr}Hk_;CRE`gK!n6=y2=P<|{G1Y>7U3e8K#3 zd~x>-9p8BS_YFdSr2Tulggesyz2a2O-+24?pwMqmCm)fA%0Fm^Pt);_XFZGkd)Dci z9u`aWlU&|$@w0!QCiLU!e^vi3^rwy^e)jJcKjYz_{ri#d!~Wfh|8=CLo-y)U1^S+u zT0Z{6@UPl8==+8K!F2RV{T9oO>zuW-bUGzz!;xYAosR3A?Lwb^y*W`IsP>I;cYi^r zLs&h@`5&c!3?p2JgsT{b+wc3r^9)y51}dHQsv41eZyCEIDBr_PfEDd^lnGE&U_v2Yex|7 zNrdb7>2Pz>hD();6aC@Pd>t+;Z8)_~Y_A)!ot=+xJqvWW@$`pP?5~A>L)v%>iFk<=IB(8!H8 zc*Z+Us1^Dn9Ve`oaIXnJsr4X{Z?@lu@t;$!`5PW*IM(m?@joo#rlt!g{-mFQwzEs< z*NlUH5$I^z1QtB}(LsGlhi*Q2{?p(OllM;^l zE^*(=4AjBlOH*I^2dMNbh~nRz!3-byi_e5LkN94G8*)8SefEuWNdspBAy6LNl_ z!!=V}e8v$jjBqVK*Wq@j{eGXy=RHi18%gn0r>&RFc#}PzVY_<`;VSOe;XFqWZaSJ` z(E~c%RcK4?g=#%Cl4A+eRD<$0PX~U(~ zZ}z9|-)sIHY2}m{j`jO}{P#<^yVIt}2=#i}3^*(PpwmgK-z6T#&;E3^(5LNxgg%}8 zDHpDL3`soWU5{Y6Mfl(Nu;w>2ZF@}IZ^3Z4;=k=t9qy?k2)6~{W_IatKTjJju^gD* zhxl*btixRbw|Y|QYc{-N{X6B$zImgB`=Sn&+K$Da^h@LD4>uknU$=ukYm4Tq6K?gS 
z@Wloe+xIF|>ct?v?cDbcJtglxdH2gZo_;j|bT7Z=xuOUu4lTvqYT;^hscbxV!Q>SnfS|D~{sRBN>5FI`i$ z6l4ucikDQvjzFE!CW5OACdIIYU^MaXc@s01vZ4s2vi7A zPoS1SU3od(tLa_^3k3>lbO3}ksHz$jszrh2E6QrukhV?0`f`=t27tVD(vD2Zza9vr2!fv#{709?z*D2uA1_kyASXEvPES`*F;~2=V4hcJqDaFRW zs&tECF~YdR0Rt3EmGk=!WnjzrkzUCoMO*nnDT zJTT#sD}8m#jgd*^)hjFOYO0MpvW$!s6_6-Qe}v16D-bbC;Z_H-S+l&lbVa%GA6Z<= z$}~Dp#!utt`Ev?>`euN~(#mS%j}C32syJMOl4QYRqg$rYahhRFUTXX*qte)#5nfeO zOe;fTR;pN;&aiDT7=O;7d5Wv37kppdnLfcWQFE?JyEVV2p6(fb3I^*R` zRiOXoTxERQX=F64Sg}SKD>Aw=YKo2hu%6JE0}ks^AmwKpyE0(MsMr|H%rx3^zMk@-kF$~YXFgz0? z{KW(nVaEig(SeUt_KbiRV6#>7Z}t!eHDO=pu6H0>?;MwCdiQ`77w&2J}wrtx?9)G(pm z`2M8jG>sIihMkkjl@$|X>!hWXD=U|l>z1Ppp_Cg>PC{+pF$u*515B|{zn>H@=l3Qd znFp9m@9+8B-l^P=A;}EdQ;|4z{KngpP$mZvq91m%y~o0jDt}Fl==w_!AH)(}Ud(1)nA`ZFrD=t_6Qt z!`#QB@+UCAofCHBfB@-fPk?Zdz)^u~1^zFA+XUV$aEHKu5x7@ie*0#8Ljq4nn^5U} zRi}53z&QdJ30xv@g}_4sHwv7eufyLdaIa6pzZV#`OH_WI6_|ERn7@Mp!={OXPel78 zKeTT``~rbt>qNoj0ykc%;fTN`S84cX0_R_?;ckJ41RfB${p%V(8Eu8>L3OG4&k(ri z8Vz48uyL)1R|wquO$~qBqOZ~LT>`@%i;54;SH<531<3roDzJN%hChLpsN{dMhEEeX zM_`}8wE|yi!A+XJOyEX=*I4*wjsK3o?E?Qy;9h~D_jURaP5&o>iv;cyxK`i;0=Ef# z48E0;{|d zC@^f@C^$Py^H=h38lEjMY~Uz-$bxUz@G^mG@6d2WVA#r0^z9Z5#m4e{Okmp3A^euW zjsL0PPvY|+<>9|g}8nD%|h?|BydI}H~ITq|&yz(x0K{Az*Q1da;aEAWp59uoK=fj#(u z%k;JjTq5vW0yher`3cQ$?M6*MU0~WFVtAjxut}ugB?8kv5%D(&3|mDCzC&QxEmClY zz_ejR`W*uIc53(oi++=aKRs3RLt96rKS$spfxj*=>>w%pI)P~uiS$1c81|7A+#_(? 
zP7S{%Fl{H1J`10BSzlmBNx^3b44X;{o+mKuD=GL|fsGe5e6zsrmo@wgi~dy&|5@Pt zH#IyN+bx)0+n|QM0@HRA`B^A1>^Ld7Mqt=p8+?t<~V>Nt^z(dDr_!@y>yGqeV1g0G;hQC)}*tAmcPJwCPiui*9H)3Ez z`18l=^kDl+!Crx32TQ?=1ui*F!|Md5jVy-0PvGA3G~8#={ zG7W!OVA|Ot{nrJC%`FA55*YTk6#PAbX^V^W4+`9fjn0I(TKFq9{AYn_uZ#GMPZ`FE z6VU%&r(rkXPeO+p?HWD-unTR{+6Qowz}9|%(*?Hn1Iz)OX+WrW82^RD!%;F%!-WFF z#+t%kAuw&Hk^Y;6k)E~BAZ*clG`>aPq0edfhXTXyn+pH1z$F5|Xoa7q@q+@>9vstq zUto{G8QGfOoRc-)EwEd{PZzjE=+6+?Bjfik30y1u76?q6bWE>A;2epsNnqGcQ}}j) zJtZ3M5V+_X4fj~|Wg33Yq8Io*i(cT#$Lsv$e_hj`Y{Bnpe!eJh@535iAg~AfM96QE zz_1mk;BtX!_m24MEI41o5rG@8()8aI7&hV*{m%q0xmDBu&INkqe<}AB>1T$`1vIG zr6l;xBzRvEd>{$t26hO`me(Fg=`Z+y3IBcge;NO;;2+(i@f!YL$NwAnNB3y-<9`7E zZ{Z)^qdHIiHr_Ad|Ihfxlh6NuMw}jlD{Z6MP*(`CrJcG0z)%V~+KPjXlywOkRyLqx zOAxj{*4VhxmKGDY*kOm!iy-yI<=4S7YkhIuD1r5IUG5kdSon?LQjzsPU6LP3%}M6Fb6dVx>ZQAd=(P3y?9gq_rZI)4A6Aq+TeBk7BMwn7D)_ zq2h~kF4kV-mhFUsOeL~9x0rs3NoXt0$*ZXnMFKK0oHFZOo+exZGBF%#$WjSaReJrJ zByN(i$^6IR(?**dMpx+MYCD&U$)i;Cxy7S34EQPo*-818by{7*TDn&1NvH&gq@l4G zs;O2E9kq(* zQaSK*hF$Y?U5WX}w>afH8f8L*)>PKwx2k%v=#spXzUK(B_DxKA)V5?Xn}8f6oKikk zk&hZUMLj`owB?_Y>r{*2R(vr`Qf6a_L>8^qoy?fkT$BA18A?zUxnt?7SSeUBCWVdf zJd#irFJow+Tv4vIXl!Ft?}V~S!$?e0Nd!83sgh12J1l`R%PzGozO7q%&{{N2tX6rA zk;zeuh?-e*hGB-(jnn~vJp>tL1OQxoJ13aZ(BU~+X9n;1;vp}d$BMU zy9q63EDDr8uivm4)0Jh^l8mKt7<3X7vHA^0EV+)+EN!YW8!xC>T@va+a<1bmSu!q} zNwvQ~cZ;?#wC+z()G5)ZOOfmlR`$|1$GFUmMWc&hF6RJaCZ9~lIC0AZuFbG(NixTy z1xXTyogGb^dssGNbyMG@WhbsrA!Wuh`SVRfhAt89Yc1S?jY3rh} zBh`$LDx6q)he0RB9c$U(z^YeprmwO#-PxS3j)M+V7%Fg{mXx-gFRKX0&?tz+D$lX0 z(k>$;G$KS7-%s1IkI_+sM$0Fb@3hU|rk!ic=y1(CO^~rgR;y-b>1ak1YYx3B6HfB^FZsWrRi&eN(tllWDU0KSP z+HY1S@0A8N1=<_Sii=Y>o?#mxHm|8)Zb2#@Z4+CuwRepJKYNRoYzw1b>h>}EF0Cm} zv$;#KX?J)jd~AamLACdc4hAY#8@Fso?dDRDY(Ar#g@6ri+-nvGsvT&A6Sh_T(fts3vNA-;nur8-i#Aj|a?zz=2ahsfCjoztBp1!g42qh-Cf_o&3 z +#include +#include +#include +#include + +#include "gf.h" +#include "gf_method.h" + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_mult a b w [method] - does multiplication of a and b in GF(2^w)\n"); + 
fprintf(stderr, " If w has an h on the end, treat a, b and the product as hexadecimal (no 0x)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " legal w are: 1-32, 64 and 128\n"); + fprintf(stderr, " 128 is hex only (i.e. '128' will be an error - do '128h')\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " For method specification, type gf_methods\n"); + + if (s != NULL) fprintf(stderr, "%s", s); + exit(1); +} + +int read_128(char *s, uint64_t *v) +{ + int l, t; + char save; + + l = strlen(s); + if (l > 32) return 0; + + if (l > 16) { + if (sscanf(s + (l-16), "%llx", (long long unsigned int *) &(v[1])) == 0) return 0; + save = s[l-16]; + s[l-16] = '\0'; + t = sscanf(s, "%llx", (long long unsigned int *) &(v[0])); + s[l-16] = save; + return t; + } else { + v[0] = 0; + return sscanf(s, "%llx", (long long unsigned int *)&(v[1])); + } + return 1; +} + +void print_128(uint64_t *v) +{ + if (v[0] > 0) { + printf("%llx", (long long unsigned int) v[0]); + printf("%016llx", (long long unsigned int) v[1]); + } else { + printf("%llx", (long long unsigned int) v[1]); + } + printf("\n"); +} + + +int main(int argc, char **argv) +{ + int hex, al, bl, w; + uint32_t a, b, c, top; + uint64_t a64, b64, c64; + uint64_t a128[2], b128[2], c128[2]; + char *format; + gf_t gf; + + if (argc < 4) usage(NULL); + if (sscanf(argv[3], "%d", &w) == 0) usage("Bad w\n"); + + if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage("Bad w"); + + hex = (strchr(argv[3], 'h') != NULL); + if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("\nBad Method\n"); + + if (!hex && w == 128) usage(NULL); + + if (w <= 32) { + format = (hex) ? "%x" : "%u"; + if (sscanf(argv[1], format, &a) == 0) usage("Bad a\n"); + if (sscanf(argv[2], format, &b) == 0) usage("Bad b\n"); + + if (w < 32) { + top = (w == 31) ? 
0x80000000 : (1 << w); + if (w != 32 && a >= top) usage("a is too large\n"); + if (w != 32 && b >= top) usage("b is too large\n"); + } + + c = gf.divide.w32(&gf, a, b); + printf(format, c); + printf("\n"); + + } else if (w == 64) { + format = (hex) ? "%llx" : "%llu"; + if (sscanf(argv[1], format, &a64) == 0) usage("Bad a\n"); + if (sscanf(argv[2], format, &b64) == 0) usage("Bad b\n"); + c64 = gf.divide.w64(&gf, a64, b64); + + printf(format, c64); + printf("\n"); + + } else if (w == 128) { + + if (read_128(argv[1], a128) == 0) usage("Bad a\n"); + if (read_128(argv[2], b128) == 0) usage("Bad b\n"); + gf.divide.w128(&gf, a128, b128, c128); + + print_128(c128); + } + exit(0); +} diff --git a/gf_general.c b/gf_general.c new file mode 100644 index 0000000..80d0333 --- /dev/null +++ b/gf_general.c @@ -0,0 +1,421 @@ +/* + * gf_general.c + * + * This file has helper routines for doing basic GF operations with any + * legal value of w. The problem is that w <= 32, w=64 and w=128 all have + * different data types, which is a pain. The procedures in this file try + * to alleviate that pain. They are used in gf_unit and gf_time. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "gf.h" +#include "gf_int.h" +#include "gf_method.h" +#include "gf_rand.h" +#include "gf_general.h" + +void gf_general_set_zero(gf_general_t *v, int w) +{ + if (w <= 32) { + v->w32 = 0; + } else if (w <= 64) { + v->w64 = 0; + } else { + v->w128[0] = 0; + v->w128[1] = 0; + } +} + +void gf_general_set_one(gf_general_t *v, int w) +{ + if (w <= 32) { + v->w32 = 1; + } else if (w <= 64) { + v->w64 = 1; + } else { + v->w128[0] = 0; + v->w128[0] = 1; + } +} + +void gf_general_set_two(gf_general_t *v, int w) +{ + if (w <= 32) { + v->w32 = 2; + } else if (w <= 64) { + v->w64 = 2; + } else { + v->w128[0] = 0; + v->w128[0] = 2; + } +} + +int gf_general_is_zero(gf_general_t *v, int w) +{ + if (w <= 32) { + return (v->w32 == 0); + } else if (w <= 64) { + return (v->w64 == 0); + } else { + return (v->w128[0] == 0 && v->w128[1] == 0); + } +} + +int gf_general_is_one(gf_general_t *v, int w) +{ + if (w <= 32) { + return (v->w32 == 1); + } else if (w <= 64) { + return (v->w64 == 1); + } else { + return (v->w128[0] == 0 && v->w128[1] == 1); + } +} + +void gf_general_set_random(gf_general_t *v, int w, int zero_ok) +{ + if (w <= 32) { + v->w32 = MOA_Random_W(w, zero_ok); + } else if (w <= 64) { + while (1) { + v->w64 = MOA_Random_64(); + if (v->w64 != 0 || zero_ok) return; + } + } else { + while (1) { + MOA_Random_128(v->w128); + if (v->w128[0] != 0 || v->w128[1] != 0 || zero_ok) return; + } + } +} + +void gf_general_val_to_s(gf_general_t *v, int w, char *s) +{ + if (w <= 32) { + sprintf(s, "%x", v->w32); + } else if (w <= 64) { + sprintf(s, "%llx", v->w64); + } else { + if (v->w128[0] == 0) { + sprintf(s, "%llx", v->w128[1]); + } else { + sprintf(s, "%llx%016llx", v->w128[0], v->w128[1]); + } + } +} + +void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c) +{ + gf_internal_t *h; + int w; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + + if (w <= 32) { + c->w32 = 
gf->multiply.w32(gf, a->w32, b->w32); + } else if (w <= 64) { + c->w64 = gf->multiply.w64(gf, a->w64, b->w64); + } else { + gf->multiply.w128(gf, a->w128, b->w128, c->w128); + } +} + +void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c) +{ + gf_internal_t *h; + int w; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + + if (w <= 32) { + c->w32 = gf->divide.w32(gf, a->w32, b->w32); + } else if (w <= 64) { + c->w64 = gf->divide.w64(gf, a->w64, b->w64); + } else { + gf->divide.w128(gf, a->w128, b->w128, c->w128); + } +} + +void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b) +{ + gf_internal_t *h; + int w; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + + if (w <= 32) { + b->w32 = gf->inverse.w32(gf, a->w32); + } else if (w <= 64) { + b->w64 = gf->inverse.w64(gf, a->w64); + } else { + gf->inverse.w128(gf, a->w128, b->w128); + } +} + +int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w) +{ + if (w <= 32) { + return (v1->w32 == v2->w32); + } else if (w <= 64) { + return (v1->w64 == v2->w64); + } else { + return (v1->w128[0] == v2->w128[0] && + v1->w128[0] == v2->w128[0]); + } +} + +void gf_general_do_region_multiply(gf_t *gf, gf_general_t *a, void *ra, void *rb, int bytes, int xor) +{ + gf_internal_t *h; + int w; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + + if (w <= 32) { + gf->multiply_region.w32(gf, ra, rb, a->w32, bytes, xor); + } else if (w <= 64) { + gf->multiply_region.w64(gf, ra, rb, a->w64, bytes, xor); + } else { + gf->multiply_region.w128(gf, ra, rb, a->w128, bytes, xor); + } +} + +void gf_general_do_region_check(gf_t *gf, gf_general_t *a, void *orig_a, void *orig_target, void *final_target, int bytes, int xor) +{ + gf_internal_t *h; + int w, words, i; + gf_general_t oa, ot, ft, sb; + char sa[50], soa[50], sot[50], sft[50], ssb[50]; + uint8_t *p; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + + words = (bytes * 8) / w; + for (i = 0; i < words; i++) { + if (w <= 32) { + oa.w32 = 
gf->extract_word.w32(gf, orig_a, bytes, i); + ot.w32 = gf->extract_word.w32(gf, orig_target, bytes, i); + ft.w32 = gf->extract_word.w32(gf, final_target, bytes, i); + sb.w32 = gf->multiply.w32(gf, a->w32, oa.w32); + if (xor) sb.w32 ^= ot.w32; + } else if (w <= 64) { + oa.w64 = gf->extract_word.w64(gf, orig_a, bytes, i); + ot.w64 = gf->extract_word.w64(gf, orig_target, bytes, i); + ft.w64 = gf->extract_word.w64(gf, final_target, bytes, i); + sb.w64 = gf->multiply.w64(gf, a->w64, oa.w64); + if (xor) sb.w64 ^= ot.w32; + } else { + gf->extract_word.w128(gf, orig_a, bytes, i, oa.w128); + gf->extract_word.w128(gf, orig_target, bytes, i, ot.w128); + gf->extract_word.w128(gf, final_target, bytes, i, ft.w128); + gf->multiply.w128(gf, a->w128, oa.w128, sb.w128); + if (xor) { + sb.w128[0] ^= ot.w128[0]; + sb.w128[1] ^= ot.w128[1]; + } + } + + if (!gf_general_are_equal(&ft, &sb, w)) { + + printf("Problem with region multiply (all values in hex):\n"); + printf(" Target address base: 0x%lx. Word 0x%x of 0x%x. Xor: %d\n", + (unsigned long) final_target, i, words, xor); + gf_general_val_to_s(a, w, sa); + gf_general_val_to_s(&oa, w, soa); + gf_general_val_to_s(&ot, w, sot); + gf_general_val_to_s(&ft, w, sft); + gf_general_val_to_s(&sb, w, ssb); + printf(" Value: %s\n", sa); + printf(" Original source word: %s\n", soa); + if (xor) printf(" XOR with target word: %s\n", sot); + printf(" Product word: %s\n", sft); + printf(" It should be: %s\n", ssb); + exit(0); + } + } +} + +void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size) +{ + uint32_t *r32; + int i; + + /* If w is 8, 16, 32, 64 or 128, this is easy -- + just fill the regions with random bytes. + + Otherwise, treat every four bytes as an uint32_t + and fill it with a random value mod (1 << w). 
+ */ + + if (w == 8 || w == 16 || w == 32 || w == 64 || w == 128) { + MOA_Fill_Random_Region (ra, size); + MOA_Fill_Random_Region (rb, size); + } else { + r32 = (uint32_t *) ra; + for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 1); + r32 = (uint32_t *) rb; + for (i = 0; i < size/4; i++) r32[i] = MOA_Random_W(w, 0); + } +} + +/* This sucks, but in order to time, you really need to avoid putting ifs in + the inner loops. So, I'm doing a separate timing test for each w: + 8, 16, 32, 64, 128 and everything else. Fortunately, the "everything else" + tests can be equivalent to w=32. + + I'm also putting the results back into ra, because otherwise, the optimizer might + figure out that we're not really doing anything in the inner loops and it + will chuck that. */ + +int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, char test) +{ + gf_internal_t *h; + void *top; + uint8_t *r8a, *r8b, *top8; + uint16_t *r16a, *r16b, *top16; + uint32_t *r32a, *r32b, *top32; + uint64_t *r64a, *r64b, *top64, *r64c; + int w, rv; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + top = ra + size; + + if (w == 8) { + r8a = (uint8_t *) ra; + r8b = (uint8_t *) rb; + top8 = (uint8_t *) top; + if (test == 'M') { + while (r8a < top8) { + *r8a = gf->multiply.w32(gf, *r8a, *r8b); + r8a++; + r8b++; + } + } else if (test == 'D') { + while (r8a < top8) { + *r8a = gf->divide.w32(gf, *r8a, *r8b); + r8a++; + r8b++; + } + } else if (test == 'I') { + while (r8a < top8) { + *r8a = gf->inverse.w32(gf, *r8a); + r8a++; + } + } + return (top8 - (uint8_t *) ra); + } + + if (w == 16) { + r16a = (uint16_t *) ra; + r16b = (uint16_t *) rb; + top16 = (uint16_t *) top; + if (test == 'M') { + while (r16a < top16) { + *r16a = gf->multiply.w32(gf, *r16a, *r16b); + r16a++; + r16b++; + } + } else if (test == 'D') { + while (r16a < top16) { + *r16a = gf->divide.w32(gf, *r16a, *r16b); + r16a++; + r16b++; + } + } else if (test == 'I') { + while (r16a < top16) { + *r16a = gf->inverse.w32(gf, 
*r16a); + r16a++; + } + } + return (top16 - (uint16_t *) ra); + } + if (w <= 32) { + r32a = (uint32_t *) ra; + r32b = (uint32_t *) rb; + top32 = (uint32_t *) ra + (size/4); /* This is for the "everything elses" */ + + if (test == 'M') { + while (r32a < top32) { + *r32a = gf->multiply.w32(gf, *r32a, *r32b); + r32a++; + r32b++; + } + } else if (test == 'D') { + while (r32a < top32) { + *r32a = gf->divide.w32(gf, *r32a, *r32b); + r32a++; + r32b++; + } + } else if (test == 'I') { + while (r32a < top32) { + *r32a = gf->inverse.w32(gf, *r32a); + r32a++; + } + } + return (top32 - (uint32_t *) ra); + } + if (w == 64) { + r64a = (uint64_t *) ra; + r64b = (uint64_t *) rb; + top64 = (uint64_t *) top; + if (test == 'M') { + while (r64a < top64) { + *r64a = gf->multiply.w64(gf, *r64a, *r64b); + r64a++; + r64b++; + } + } else if (test == 'D') { + while (r64a < top64) { + *r64a = gf->divide.w64(gf, *r64a, *r64b); + r64a++; + r64b++; + } + } else if (test == 'I') { + while (r64a < top64) { + *r64a = gf->inverse.w64(gf, *r64a); + r64a++; + } + } + return (top64 - (uint64_t *) ra); + } + if (w == 128) { + r64a = (uint64_t *) ra; + r64c = r64a; + r64a += 2; + r64b = (uint64_t *) rb; + top64 = (uint64_t *) top; + rv = (top64 - r64a)/2; + if (test == 'M') { + while (r64a < top64) { + gf->multiply.w128(gf, r64a, r64b, r64c); + r64a += 2; + r64b += 2; + } + } else if (test == 'D') { + while (r64a < top64) { + gf->divide.w128(gf, r64a, r64b, r64c); + r64a += 2; + r64b += 2; + } + } else if (test == 'I') { + while (r64a < top64) { + gf->inverse.w128(gf, r64a, r64c); + r64a += 2; + } + } + return rv; + } + return 0; +} diff --git a/gf_general.h b/gf_general.h new file mode 100644 index 0000000..d60bf68 --- /dev/null +++ b/gf_general.h @@ -0,0 +1,55 @@ +/* + * gf_general.h + * + * This file has helper routines for doing basic GF operations with any + * legal value of w. The problem is that w <= 32, w=64 and w=128 all have + * different data types, which is a pain. 
+ * The procedures in this file try
+ * to alleviate that pain.  They are used in gf_unit and gf_time.
+ */
+
+#pragma once
+
+#include   /* NOTE(review): the header names of these six #include lines are missing in this copy of the patch; restore them from the original commit */
+#include
+#include
+#include
+#include
+#include
+
+#include "gf.h"
+
+typedef union {
+  uint32_t w32;       /* the value when w <= 32 (the w <= 32 branches in gf_general.c use this member) */
+  uint64_t w64;       /* the value when 32 < w <= 64 */
+  uint64_t w128[2];   /* the value when w > 64, as two 64-bit halves */
+} gf_general_t;
+
+void gf_general_set_zero(gf_general_t *v, int w);
+void gf_general_set_one(gf_general_t *v, int w);
+void gf_general_set_two(gf_general_t *v, int w);
+
+int gf_general_is_zero(gf_general_t *v, int w);
+int gf_general_is_one(gf_general_t *v, int w);
+int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w);
+
+void gf_general_val_to_s(gf_general_t *v, int w, char *s);   /* callers in gf_general.c pass 50-byte buffers for s */
+
+void gf_general_set_random(gf_general_t *v, int w, int zero_ok);
+
+void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);   /* result goes in c */
+void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);     /* result goes in c */
+void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b);                     /* result goes in b */
+
+void gf_general_do_region_multiply(gf_t *gf, gf_general_t *a,
+                                   void *ra, void *rb,
+                                   int bytes, int xor);
+
+void gf_general_do_region_check(gf_t *gf, gf_general_t *a,
+                                void *orig_a, void *orig_target, void *final_target,
+                                int bytes, int xor);   /* prints a report and exits on the first mismatching word */
+
+
+/* Which is M, D or I for multiply, divide or inverse.  It is compared
+   against the characters 'M', 'D' and 'I'.
+   gf_general_do_single_timing_test() returns 0 when the field's w has no
+   timing loop (32 < w < 64 or 64 < w < 128). */
+
+void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size);
+int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, char which);
diff --git a/gf_int.h b/gf_int.h
new file mode 100644
index 0000000..6b48d80
--- /dev/null
+++ b/gf_int.h
@@ -0,0 +1,101 @@
+/*
+ * gf_int.h
+ *
+ * Internal code for Galois field routines.
+ */
+
+#pragma once
+
+#include "gf.h"
+
+#include   /* NOTE(review): the header name is missing in this copy of the patch; restore it from the original commit */
+
+extern void timer_start (double *t);
+extern double timer_split (const double *t);
+extern void galois_fill_random (void *buf, int len, unsigned int seed);
+
+extern int galois_is_sse();
+
+typedef struct {
+  int mult_type;       /* presumably the GF_MULT_* selector given to gf_init_hard() -- TODO confirm */
+  int region_type;     /* presumably a mask of GF_REGION_* flags -- TODO confirm */
+  int divide_type;     /* presumably the GF_DIVIDE_* selector -- TODO confirm */
+  int w;               /* word size in bits; gf_general.c reads it through gf->scratch and computes words = (bytes * 8) / w */
+  uint64_t prim_poly;
+  int free_me;
+  int arg1;            /* presumably the arg1/arg2 passed to gf_init_hard() -- TODO confirm */
+  int arg2;
+  gf_t *base_gf;
+  void *private;
+} gf_internal_t;
+
+extern int gf_w4_init (gf_t *gf);
+extern int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
+
+extern int gf_w8_init (gf_t *gf);
+extern int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
+
+extern int gf_w16_init (gf_t *gf);
+extern int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
+
+extern int gf_w32_init (gf_t *gf);
+extern int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
+
+extern int gf_w64_init (gf_t *gf);
+extern int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
+
+extern int gf_w128_init (gf_t *gf);
+extern int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
+
+extern int gf_wgen_init (gf_t *gf);
+extern int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type, int arg1, int arg2);   /* the only scratch_size variant that also takes w */
+
+void gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor);
+gf_val_32_t gf_wgen_extract_word(gf_t *gf, void *start, int bytes, int index);
+
+
+extern void gf_alignment_error(char *s, int a);
+
+extern uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp);
+
+/* This structure lets you define a region multiply.  It helps because you can handle
+   unaligned portions of the data with the procedures below, which really cleans
+   up the code.
+ */
+
+typedef struct {
+  gf_t *gf;
+  void *src;
+  void *dest;
+  int bytes;
+  uint32_t val;
+  int xor;
+  int align;           /* The number of bytes to which to align. */
+  void *s_start;       /* The start and the top of the aligned region (presumably s_* lie within src and d_* within dest -- confirm in gf_set_region_data). */
+  void *d_start;
+  void *s_top;
+  void *d_top;
+} gf_region_data;
+
+/* This lets you set up one of these in one call.  It also sets the start/top pointers. */
+
+void gf_set_region_data(gf_region_data *rd,
+                        gf_t *gf,
+                        void *src,
+                        void *dest,
+                        int bytes,
+                        uint32_t val,
+                        int xor,
+                        int align);
+
+/* This performs gf->multiply.w32() on all of the unaligned bytes in the beginning of the region */
+
+extern void gf_do_initial_region_alignment(gf_region_data *rd);
+
+/* This performs gf->multiply.w32() on all of the unaligned bytes in the end of the region */
+
+extern void gf_do_final_region_alignment(gf_region_data *rd);
+
+extern void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base);
+
+extern void gf_multby_zero(void *dest, int bytes, int xor);
+extern void gf_multby_one(gf_t *gf, void *src, void *dest, int bytes, int xor);
diff --git a/gf_method.c b/gf_method.c
new file mode 100644
index 0000000..07a9459
--- /dev/null
+++ b/gf_method.c
@@ -0,0 +1,185 @@
+/*
+ * gf_method.c
+ *
+ * Parses argv to figure out the mult_type and arguments.  Returns the gf.
+ */ + +#include +#include +#include +#include +#include + +#include "gf.h" +#include "gf_method.h" + +void methods_to_stderr() +{ + fprintf(stderr, "To specify the methods, do one of the following: \n"); + fprintf(stderr, " - leave empty to use defaults\n"); + fprintf(stderr, " - use a single dash to use defaults\n"); + fprintf(stderr, " - specify MULTIPLY REGION DIVIDE\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Legal values of MULTIPLY:\n"); + fprintf(stderr, " SHIFT: shift\n"); + fprintf(stderr, " GROUP g_mult g_reduce: the Group technique - see the paper\n"); + fprintf(stderr, " BYTWO_p: BYTWO doubling the product.\n"); + fprintf(stderr, " BYTWO_b: BYTWO doubling b (more efficient thatn BYTWO_p)\n"); + fprintf(stderr, " TABLE: Full multiplication table\n"); + fprintf(stderr, " LOG: Discrete logs\n"); + fprintf(stderr, " LOG_ZERO: Discrete logs with a large table for zeros\n"); + fprintf(stderr, " SPLIT g_a g_b: Split tables defined by g_a and g_b\n"); + fprintf(stderr, " COMPOSITE k rec METHOD: Composite field. GF((2^l)^k), l=w/k.\n"); + fprintf(stderr, " rec = 0 means inline single multiplication\n"); + fprintf(stderr, " rec = 1 means recursive single multiplication\n"); + fprintf(stderr, " METHOD is the method of the base field in GF(2^l)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Legal values of REGION: Specify multiples with commas e.g. 
'DOUBLE,LAZY'\n"); + fprintf(stderr, " -: Use defaults\n"); + fprintf(stderr, " SINGLE/DOUBLE/QUAD: Expand tables\n"); + fprintf(stderr, " LAZY: Lazily create table (only applies to TABLE and SPLIT)\n"); + fprintf(stderr, " SSE/NOSSE: Use 128-bit SSE instructions if you can\n"); + fprintf(stderr, " CAUCHY/ALTMAP/STDMAP: Use different memory mappings\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Legal values of DIVIDE:\n"); + fprintf(stderr, " -: Use defaults\n"); + fprintf(stderr, " MATRIX: Use matrix inversion\n"); + fprintf(stderr, " EUCLID: Use the extended Euclidian algorithm.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "See the user's manual for more information.\n"); + fprintf(stderr, "There are many restrictions, so it is better to simply use defaults in most cases.\n"); +} + +int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting) +{ + int mult_type, divide_type, region_type; + uint32_t prim_poly = 0; + int arg1, arg2, subrg_size; + gf_t *base; + char *crt, *x, *y; + + if (argc <= starting || strcmp(argv[starting], "-") == 0) { + mult_type = GF_MULT_DEFAULT; + if (!gf_init_easy(gf, w, mult_type)) return 0; + return (argc <= starting) ? 
starting : starting+1; + } + + region_type = GF_REGION_DEFAULT; + divide_type = GF_DIVIDE_DEFAULT; + + arg1 = 0; + arg2 = 0; + prim_poly = 0; + base = NULL; + subrg_size = 0; + + if (argc < starting+3) return 0; + + if (strcmp(argv[starting], "SHIFT") == 0) { + mult_type = GF_MULT_SHIFT; + starting++; + } else if (strcmp(argv[starting], "GROUP") == 0) { + mult_type = GF_MULT_GROUP; + if (argc < starting+5) return 0; + if (sscanf(argv[starting+1], "%d", &arg1) == 0 || + sscanf(argv[starting+2], "%d", &arg2) == 0 || + arg1 <= 0 || arg2 <= 0 || arg1 >= w || arg2 >= w) return 0; + starting += 3; + } else if (strcmp(argv[starting], "BYTWO_p") == 0) { + mult_type = GF_MULT_BYTWO_p; + starting++; + } else if (strcmp(argv[starting], "BYTWO_b") == 0) { + mult_type = GF_MULT_BYTWO_b; + starting++; + } else if (strcmp(argv[starting], "TABLE") == 0) { + mult_type = GF_MULT_TABLE; + starting++; + } else if (strcmp(argv[starting], "LOG") == 0) { + mult_type = GF_MULT_LOG_TABLE; + starting++; + } else if (strcmp(argv[starting], "LOG_ZERO") == 0) { + mult_type = GF_MULT_LOG_TABLE; + arg1 = 1; + starting++; + } else if (strcmp(argv[starting], "SPLIT") == 0) { + mult_type = GF_MULT_SPLIT_TABLE; + if (argc < starting+5) return 0; + if (sscanf(argv[starting+1], "%d", &arg1) == 0 || + sscanf(argv[starting+2], "%d", &arg2) == 0 || + arg1 <= 0 || arg2 <= 0 || w % arg1 != 0 || w % arg2 != 0) return 0; + starting += 3; + } else if (strcmp(argv[starting], "COMPOSITE") == 0) { + mult_type = GF_MULT_COMPOSITE; + if (argc < starting+6) return 0; + if (sscanf(argv[starting+1], "%d", &arg1) == 0 || + sscanf(argv[starting+2], "%d", &arg2) == 0 || + arg1 <= 1 || w %arg1 != 0 || ((arg2 | 1) != 1)) return 0; + base = (gf_t *) malloc(sizeof(gf_t)); + starting = create_gf_from_argv(base, w/arg1, argc, argv, starting+3); + if (starting == 0) { free(base); return 0; } + } else { + return 0; + } + + if (argc < starting+2) { + if (base != NULL) gf_free(base, 1); + return 0; + } + + if 
(strcmp(argv[starting], "-") == 0) { + region_type = GF_REGION_DEFAULT; + } else { + crt = strdup(argv[starting]); + region_type = 0; + x = crt; + do { + y = strchr(x, ','); + if (y != NULL) *y = '\0'; + if (strcmp(x, "DOUBLE") == 0) { + region_type |= GF_REGION_DOUBLE_TABLE; + } else if (strcmp(x, "QUAD") == 0) { + region_type |= GF_REGION_QUAD_TABLE; + } else if (strcmp(x, "SINGLE") == 0) { + region_type |= GF_REGION_SINGLE_TABLE; + } else if (strcmp(x, "LAZY") == 0) { + region_type |= GF_REGION_LAZY; + } else if (strcmp(x, "SSE") == 0) { + region_type |= GF_REGION_SSE; + } else if (strcmp(x, "NOSSE") == 0) { + region_type |= GF_REGION_NOSSE; + } else if (strcmp(x, "CAUCHY") == 0) { + region_type |= GF_REGION_CAUCHY; + } else if (strcmp(x, "ALTMAP") == 0) { + region_type |= GF_REGION_ALTMAP; + } else if (strcmp(x, "STDMAP") == 0) { + region_type |= GF_REGION_STDMAP; + } else { + if (base != NULL) gf_free(base, 1); + free(crt); + return 0; + } + if (y != NULL) x = y+1; + } while (y != NULL); + free(crt); + } + + starting++; + + if (strcmp(argv[starting], "-") == 0) { + divide_type = GF_DIVIDE_DEFAULT; + } else if (strcmp(argv[starting], "MATRIX") == 0) { + divide_type = GF_DIVIDE_MATRIX; + } else if (strcmp(argv[starting], "EUCLID") == 0) { + divide_type = GF_DIVIDE_EUCLID; + } else { + if (base != NULL) gf_free(base, 1); + return 0; + } + starting++; + + if (!gf_init_hard(gf, w, mult_type, region_type, divide_type, prim_poly, arg1, arg2, base, NULL)) { + if (base != NULL) gf_free(base, 1); + return 0; + } + return starting; +} diff --git a/gf_method.h b/gf_method.h new file mode 100644 index 0000000..6307c39 --- /dev/null +++ b/gf_method.h @@ -0,0 +1,15 @@ +/* + * gf_method.h + * + * Parses argv to figure out the flags and arguments. Creates the gf. 
+ */ + +#pragma once + +#include "gf.h" + +/* This prints out the error string defining the methods that you can put on argv*/ +extern void methods_to_stderr(); + +/* Parses argv starting at "starting" */ +extern int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting); diff --git a/gf_methods b/gf_methods new file mode 100755 index 0000000000000000000000000000000000000000..0c6b002b6950a4fbe5edc1e678447aa3a4428d05 GIT binary patch literal 121848 zcmeFa3wTu3)i*wqOke`hGbl<_C`nD*U|;)YzzY$rIWPlfbfQ>@SZ}D5Xsrr`89^60mYN)k#WE1jv;8cV5Ps?JJUj}X zsZ%G)6Wlkuzjr;wrvF*fgIPmV&RkS5b?RjIO_S4;@chqal%j=!n9+i6sR!=r%s*y(EWGKxNqugpZ_jrU;JwBwDR)q(=zE{ zy$rin9!SRjrcRwU+c)cn58Q2p)A8%p$_q*&0fj+(ZP%=zI1KbZQ%2dZae zDX&{A?^CT93ZEFi>Gw>no^|K_(?zYB?Tz^aevWSoIEBsh1OMJ3AmTsq-4@h6b*f9A zESn(UFOHH!i$j?IX<_0wDwlYV{-#cyIcvtBaPtnR-q~)Qoxjbs`xoPEP@?ARp zy0r3YG~!cOr#lXXJi1I- zruA!?mIGn>>(%&$+Nt$4kN>*GQlls35lCObU&Ox(go}{jj=xzg-9U(FR)}ZfD^|;X z{C2IeS_+2Nm%94G&*%%X@RWeqT#{P>+Yk*hP5$|NhhE@yYO@c8E%$bM()>PZk{z(tb_D#S4J7d2CI{ff1fdBpXZwmaI0{^DKzbWu<3j7bDz`YiQ z{Zk3-?pD-!b|vh0up@y6YjjtxVm+X+_JmCdMD0qT+~V({w;0WPtU1;n+m%lj;jaqW z%9_^XC2BiXVrE;03WvIQWT+dFO1Ns7!j^Q=Ycis+&=$Nkhd#%D zS|mx!!&F%33lyP*^RB-Yb+HhY>UKsHl^48{QmU^|5-&@}etkAJkCFMsA_kcK4oCsx zD*UXXq?Bm}vdmB2wrnB0@NC%PmIr$b2(T zw4{)?W=Ro#nnO1Hr&Xgjd~pC}Cx$Qb1l~k9oa0|&wRpkhszlDt5f<-nSp6d7D>SBeO~z|tHw95y<`h`>m`xx@I>IAZ1k3Y z;NPLBuMiUhNUcPf?*SP9jG_V|VllA888{ zpF}dXvPBs+Kni^b&QWUD(15%}zE!?WzFnT$+KA5Wq}su)Kf`Y*;x9+}-}yd8iQyN{ zTgDTFIH!isd5MQI7&d+l$N5q!gUWJ?vTjCMAKzq@Uzm!oBWM7fI5CEwx+1eI7QU3kP-9R)dm zNR@^!9j=x1vM6Z+WXUvM(h`ycD0dH~V`0&`d7twTFAyn*i?&m1wL?mR1zV{JqPeM0euhaf4;WxmH9MDbZbev<=Y>6qZ)>AgAre zN`9NiY4HRa?4CfxURmDgf4;Sm>NuP>{ypjdg|(#0rgy3@t>89=twj{-BhZJ0+Ta@$ zXB&m36|EHXAn0M~^(CID=JRi;!)I@86xAxzHaUA*<3ICSA%P%<(&y*EzEThS!ozwz zY_q~Dmnmw0MXe}Rn7dw`SP&lDKhz<48lt_quM&wdmAPA#G5%#z;Fly!rnMqZ3pK~u zjVuwOci@O!3ODu}Q(d;1$gB1{bw?^M8tNS4WdnyVh!1O@+qR;jQ{RdVdCv zID8`lM~eJwWwojZ)rSTnl}B}3WYs@ZR?k&fbJ8KR{_&q9v#eGYg<7T1+j!?`q!s5< 
zI-aN+sS-0ZxvQ5itMiINNh!3@$gw~e z+ZA<6g`&KZf47&->s12# z$~^2H58LQw4KDSzLa+K@sjQAK2v6)Eihz19N!cYwKhJfCcJuM-l3ZO1yWGoW*cEoY z!^I{Py4dwYRkwppC~~o>C34_sQVOo4US5UOgau)TTe|L`3!|CEoE^%cPZYMv%X-CY z`6ekui9OTa23TR86n#)34uXo>;sRDI5$^LD@%}!FF&y` zSPGdQ`?DedSDOHP39uLd7dwDKS4i`tItHWd0mOOPVK4g-(H9xfQxGi%;RIgVudcR$f|X7t2XTl|X}y;QK1s z$DZ)5CAP|NrJWNkJSXW^Z|HTgHq_fIv!hF`{=4MzUg?P;grq8e-r2jb0MH!Zw@gV3R&) zcGTeeer5Qf&pm9btPT;iFSYv4kVoABEQYA#K9`+2a z5Vgf!phrEPWW0mgt2I0P3ejnii!~)T%$1dCO`hFk9r){x}jaRZkF7Nh#kldgI_S*u-lKv(&M8u@^CR%S2Mlwsr&qPJo6aF|zoMc}~ zYG$3a|5-`>@7CFJ8Qu&5UW0Ue{!-X&kU7&y=3plH6s+`3DvBKedv>A#!JZEzFO3|x zsrU_4{A>WQ+d<;~v?k#Lq|&JJDqbaK%9K2mAqO_GwJsKQv6ev6Aq9VdGUs0sNLKi! z)+TEZ@Ld&17Wr716Ugg{9`-pqD6>s6+vsBMbzW8( zA@S>eU3C{?1%Sz_2-Es92)9y~T3PQ>0jXT)Vn<|Bn2%R4+nZGm$0vgPfulCx zLxH1q|GURcmVfUF%D7b~$$6X#_Fr}a;H;e|D0VuylCQ^(TlS<4lmZ*TtZi#lruD{_ zfm;;T&|Fan!dv2xqd)li596Xp0r_|@&7M14~|qw>&aur!p<7 zga`Ncl(+fL72DqB{p2!>hh#((V*UHE<($^u8b1QM>g#ii=WOeViHbV46!JazBQ#G@ z$CWDJw&3l=Wa>;X%XkqnhDXdr1ZHPSLDbMzu~^VzHA|5np~?Ev)`(n{iUgUplxivx z>op$fyWOkag9_YKmT$CIy#s-tfLB9F-(W7fVwXUs@wnKTXqgO7E=087^@w2`miMP*y%fJAs!22{avAwY<1zsb@Htqki@=NW$MMNAnZz=OXFSB>BHF?^_ z8eQcL^Dm@zRrty`0k>3!OC(5sH&!Gb^~#diZ%_et*rS~ts{eWDJeS5+VhfJeU*?R+ zEJ;Rs!>uS$r>uIZ~asEY=JZQBiz4 z(Jy>yjaFRScd0E>=slDa9+yY?AHR<$sagl#cpCiKAKIf!Z$x@53YUoFL61lRcC4f% zk^@Obi6pbMB;Q05h;S7C#0B(P)q|E62c76o1&!^xS2+|%@VU3BQ21C_w!UWdmREBo@ zDrqfdUG*PC9^}|~75bB{q2Za^gsAYPk7-4}1_)xC5p^#wc1LU}kCH-9BeNV>Z%h0I z9bX@-LbN+vZS_oBL!?^UNjNl+pEB*u|GAe1O1q>=Vd?jbm zQ9#uiw}7UA{R9;g2fZ&~lLZHn>R9KX_^YCnhc#6Hp`}DA_V`LvgHnE{)$kNxE2;aa z8B}|m#2FF{)2ew;LqSwyGgtFHQB9dvlvhiMY8>%NqM99uBV^0cO35>qQXoqC*d?0W zFI7rW{0vdbGg>JhLf~V>UM6rMI})SGO+tJ27p;_tNLeb^uD4)F#Mg*QZq^F=odA!( z5T&69E(l+`PGgY}he@F~0FZ0e*Tm~Y-eMF@O_^?j>qE`TYp0=h8;zskG}IE~_)xn@ zBoA)qs(U->wkifuFEn^)SD@D_J+Y16Gg{Nk;Vpp{YqCWOy^7H4 zG9Ve-h@Bm+Kq)AZDrlEft0KV%RjkcElEkUgwPUQLS|;@>E;b4Uoe<6|kr<>#5J zxPYZRj~(Jk>SK2y2?^4nNR?XePr?Gqj$K9D98`}T#7LAZGKa7F30f9&5?%m&!vy#> 
z0SYw$8SCgd{zQNz76O!`)C7nUU=Ih35@%uSwNyA9GqSItR1pm@*aT=Iz#lchSQDU? z08eND5|BD3y#(-UfC>{pLXXC7)c|G`ze|8|8epg?$8`j_NCRAH0!${rAPsPZ3GfI3 zda-5C+><>=fE^m(8dHvT0yJrWN)y12K^Xh92DrflxS9ZqIY6W6BU&ntDb{V~Fx5~*6`sfFZ{kg)9VvEi;B_}yZ1Gfi)$g5s} zxwXWr{-8u;78?a|K1Tc1l)fjhuMjJ`2-jB7PJothTnnd8bf|L+>S;u?!<}6+`y$ls zA0#K&^C(QPon4_O-+mqn9J5M6TGh+!ZNjEUEVZ(w9CIW`Ie~qT||f0__~qnLFfmLK<-46Lq@r=NuIY( zX3Z#w?Q*e%9QfSo8|Y@A^4<3O#7{xeLX%b`oqm&K)MxIoIFnef&!5aCqI~GEciI-uv<@84dq->y%l$E9C@s;$xqK%!)Zy%@WLrIlwkGmL z`@GgpYf@T{6>YS$7iarxtx!?9k0hI1;9zDG}CIXR>zJh;}dAqGwu*bPsNW=C-vFH7LyuSeGOdYt!i^XjS0=xst7X z8Qrz9fgiH)g;bTp@oExE&ZVPRY+`a8_-(We^OKL__oPQrPw8UxrPQ1z5 zaYhBr`7Jo?m|ak*{-~@{b=&1;P#qksP8zCF44lU~G}$tL3KZEuro&fMy$EWm{0^w8 z&bauHyEmCkPN_AUX-J`B`xUC&V*{<@o1utsq zZ(4D%9bgi6V8jtOG(?r^bUTIwYwwJ)h+DlfD6>!G5$}dN=3h^mp~IbfT*Xb5>TMWs zTjfJr+2#>DNFmnR`5^`n!5A=vVzZUY!gqMUrUgd zA4dreI-4}(hhEvIs1j>KjaGI*T0VFXP7tuUm_j(@P$f0gv${S{=QqOVMCEn!@;a!z z5u!ZDS1PX!XPD4)(m|tv-wNB>F^J%8Fv1Ws{_6Thl{2vF7 z^_vsNiZgsp0s2UpW_uLpKgll0tw#~2XNvYq@1yb1BFU8>_5TAp-UauG@q`IVc{=Q> zxT&>q5y74!*u4f!`fDT{w2<}!LJlj~Cw$;>hpuSLTun7)Nvl}U)F>7|yYMy=_^b7l?HAv%O?n2lOyo(L9hgB9qK@+&T--A-d zai@$k?7sbV4>qmze3%2BFn*Wd7w2G>mr=Z|PAEVY)R5}pu^`A?AQ8gjz%vZbMR+d9 ztNC1!_d$M4r85k4XT0h zf!fbiXTzavgQAYN*A+PM@ttAD}txHc*u1eKgQsxQY;P9|P zGTsY#kJHMD*k7?Wc-RegngE!;h1POMp{)QGZfA!_wT^AFIp~a*R#*s9$fL@39!)Pz zwj7wfMLfv{*BEz;e|=zHi&a`!k0Qd?CdHaW?)<<7#gv>}t4-0oeiu80GWTL$YIeU) z4NqL2#?Ls1utwxj(2=`}+i>*Z4mJ8bj{rA8myA%DCxe-<#8wo{ z3>F_|{fu63bATP~f!-W(o`MZ=a69n!1m-y`Qt)H(YR4;S#rbg0;D1cKrEYchP`5g% z$fMo@hM82Nut|2UJ8*o?`y&il#&T&vKw@t};P7x?Ugw_h_#_+WkU6EW$dfyF%VAeZ z68*=<6;fha*zwm%1=<4iSg1pmT$|XWqC*{Wt`U7imj2qn#uX`QZZpV^({{+T-M$|} z1?KFa#Y&~KN1Q>V^@k8(Krz(pJI|v|a)=eA?2JXU?yIMv&MF=8vye-m5L|>ffadX3l@3m(cn(xAbRPKxT{*!N zCOsK}no3BC8zCiN*>a7xNoo{QB0Tt~3cFE=2|ddD)6W%loe&dIAtt(9;R+!qoXJF? 
zAtqeKw3_5{;-6q+E+^QPi)i07-p&dZk(9s>Nr|^eN~{MpwxJKe)kKXV&g*V_{CuuV z8&;75RqXoXz>v^T^ho0LFv&KUnXe`{dgI!KlMASOu_UYU%ESFKzp zCtLaWkzt*3c{BM);}yp(2Z1SVl7k6)k#E+|6aOf4|Zg@9EH z?b7AUsV;KWo5JcfN=e1R6gG9V{DOAAcEO|Z=McbP{8-2}8jPd3l){J$LxBp=K`vSNIJIkX zEYw&2)UiRaYU6k-w^mU1ecR*+}6+B<#!j%zfMz)?Cks?^joUkx63o_ z9VM4H2d2+dfn2-=6EM&gHc-6XvAV8gGXJ+6Y?QX(@BBbUeT52wi8SG#9Ku2rOmggK z)jEd1%^#ttO9(~9x`fKb9^%RC@SAuY+9$Rhxrv~$o$nw;?HZ!Ec~i~0*t^J2Niy*2 z-hndKr?;2-Pw2`6&Kt$RB&F*eL!2y}Zt@;w@PU`=Dw3P*Y)CpOeOK zWbJ~z<{W6&ZXfDWA1HFE^9x|82o(#K&dr3cMZGbLhGmkjVHtLO{R%Rw*JWr|j-Wtj zEnK6n>!V%yGqN6meN&4|NnDWLwOv1$|8(*h)XUr(eEb{~w`0mRCXMTA&+rkm7g) zUs3VJak}D|n;EmWW%k)?l)jJd_~7h833p2WCzo#*`r}WGwF>EvV<*!eZ$FX#_@J&o z-Uq`5nQcfcJfZ#=I^)=IYfF2t@-%cpWr3YmpFI+@Nz0*|3Uo=N({Y*Y+k~`Mj;n7< zS6;D*wA^Lo>NS)?{Ryp&UD)JTSg^2%K_2PTlrq*wNp(Y+P*SDn#rc7!uP9?3tVfCN zu_@L~NY;<=H1ul*EO#c#2#zHE40J$(pO9aJ+m++MVaapC@=pdoS^X!=Z-P6heJg@9 zMH4dQrA`lZE>wwNPP^}_o(8+`vT(s!&cmA0+6HBIPt@+e%-X8OLS+7jP_XqO1ilb~o@fPu=!71io@f!3 za5cR-d^yg;((^qB6mj!yu z{JR2s$^ysAd|T_W?*RK!stp)DrCgUM!mCIJTEC3L>&fFxYojQ~|9m2*z5Ob&fs?GD zelD)G1&-zT>k=xbTn*I)?LmPQnnkGZvE%gXyvR2!$zhhfxj8u~&0x41)WOG>haL2g zN*;1*ZK%~Z6&pO?G+iYNjpY1m$v_N8JY+G5rB`&P4J;C*?3QsDsfDZ|@*%`a2R7q_ zppm`;w;~{CU^i$TIe7HAy1@=P8HIvLF8{>8v9-}lR_v^j7wgQ^;7yJ8%)RH?$>YHcKb<#$g#FbrnU5Qs)e z!9_rp?_7zq^v1Ckl-i$~ct0szLr((cc5Z`CN?P8+r6i&8b&dww9*p>gK#EcxAVkvD z)!zY}NI~RmWo>A*?<{iym$cm5-{E$)xa(}^lSwuiN+0z9iBFQSgk(F!Tim@)BZtDa zdf6T=cE%>63D9W;I&NwCf1F3?6rU109of;DOz6bi&Ng6l9>)mQ?+5+Q#eWEP(dBJ6 zd(7H>&l2I-8?;X4H-_F}Z@JkTH`@rx1xb&}$xDMrU$0~T2c=&6dY!VzmZSHv=A?H; z9gE3gVwqCgK%*u40Tw?>xWM6tZRIS`oRmeo=i{hQ@Y`4reP!4Wz&XEZx19s~ z?tnI2VTYizVsD{XY+Z;pF`YK6@!}2p7tvnEkwKdyUiL8=Oa2<1K){uI9xpxN1q=W@gcVDz;EX$rA7uF$FR2T()Mg0|d5h)Q*w z-J?DN4Pq}=ua)XRu$4xHsJFNkGUYOFF`6Hx8*Igx9t;wBi`N30&!G)U@n+Ack&1Hz z$fa02yu}Ulf~iJ{?gu@4y~Qnxv&q9oio1Vr3d%k9FdBvNL|s`2&Pt;b@4kSB@a*^K z_Ls-WsZO*e%Fjt8-sGv4ValRr&0bEa6c+VB%c)o+=v2FBR1T2NSE6y?v6lLk?gXZn z(;6p4cu|+3LYRT#=)FM-{D7lZ0YX*cc?5)-4MKG*#VwU;ZW<|Ir)kS4`ry7}@g~ox 
z0gAI7bWnJO|0aP1L=ws|hhMMP$yjqw-sLO%fyh(dqP3;n;)?Wfx3b86#f>S#4WA0yB@$~Cfn zjmYRbVm5GNUm{Kqu%tEw}HrGFf2Hc8;xP`D6e>)HVm}h z?aej}uGWXaTBR8G#th;UXIopnQ2IK7bCYM(73d?L7S^<;eh2?Id)O6e{T_cFYu8q^eG%}-l+o9nhTAhR&Pni`%@foO}qjbB`J-2d{%-uso;3%2s z5P`&0ijP55Dxiinf}Tnrd|WE@8GL_;e*q0Wukmez2p=NqG{qB8!rdklKIVRD=F+K2 z@0ungJfUcs^Jx5T9jk+28$%@AfLPpzK0R8n2G6Lo1VuK%dHy`UWOCF?#-nsMDj-DU6=ek>!uZzXKHXCCAq}I()r>MN{G%VmPy!S9wBF&Aauv_(#z9i{AWz zH2>;piRV7unYK#Vq&UI+G&OTZPoJ70qWjFEwehJ2HdP!OEJHGu4~h5~KxpX3`Q=!? z*evCVPe+Vr8LYH$p^j?9%%aPW7Kr*2kJh^Qf8!)WQ1Ewj!0|7q;3<BXK{WadY?=lU4Y6n^yy~%xVb{Prl|wglRDsTY-zcw z1lKXRCJOgS%apLYMq&JnkhB9>#hWxIUIu?7IIVAv&3m1gQm8R*z&o@8OTD;iMz=$Q zr6>;+sh0xlk)3Vvux2^1*Y2TH&OJ5{ds7blGY5C-0v}it1NHee(1^X?6=b}HUtfu% zQni&hTPs6Jn9FR;GM;FeMZ0Hg31A)OJW?6zA;TK%;gsS}O#DflPP+xnJOH*S&R*OR zM@sM*5m@IW+D;IU(HJD57utq2%^qhiEgLau)=Q6lA6r$u%~!pK_rRLDn#zGZSZP|s z?J>Er4-DD}Xd7dD>-#{6PP1P;F^&3T|L*ypNgtL$pZGTHJoWQ;Mfh(ilUDVsmLBM; zmhRXso|(HX(yH-xM8**Dk$7f)WKqUUsen%(Fb#HG)zXYhzX_*1fcA)IW)C?-5c(TH z3-P>FEzSM}p3SzcM*s%T+nVs(hRBXw{ObnfXai!`;FdTF-2X11Q4O%|O#n6^qFql; z1sz?BSXkcgN3#7Wa6t}yWH{gqz@d~+shCrjZtu(e(|m>YjX(PY?L%W{oDVFv;1|dn z?L2WCPp5q-dBORo+CC({W1DzpZqxfOy;DX1amBXizv{D2(7sZ%3*9E#1(MI84$0ns z=m_e+RNK&@$LTw81n)Z(meF^QqwfrRZgHOUCHimnER8pRI;d~kT!!5Nkfr`R3)p}i z`}QBGIlI%~QMAwCQLy8Jv(h@{AF$W{^7uZW;ky;~JegxD4c|iGd79%1$rlVr!*zvr zs@oUeQ|2G8x##?I@izs|DG6a7|3qJ7T!4RQKxY~IS$T)VMc;9+^G;45-dU9GyeeJ3 zopSrahJ3qtyu|zD|8e^=#>Ha&eDX`RXZF2w#T=GZK#!B1AteSWHfMCl7fGSf>N2* zQ0L8CR(GSd?nZ~Tqo={)+b)lqT;fP?7Z2=9 z-~eTJpgb3j3URuE>W1v#)zRzY$Q*dc=0NWi?*>}JW*j)3^V~K{RON`1w~N?-qdaFN zk2FlehXEA73Dm`|wbTO>m?rR8SRSr0xeV4)mL}O@ZIXvuv94~CbMQJSr%5i#;g(3k z|3|U`2G&XR$2SSJX-yS%S6VCTW(~qqWbMHjzr(jp9yQaEcn2tL+{#fRoAH;=0G-KZ z{7F!DDJ~Dxe>q0*U6_Rlb_pif7HF}?pMqLn35OS<8W=|2fuEJsi}2&hHpVW3!J84> z5xW3EZ}_%lm23-d9geUkT4i=vnMPjDY~TaRuUMnb9-OnfzGijN-7~!9Evl2P;Tx?A zF1AuaYn!XCpb*RoXKhO0nB;H6byD0|Njm)ht%P4A1XfzW0ZQ#2ve<|{cNi6`Au1i` z=bmYeREifyj)?GB8*re`iw$2{h`V1fXMhjJk;E9&+F7?ez)}~yf{KLyqZXX^arvgW 
z0b1MuQ`|r;ZlEcyK#MCd#hszWoneYQQ;R!Oiwoe2@w8TXz)Y#4?&Wf#3OB)MkOPMu zQs_^hG5079I5Ytj{8GCV z9EC&QFCox;7o32P4VK}I8}l7XxiJ1KkU^855xjzED>qRF$f0Ejn+DrodPpK^V?i|R*A&IJJK^N-wq+P$lZRWw z*Tay;{kPOO8$XDKLfzxJR^$Qb?8GTsInD@>pv8mPYj)tf+t8i>3MBG;=X(0Lk&+%E#O(gZqR0}T+M*G!-b zG|)f+dffyXs(}gw=nWHSm zcKN4igQpb16Ahk!iOAPPdvnMKLh*Tj8h<5vk|{f4ao>5zfx5X?_Hq#zxFSxSeTkq*IJr9;k4LBxy&)wTpuS4jCy)$HYK#l+R% zf&W){ot8r5sAp+i0d*-Z)p2M0NzOG&GDN%)c!M|wkSNwbNzQTP_%QGWu_2-#M6^k= z@9ZQ&&4+Cdf8~;01;2919-;Gv@EHT!DODw=!@LUq>UPB6;-GW7ja+$jIaynl{wj`q z+OjnM101rTkUzBXLLdq9e$-*qTEt&@t^ee51M|r>C|aD;?U01|{IybK5iAVFxp%Ya}25`?BZ z6C|(!OGY@#UtU9#U49g#e*DKoYRxE?FBDxEMz|;KNv@Ft$1tAA86Jii{&}z|dV^jN zOJB#R4F8TYC(bc=YLGYlEcLH9_b!~J+{Ukq(H2k*5u?KOo=X)Bta1j1v?r#f$%6lA zH_vv&hW;M0aF+Jza}fXBT~X}SXGByKQP_~v2r+HQaj}bPQ28A(lqZT6BS+=qD7N%Z zuq=Jk#Vt*7kU=S8lb=Q0-^Bfe)O|U=5u_V)c`s2p?Mrc2*JNMH50tJzyZM0< zxynC*ed!)DMjXQ%b|`i>{i;>4D|JVN<6U)TS>R}YX(39NjdXEkt`7 ztUfw5*wXf)A0t~@P$*o{qS}Qwpjw+#Ti;Lnh#2oG=ed#Lr44_*X)mZA)z&N6J5k3$`5;;EAb+S$`R za@k@Kk4WY%celZuk;W?bI}5L4r`xDb5QG}F4O{UnjS?E;133BF_Kw!Yq*HD;RP#6o z;;MPl;^S<{IE9%(=Sp;10eT!qlMg{IEuhbE;QqeWmY*iZqal7DjeuY7K{sAb!CNT! 
zC(NgbQVNcx;J_Gy=OO4DLBU%;OrI|)u0E4hDDnd&Ozfn$^Y-FxGri5Avewg^`=5Ag zJT1PMy_m@No8IG)&(Of1c!bcPn1Kzc2o@yEi_mtRTUb?L zyj4i6z~&i}wc0K1$6@Y_tAa?2VTH9EJu{P*YL_&l3IDc5@UJ(5j@@LXxD6E7LUF_~ zB5o7KwNV^K7sW~2Ai#UO6M2j9Q8#eQjPp>@jQ401EZ{%_acnFrwwcf=j($DL9<<;^`|zx4Ers%+b|*Gyat}br)FAaPel7aqOQA}_h$q3 zCm4<&)1xb9d{`}ww#4v)E|}epVu;8yKBd@CkQGd~Upt7S!}sW%*9nU?zsw+kcD_P~ zkjQN={{WTLjYyPqobpjK9foP;C;m&HplRjx@b5Be(kAGyKa~;BuNCw2sQ)4NiD|ug zsEJmgC%jQiH__mVl5P}Jjx0BdrNlVh-s1nwed0QBeW7Vlo6g@*_F|*uZ`G zS}_-&qT1l>>4{o>Ic!r;RJ+01)Dtyb;6%uDffFHzuRr_-b=jPFFB0#0^j;y}`#GC% z$ulpytG{zob*Sf|{;W6J)!*49t!V05Q{fvxL8RfqA`wJ(9(0HxO6EbkuRy;lDk5wY zA#RI`h#a4tE{uwR)tBcyhK$+Io3;DQaA#+`f%U+t-H)wpbPqbnKSJKuzClZml(rw| z(e76FgqO-#Yd#iTHe0`A4u}RUJ1TW>Fh9EkQ4pbh70P}189EE$e=_APW(=NramR>_Xz^~ zDwux_{J#=4=nE1u+bq}a?FNnI=zbeJSo;CJ;{#)G3_&x&UO71!o=#x2w?|UlYjC%e zxp9l}0NqyPFnqEF2WlTtR4O^HyTY@4i=L#BAN9KEY9Y_#avpR-Ut)8Ox__OV|2`ZO zWWO+pCYky_+kH^9N3>*aDeL|QM?Gd^2k4t=>}@HCdC3yQ5%oI!O3SwphS=Lr!S{Na zfb?z`n~o(KHl$i4nI=l))LWz*LLUrEg`jE$5ztW8kFV4q%Mj<5K(}obG~kwEkb@Tj z0%7qbaWBIn5m`D)++mY*dxf+ds-q7DfReH`((;2SQp7+a$@y!bpMdNCY#+NY=pii^ zIjALTaP0wi;%k6~;lEQ_4%f#VA6`2rzo}7hJkw=MEY6UxrtcZ){*q((ic(Vdn4EH# z9RCfpAAR@@93}h(1bZa*D_|4skk}&pn!N^8z(+u^RjQ+S!*M{77D-+%j7WzTx!4?O z*CKymjwDY6R3`b9F|w0W0Yq{{V9a|we*_vnj6BCqZi?B4Wf z4KGHDNK@GwW_8twCd>>!2^syN(X+3=gCW6=hu}Xr)7?Rw+y{$Z$>u*8{u}2^$l`xs z45(I(=Mp?GYY)ZIM~(L2DZz87>AOHy3hu-^*>~0>r27|0xhEJ5_XG~%o&c7AFeXWE z@Ncc33mutoD$okJU2`7rhURPld!jji!_{~HOD5p6h=scV`p|D)9VESJIu!Ci&WDHo zIlw`4Zg9Tt+yI~1L#%LcAO)-Nt~ogf9JcwsA2^J!(pDf+4)ofj&}FFtY?!^q*Uv5D zjt=(JqM5bF=)V*kMCBdMfg+v?BUcC_^I?pPIQ=7fAPOb#K`8N;6e{$6MS+U{`FJY= zOFhLVd!+H}&=H;>SB zWs0;MpRzoGz0j&-pzW(Q_{Z&qX2M=bA$uV^Y=!XbvIv+BiO%#1H`$^ghKu?BMgw`usbgUL}GUo{w+dzkuOgK)joE9}EG@=N2 zft%`F6CPYNw(jv9D#L4yjIFz*h`Ti3a;TY_16Su<{BHqoK>AyIShsVFU>nKZ?J95e zuYidN*@yU_G>s8HW%(E3CwzeomP!sEf-h^;M(y!C44>p19NT%@D zDNt7t_kBwg#Ov=(C>9l7h z8P}Op(m@Y>f;}yg6vv+Ao1VZL^vM@Ew0RCg7oYUB(Za+FAo}m;=OPWh`ilN*<^qEH 
z|0B)`yi*_L@&g_9CPo`TBfE|MeryP31!z4DL`~+RoF@EQQk_kdDUPhxa$wWqYlU4kSp|U9F5D%z|Zl`2qS$HhGk$E>^#qT{45$YE*OG9 z3jGdL*M$zKKtcz+SIJ(WO&ljh=wT408eeQArLCp40n#5C#lGMOXtQ)iT0 zN?3$f(GlEAqLglS_w5d%>q7YEv{>T5M&+76zjOyfQjml(SIM?eW}02$?H&$zeBUEi zJlB}r04d?2<>vKnAAdtw%VXDCNP>y{0hO8k0d?SO7F1$ypAwDx2)sPpWWv5B-PO>@ z;Q3T$x`Uy16qqg_p&rDiQ5FVaC6e-HgqW9~5bG1#UuQPIPH#TFn45n&_ddh0TAP27 zRt+&AnJZy>?nOH>_N^AUzBw!P)z$aWMsixivmMNuzAhuZNq^3rF~Q>0jDBmpC#>%> zvf;l}$Ddww{JATo3Dk+>h~!t2$09<@AXgBHEf$U+)FSYS%N-~M`x6(Xw`iN_HMsB+ zMVR~lIQi3Bi2TSR>f`O(`)Ee{&P{9IB!wJcnS4cMrtKLzGy^hKgRtDjD;ZOS62IT|3&aO0slp_a3MCk|03LtYsB3_l)I3o zEPV7B{)==KGdV6AqC#qB==tdkcy&!ieBU$R@w6t#vlv`kfoCHsn1Y8q)KPqi-nxNk z9d-!tw$msD79v)#iANpeQ6I)t%UQzR4!ob>rU1Wki^jn(k=gsn+LS*Q0NeOo$-X`= z$de3Rr$mwi`u$=~0%|ysVa`ue^rWuo1vjKMpLfTNm~%9QXX9K@8$aj~f$ufOooC?7 zU6B|lnOvigp9{E`A_plc9}Gw+eAi%BI-6gp1Y%T`w|vW-|B7$LM8Prm5$Gw{;YLt0 zt!Ckf0W}+JKrN+fq?%(Q_{2r(nhUw+OPZWAgl;jy@#?Eu$S^U0xI*Y80@4|XQ z6ham@9u?ni!o__cmFjImf-de~1LSaVKfQ$Y@n?+^e!)u^1SicL4U`~S)Mlbmc05|+ zHyfo)8SoG{@LexV-t8gg)TqQ#eP)R zx6gI{{(S-_shSvAfVwi|^C?tqc0H8o-o#+>Y(wk!T=on=rK$0ceiii8HYvv5iWRV^9KM3yds(X0i+W<9IvBwCAknG<()3^X|ajML)>E)O?*q2H(ycS52I2^ z-aoI=nbt2UeNOE@n!?hqPkL* z^c~`&g|6iP18-~#Ei`NDmXHDspR0wwx%>yQLU8PMT={NXy)5<>W20v1Amd?Dv zPj~&>$oD$k^-mMB#wC7elgKLo-{_n|nM!~g$q}-rQZSMeHTYiS)Ji9FJ5jpfcz{^; z6dVsc3t_7F?3#!1Ub}|a$8=7rt*N%h-oy$su>&X7WG*mwE&?6g!$8F7@+l|nas?@j zE+<_n4^Wqn<^k@v;p|NLq5CID?|-W8e@*{W!%w6Ci8QCt|3th~?|-vn1?vAe`R_CB zm(t})U?2Q@z%j=w=y43_%`#i>V%%aC7SeREK^D?<6vl%LHOSCJdKhhCN#E^)Yxw*Y>>)!Vcpsw26K{ zge=i4VgCvfT#8zOC@dh&UO0FwxDhTYB_rI#lfvK?r(vnarBC_xfteZ7}2N?K^uXrPoRt3*o>2R zU(8bSYozD$6gSFT;pHjrs9r=4d4W-T?aprMPwQc9P+vZe2#*uTCNg%%uDw60{l<Q01@ zXWAyNWKKsj{`H8y-^uLHxeD{wDXgl7JmT=LSHMiqi^Ggc^(Q4DBy}s+3N@Mi>e`5Y zO+Li?^#T|-Nx}c5sXCnrVLzPNpOyH^EX=5;Kg;uw0@S50!7Po1c*lI~rz7~hG(1HZCg09IMAhN-FLmA}F57QAGT z3nuY`68P%L-y!QbHI{N}yrg{{7u1OGW)n5;p(51E2%2;9_ETcL{W3qPmy0*4RWIR7 zYuWVd&Kai6~)thCyIO;4^A+3zaRBVC9AKwIoJf)Iz5xsmzt?1BGlWziPnKp39;} 
z;#A5?mdI)fL0@||$xj;Jruz%fPg>~yf*UfPr{d$^Y@eIu{=&H!B7A}*5qbjs616_| z$d73{Iux6PU(>w>924(FZa&I?&4-+BfZGp7v@laXnGYT9A_MFq(;Oqg^RglwS3rT2 zvO-OBd(t0vj)u!4s2=x2CAdGsjRssgzeZ^P1abkKm-3mN(o5})Eubma4Ae{?zXfB6 zw0LwA2^5$YQ|r=k);v7H0Mq2ZNo!pfB6o`izMzybl~BErF$D0%gPRyI+$PIWvMoK2 zZAp|ZYVY;ogNdnz!*LUtgZ4UEtlkK@9EmV{isz6A$OLqr%OPTWK3k-c2uHynkc7u1r z?HT3EaC?S)bO9r)!8SKD>OX`lu;iraqwarRr-1ali~RF~gUtc^lm~A&ik< z8^YYQ|1ykP8ctushY%qsXKGt?oFj=tx$Uu%*%=URuf7iJl(Zbia!;JU^qIf<+%LhU zEiwMz!izW!JB9HNgEc<>@v9m43z_bZa3g>G`%NyLKceHmQNKo;*#1a0jHzmweVxI- zfMceaK^#C_n&@NOkC{qX{Kr8ylF<;l3;GFP18}fcUezKdrVoC|36U`|;nOU!IA+?k z*~cs&H8O_o&Pc=2{A2YZ)K0Ss&6k)J1%AMUJD_W`Ar_b$Hxl-PAs3vPhTH1+RLoDQ zFNjfp;DZkA0AFKy)tl_Cv?4Lia|Qf17-3HDRRa5b*NZ6ZjxQ7}gY33CcsMUdKsU=5*)qNvpfR%pR~B3Psa^F+|01#Kc| zx6rq*zCfAy_7&x}@d)f;^oSgb_B|};MuJ;mqqULDRI|u8A+%2L{kIO*jnDq&+I|M- zOe=yi)u!9K(s(=m{R8B}2LQ1p==gS@%qjUQ>c{;6+?T*-(eO1_LNgPN@5#a5U$y;> z@w>cP8pxHLWES~@`GEX==j)CO<#Pty)}49V%J$wH$FSc=#s2q za1B1LAiKDhYN!C~o&!kOMQmzkH6ecw%yr%U@RN zFTd1Z$gK+pO~FeBKM{X&`A7Pt7H1n8NLG&RANvZ?@eiH;_wTan7b=GJNVh1&^^aY;IUc_sf)J^1VZumPDDY`c%_5P#DJ}PJ zTzNG*j)QR-LQ|*M;g)D)^(c2seuK0EDuff>f1vR(iBR<7gNoAf?F##(cBchH9LGWD zLHBv3g}wu4ZHM-g3`gzIYq=~}j~!{VBBqC1)P@VbMqzH#v}x?fEBgVG%$0+>XK37k(cD z%C|25JzSw~62#K4X5cOjFx}3!>gk7!bgv6^U!A(nAuVqM0ij0G6n1?5;lwzhdnLQ{ zXu3l38GXWyQZeY~hyZKy+t9e|gr)pEwYX5yk=SSm=o+*LO+xcI{%9QUJsoBAFWZU= z62CRC$29t7TmMKa@GaLp==|&ppp^Xc!hdund@ZteSO!)ULA=Rq;@80&*2s%AllW}% zDA|F?iW0Wb*^HOD-$Vm(Hhri;hu=W(iRh}eva_Sw%_d$Xt!Rdh4EDlW2@g?SM)@1oj05u~dwTu|B!eQIQpNdC+#~&a&h9R>P0d> zk*VqzJN&h{)Uj+LKB0-P-=Tio*8p6i%||k@!Hz-;aG3-5LCAh6PXAzU9Q*G+pE{%Aaz?FXJlVh=lluS@C{gKz0#3GFK8 z+o2t?2QeGrw3rf`PK$|tbzwT1#W|rI{RoGC;=AUUv2n1ce<#sP;$&0o=eME^)x8vo z_nVN#tNNinCY5i94!}d(q~IRHrMzkO5-|__naO^hWWV+J!{kD9U`iMiERj z>Ugc8`%&>dI8M%#*D3xA?7N3E7mhs192MV>B<2ER;gkWU1v_HH&^YYH?@8mT8t?(m zF8GhkHsS-(a`a;u*xdvZmh23Mdyt!APB1CF|18B}$TC=HMqrxe^MknC@oe^%Z=C%L ztarc**hAxZljitqFvsHvtI*5mcRJlfce-+~9!b%3P1b%*xZr$*0PtEnDn?#%KFx3W 
z-PO+iaL0lh|IIm`^22lfl+grJrBZ;FZK?hIzo>q8!um7Td-7!Z0zHm_^5M;HG`fLEEA-9kAFU; zf1i~Q_TpL#SudRsY5qNti&J>xBcsc_<*`}$P(($P^0rwegnHK1pov~soEq)HXFj|m zV)(#_5=~sKSR0l0qaJlUSa&C_h_DWAPEITmhrSfAOzXTRe91)|EMS-!iKSNXQ@Z@N zM6LFH?`-Sk+I`DF4)8iGE$FOKNZ~tpyEPf9H$Np>d2bz4RBl5QD}$ue-)(sJs`Hi- z6-dlxHbj}$h&xcDVT?n9!xMJpp)2|4TCl)tVj+?hI8P-^WZ3*fy>!?37{7Hrxunp` zaA|xKE`N)IHcHWs6flCI()3iR{P+&NVT@rEV&qlBRGXF{ql`-Rz7jY!KCp@w`r4&$ z{d^DAX36x9PJ*3PRA{!I#ynKYZ`H=`2(x2==?@y?fRH2XJ zmmlbvEym@*7r1tJ3l=rfV}GUo4t#-|o1Y;}6X5%pIMG>H*2cN1BW;6IR^%7be4B1h zD4nkTI9*@o_Jm8#=X9HCYR+;_w*Y6(Y4JeU=9`EAc~aN-969k! zTAR|Xrzsz|x-;2}XIM`oNncA^Zt48TxXkIN8#<3dh+~@F(3uL#ZeYAN-pdyhbnD<_ zy!A13)|+<%Lua}Xn_qq+FRRv!~(Ku%(!OiqB!%q@$mG=tf1YV-Y9 zcA_v_W_s+zMwP=tVCJc^~8W<08y6_@GB$gH?j50H&%5dtt?!K7A*w}dUL zVR1G8JXg9=vEG}Rp9S09$EaBE(~}z&BPEw>CpRk2UM8%BX=3ebRGPhDlMQmZ8T4^W zr!ClNluo@y$xo&9nm4FpPor?^`co|&EhXkSjnYS-Ug^YOr%^hw;A!AaOe#t@Y`M`z zeQdb{Zx9bt!t^i6l|s7dL;0W=@3m`4UYIsh+-Uo~FENfCsL(u)?M7IPV~Y_M!^U+@%L-9BG}_tRh~ z>Xy?eoqFw5ORsr@I`%XQr>;NM!ZD7CIZmT=VxQ9}ofzyiN+%XPjnaupMd>FQ$Hc>w z5Z{-_ts?!4kTj0tqdq6oDSqLi{{)2p6Jf*L<5Eo8@nMMRvtRil&3`h#{j&v&e7;?Q zcH7vlVAz^;a;f{RMbFl7`6?9#jy(SO5#wndaTnn0Y?3m+Us8K#<2OZLGIy-PWg4|W5cRhxiLB3Not=j{QP=6onO10TdEm-V=@OOFl}>iu5&2Xg$umLCGiK>WdKFB8ht~q z3@AsgUt%fYgZgvO_0@!n2^F9j0vg73ru5`FJ1r)XH9%`os)@obdlnZ1+3m7N8}cF3KATKB$)v6g?d?cwG>kS+WS^aj>bQOO zlV7bXq`<3~vrlG|oralaGM~*RyAtdlr?0M<<)J>`)b8sB3uQ6MZtkZs=_u$;cawp1lAJ6rK@W~&AN;pZDZ$KS@k!}M=KRSnF5HT&#$3G^O;=--0h z&Lqw^pgRe=o1pY>!3%c?D0vkpqi$&d|DcBvK<@!}4N_(f6j1<{;aybr+gAK1vso=A zZ8MC3MN3J>+!~HBg}{K~=Hy%HwUfUdq*wS)NBfS^D=gdb3X2F{Ebv}Kc&|ybH*bRY zn(;NH6HdChfAtZFI=;m@-MN6}Zk93Kj4|B;2*0R?RN6@5v>|CnA4=e#=;mJ5Y0w!) 
z=Nm~JAZwV@GmAC$Db$KpCx}M!<}jWwRlDde`s~1AvJviMjeZJPh)(`WSa8wUF=z=B%PQfxi(y@yjd524ERXy(*!2$9&-MnpcIV+#wrkx@3#d!fA!Iq)wV-3h=sBooEX)-1a~_hG@fmZntIukDjUP|Bv9p za9X&nq$Aj-zQTuMkttKn=dAR@v2$UG^99&e!~4c|svch~ztxwU--N4x@zz>OS-Eic zA{I>X_VUhaojk$Unm-$asA>sj)HshBADQ*s)^8uzIj8m32a zaPo4^16=3(8&o6qOS;xPTtEy4G3U8S7#$=MY#|T9HoS9%{iXtMV~0TOWd+3kh!vQ; zOBHyx8zA-raET4Ornn@BW9wY^!>C|}zg;G0yi*Lfir_+N)M{Y9h~Fg~!f>X6itaYk7%2m=2>BzjC`1cwFsN`wBy!$7q9hvhda>4PmHK3kHn)5Q^bRp|Jxryt=7& zy9J>)x$h;~D;HTdlgvW#H7w>j>?T*iajOI%T`n<>?sqia2@ zbfR%48_e5%Ip?TWk%E^7^Yha)Q7DthJ;=kc_?^%4zz9tp!NT=3g$A$Fs(q6_OzW;x zm#mJ2>lRCJGB||KrL!-3sMxnPo z+`boIPoEo@v!s1a%y~nu^XBBj{spG93tK3&jTg^1ow(r_wvzT4sPB<4`ppg8qQ3ru zZf6(jJ+4kjrD%Ma!%3`<#IaQtn@={oTDVS(^;gwsc;*)d{+he7-S)&F)_*aY4Vuqm zXi|udzAzZO=}{%}%$uH}%nJ1yjLW}@FS;bg{pFyVDOBW2q4Xpvrz%QP#BSAb&Ae5| z?R^Y8Z{4m3pK?98F&Jx+1Y7>B67=>2y*%xk7b+TbMX~cssp&m4blt`4O7w;QZ)d_x z%|!p#kP~T$<4?`ZjJ*v{laAk>*~I#3>i!|v^GX==dNyD_iVfJDJiUjiqH1prYcDK# zg<{_-@WJdv_goKjrFj7k$8^~anapu7G3UazR+kUwO#J2r8K+|~fHLPz4d7lI?+y_B z%U3Y8?efD6tWTfSj5q@`F{l9Rbbi(GKDoqfNIM>*(kSrbvywTNr(^7gToi7W#(j-Z zr^{BTakFvZm7rWvwg<#@whvMI&ADF>r|cb{f~$zO227mCvUaAb_3)v-0hB)W`1ft( zTflB|G!yJ5U%(HSuIAg&Jk;##jmU)lw*bIip_+jeaZ=|r_sO{%9J`t8F+U#W;0?~s z%=0XnnSTLik{QQtfNdBCt0rvoznjF^W@xS(d1R0hzyB3ToN7SIH*=1=pP%hfwNUUJ z(fo311{EroGOD93m_5e9VOlhGgdoOv|Yi za#$VG_nYMLev_lz4qRq}7l!xOT}(wy7cEG+lj!0u6TIf;**v?1szWoz$MGXru+n)w zcCrukwod`H69dV|*=peO_jmjlPV>k=1{Y@{9y&QEp3BIXC3UE(`dG^{R z=Ix}1V;Ey`xrQ8Ad)hbiVhov+^o>0#Rc2fzK{H;buFk`47?^|l)B=aO)`>lQ>Z{&* zezrT+|9#xcokX`>OuYWA$6GrZpF;s1y%Dd*EQL|sF&j3}TZ}_M$6cko47{&b1x{ko zT34k^Mn-v6%1J1)5U-$dwVbQHyej2HU`gGCTT)!+8G!goT3nz&%rl-+=_M{mnJVhu zSGILyC{{cbc1}^?3#K-$BKI`S)Y_w%CdEoUIM*C?G8CshX>2Os#U6`~LzS-tVV^LV zd-)b#nSt+(yTfK+y>DoN(V81>t91MkVn9&_*0=LSB@Q?y@V-{(6c{JO9pAyV2dQ#M z)%YAP_ztZdBHZ|66vOS0QUYXP%=B(mAS|UbW1_1{EWgan^eb&|Dj=vC|;Tdi)h$wwXFhDFp+UbOS(LrNBR zeM%v3tD;AC}H^*>N&p<|j4Kwo_iex6Ys-+Tb~gZ&*F zA!4P@JsaGvmUmdQ+s0H5@H8iC*=$Y$Tp?96`=173%6tTr`+u*JqRKc%*GPB&Q=kLt 
zk;fEDH}U>m2-Iq#g##7f@6@dB{$D8i%Q`E2^f`d-_MpaD=~%yO*j*wuT(DuJf||~2PZZMW6i`s z6%{1Bw~qaI2=|IweIl;-nm@%2JCf)Y*FqAg=8a3bn`sV%onrF@c+e?cfKIV17*h#h zFPm?P**p;#d%t*#>KBP>nMa(hPkagaHz5Tlc(TA`<7U~Bi4wEm3Onx^lnu9r`pbQgo_3n|C|<^q!#eL;eCOrxd+;9-(N!J zix@&}nG?(JNJhxi=9tc(u%j({8T*N{%a=YLOtp8!en#(~puM9L0Y&$$`jl_zNp;~G zPO0tajc$(YRuC?ObG2Yth1Gd=x3Py_H4qb9qTW6JqQ5nKdLU#kAKq_VXg=qk_P(ky zX3vgZ?`B*}x}(?U-8A&L`P`1ze9@;HcMd(S5&_)ni*AYxf^Q{8;Rm?wa_IP(0o*2f z?GrGTfv8NF!1$Z5`6EZI z#uK2zSBay-Fi5t|VtFjsd|;^YMOYAJ;!RAA?}KR%Z*Syym>W6#`zINnKNf@mCm$E_ zo4i8WlGBAJ!qB4B=3w>%n(uaHH!ToG^dhR$u+*wzi8MI5WAQ$Axk3l}_04S1jmH*;2dn=dB?(Tl()pZ9OgUAgLRJ_jPg6@8B90y8JK>dz_8$XH)j zf8M|?_z06&-msUic{yy0RADZ=0(LSQ_524nP`TDU-}R0g+cGzIz30YD@yBM}Xb$>g zQ*MOO?b!4i%^rVj@{Lg#d$_GDy1m$S+vY&a*2s98A&K&?R2qrKcPm(txGj#r$fpA^ zOcMPCFv zJ9PKR>>zoa@w#^zlqBqijUp)f zGVW2^ml?f%J##Beo~+Erw3#+Mbj1_7lnIohP}GTKgeC1^fc!P!cdz^~CK%$6wo#_w zCk9FGr9qOPuWv;K=hp9^Bn*<=LJbQo=7T*EG~vkc`uq3M^}G3UM2|>Z zH`7K1szUTkbOpK>+Tb_4cA%Ttx}(?8)8>qJKmy%=AbxF2B=dgUqY;gDKduA*#QnH= zFy5l9wcNUa3WKK*0D6jDw{}A>K)(#RqED;9{T`6Qrb+Z+fC!IFz@3^n5WN{7NCE8w zL?y47cBo)i%PpAT0)>IK37@w8GT7W((7X?ZR8}%*@RT@c&*48;%VLD=-8>E=x}po= zZ+^?&n1@tbF95Lq8m#6pue$%Fft8C?`9$)qBAo&<3%~vv_hgV;sPRB4+g>!;L`z-3 zW8nYRURU(Jzq2wZ;jB`60YS}=6|BDo1>bz2z!iN=(IC&r%E{pF$LE{W@qfm6hMyy# zlDK$bTi+UI6k|b4+j0nT;{q~{0RI$O;_^>XYg`yeZNYNNa2NM-xht)n4xwcDdlusQ z<%em{5Pv^I{H&*;*nF6|kIlhPt-iSRy3Ikg@^bWb7+Z(T@RUihIUeeq>&9{{f8322 z_=n8=DZ#d*ufd#w6c{3pDZnfwWI|?&DXT@%|AYXb`{08l-K0H2*WUqTNh}Nr+KxN+ zLx#dQ0jS5~+e8QkXb|au3Pz;9m~XN&A#@dH8mNw_DOmeZ6>KxaQX$__A$S?r!sBaR z?eVyxlaNLTD;3>MSdCMUFLtq>%1r?kl34X6-cSw^hP;5)R1W3c{|bt%J*1@LLPzXk zck^pQBQA9y_NGTrqlVN{2QThj#ib5>HF*oaw>`N2DFE1uGv5 zw@q#hnPIkA7fc9@K@io4}EP5hR{(WXvp>J zZgZ~N$7vlt$i~k1K)d*M^m1ecMfM$zJ~6h~8u5GgsFA4eVaR5f@B4PV8Hf%e6zkvDp;#?c zVnul(2ia&+;mB)$xeoZA1fVeflLgJYNc3xotuE-k(SW|?gOfqJ zig1!z2_u4W)W&0R)!;b2xE^e~*r{^ety(FrPiWqt3%l`Mi()O77W7QMTg8bZDq|S5 z_JCTceJ5c4$_Y^7M(xLqEYJU*@r7Zi5MOf!W|&)=2XWCS&clpF`V(UQak#Of&%E9V 
zjmsN#H>mj3O*N51EUGp>X>QrJFO>nBu$4EcVB`F7e2rUtYVH`O;hER-0I1k({t@c=EhZ!U^abp`!{SbV`4Nz3bq)AnBf-&8d??h14ptXR4Q^e+UZZa%JP7}ckL zJt}l!z}LhK3;$fyQ6JabH>>@0%|qi{(Pwpqo7_JM-HnxA5DHK0q|~LRzHfYsZ4k>o ziDf4B60kPD$Lu-83KJ$$-ah^51-E{sE-}TL2ODw>6YEUJaFywwzT7nCfNHePDgD2~ za+35}i`kRvJ`1JK$G6AlbVn-Cc;ES~F5^k;5c`io5o7%liW<`i+y^XTb)t(^t-5~;-Uh|Yi~Llp z1B~!W@%q$w?w3IJ-wYONiRh0=ikJspPt=#x@zB~`_b6!(WxwvhqaeiEO(eE`Fyi_M zjf*Mcb5q9NW^Mtrtk!9Bps-rE*|knl zn^8`Ihp}=OVekXw6&rSX_8EJC^g0Na@96obR|Ss$5mEk`chj+?Fg>tsN13rrB#!OI zx)h3(6+hRF(}C#!K5CMPx*V_+LLDQq3ZVuh%AS#xe~b@aSNSJLa?Z9H%y0P3yRSiL zhE2Oi8FRYnJ6P{y6tM@$l9JQJT|9p8>nfW8?|Z)S*bt+O*DM$*WAhMO|9YsDRQ;ps zV;1{r#i&VH?5jmlA+>!qEM2GASBn*5Sd%c5)-Lwdy7uK@);x_@#ByohXzq>U0&QNs z<-mQ+R~K}>4I^uFJmb5y$1R?m@!huAto6X)aO)kZ_SSGK*{JNTJvpxc`S9%ce=GZt88=W|0v)P1*Kz2;S0BSCUv&%3@G2U2 zfMbbmsP8mLL3bQnf6-4lUyC%r2Bx}PZo9AB`8l>7?^d;nUsBl>!X}(F`HJ>mb3E2Y zS~r2vvBxpgJmgw)vUh9I{%iZqecmm>J3?(vtmSMAwdK2G`L}rgYUVc&bq#Fc6?e@W ziZPYXYrNfWs>9Jr*It-6xN zD4p1LGV|=_u72mxW_+4^!)F%aScVhFG9qKKAI3TGTW0paSQNc=FxqzocS6|Ke;j)- z!RnudHiRueJbCj=?95LiSDVXtGsaWLWZ=~MO4`T6^%L6OgmoC_&)iS;7Dw~@@ww!e z@L2ZoQu|RH#=%AH+<^3NkeBWv${m}_F9WnW;V=3b2jQ-kafoAXTmBTY$GZ<-J%(Ht z_VD~GJLM%#^U^$=WyQv+b0^{wW)$cCp$)DJyLkL_n|FWnK2UG#K39!a%yaY2ODBjavQc>^aZ=I!h8pKK4o9f2(lG#~SZ zP5giR7W^mM6L3dh>j9dNdBfBEfBR1SC)f zSX;O79)RxV{iv(m-8T#Z$;Z6;1OC5ZAO3r{m^XX`*Keve0X?#w-vwco1v_?MiByHK z$rm4Is7U-VmoGK}isU4$q9}W;&>y*>$~Z;}*ZXHqyrHW<$2Sy*`gqfCy09nK87ev$ zIT^ajIq*l-@S!_-Hf{%cav68{F85urbtBv8ojGZNvPE?XKh@g}MkdCysrr5GF4m?@ z`*oWw4SzILbP(p*n^#Z9f?O|%)uLO=v+!ZX3~N)@8^&aO_{Vq~UZ?V|r#DY|k{^+8x#hv$!hiM6`aBdq1D^nfOcrqor2g@i#1CQ=L9e zH@2V^7KGa|r0Fo9!;%#a7jvIEst4hCs_5fuK7&t-s~lS$LuvoF8l&+B|?8Uy&?p6wj-j%1 zH)3UW45OdJ>y-Y2-#h!Qq_2yH@P)kjAQmuT-DRl694sm7YM2v-^z3)uV82tOtIG+{ z_mtoD;I?3F?98*<@qv-ss!>t!iMfc~3whDcprUMG!TdYl`PxlET9u9mgPxwg@I9!ivg*CGSjAm&rRrPwr0 zwbM$0M`5QGWFy&WeG!v8Ci|pn-GLq$vu(I6CDl|*s)+7TmQ+EKX+Kk2Ql+B$5JvG+ zN`lGb>SS%1_Ruz9DM9Gu9Gry>nMWy8k$4Gw)ilr9rItW(3ev|iZ1~;?c3LTBHao5J 
z@iO#F;3wz|JFS)oBiLy@HW33F2V(dXESZN+J|9aL&SOI-&&k7J11p2$gI)c@%3+61_Ng^5JMJIufQC_)a$<@YMs+^ok%ElwInk9 ziyz5yDzlQ^a;jx2c`s(>$c(F3c;5UEwZ?eTF;eaC{Nt0>8E21?3SwX{YTShfl)wHn3p)Buaxw z%yFUzv1&hv!oa}6hlRMt$3oslsHIIPl&bYBSHdb*-oUN))EgFMNg3-G|4@3SEj{!> zNbgRo7$zK0P2GN3yL3W5IOzZ-V8+MY zmx2*oOoicd7|u<~dU))%Fsg)qJif^H%$@0s!64WzcnzVUF8^7Iy746yHI9?JS}#X* z0mt@jYJDz|n5J9vL#blrQpGs-k=rBkah9iFb<0>uz`RVE&j0H^RnuP=;=nz&@%{_r>A7eX}ZyJVK zFZlMpfF+A*&zWuC29+^t6JDU{bx-qv&*qr>#! z4wi=Z_MHG}>HcjGhdZ?I7%ZTksL6PG>A=cH9nkyt?M&lAsjWW z!THs?US*QZ)jW~v3l|0HyOlv-`wGXDIL2E0Ioz#0OUb{jFZF(9o`l6AR&DF&L>TNz z7%js&dz<~PK6StH=V-Bu1`J&{+)43p+w#CI)+~i)<7Qx-iDqoO5Eb^GT7ImuLYs;K zD=Rw_S?RT9r4LyN^fUw@P!4`u#DEY@q$Dj8rAWzxxFFk&LRo_WavY~>C}BXZzrGf) zJ4GHDnFa7cD8g@K{yKgJK zP7O>cEJ78kfu#lBV!jCQa=>e%w78+?*6-Vvbl%rXI#}ERk%|B>BM|{?GbAJEIMN6@ zk2C_BoH(K^&{)M0MbLYs5eyz_1ejbMSr*(!8iD6XBPcl12#Sw1g3=?6fSvdeRV4Q3 zX%TpzRc-OvENu}cPZpt3tHun|Z!pC$#=r&}wAvYY+C9gu-7_Y^4HtF0T5_R|!Z7Ho z?d{Nn+}2>*&0GdfjE86+;Z?45v77laexJjLQfW5541$myprMflS1r=Vb=rrG-WM9S_n>$prFEWiiMgQ3dO?F^!_bu0h2P)#kR&I9Bvv#XNMM}8zCHr?}q7vnk zK7Q?k{tlVA!wD92ag`8`9<=4-D|#??++$a5^OSW+B5L7PPuB&Du zeD2o3{arO~`0`r=4|LV!!#All@L*TXB={z`2G(`eOoq?X8d%>||MBl*P?XHjaHEfZOUc+}5MvXp`W31>Y|CLBVqxOn&1d&EF|_w}zu}!FvSXBX}Ma zWcs~=FBbfu;7bK>pxa^m<$`x>xUEj`9>F&WzCiHpf-e?)hu}*E-zoTV!N&z(C-@%0 zHwnI1@a=*h6nux^4JCJNTc_aN8dk=pD8EPWJ%TR~e6R2q3w}`WrG)VgA2#r6>3M+N zA3e%s7Cm>fJEG@D>{jTxjokzFVhOC$xoQ{S-ceWWFLn zVOOzMYJ*mzeGG7`;?e%7?WzHi=yWv16C(-F7Ad1#>Sj^CA*ykTY@uT>X3#GFs)_> zX8XP%sw@K0l4`6oO^tP=sj>DnHP)1-#_9y$smgRsTe;xl8jh9Y|C zCc$?IzE1F+f-e_*T=1oW?-6{l;ClsMAoxMSdk8~e+?S!o9`=aRIwCbjUDU}k)Vc`j zsza41)fMzvy8vuB_caNCW@Xn^brKBwHCpjR%M&52(@3Gz7NFW-u`PhDG^wi!1YrI4+a6PsFC0yJVK;e?Qs#7pY;W`o_ zB3!#IKnd4m3!rc|T~#NjjS1n(6CqZ~*#dOQ*#anBQdboSCMldJ5kldhTQG39sV-4g zs!yo5=_+MXNr#W65Giay)-qX8gJ2cmE54DR94>v!KwS|kjreXn{vF6+tnW6LeZ@3)V~Nz?b`*VTAj`47Iv@pz+sJicBD zAmj0V`*{2wU~px`f8lssrTE`tJRZPMI%Pl}f^M@7$h)Zxp{h>Vc@{?9@O*wZJh3QW 
z6i4P8xU_cG-7I}qOEUl}9ZRLg5em4?QmGwETVSL>{fUufr-s|q_z;#+WXq}(;p0>QUyxUE?59fB{Vi+BETr+6NSi|4)`dZ5?F^4}FyJGC`xw0L_f)peCq3`CvQQR{lDI%?^TvDRKI)zvmr(Dl_em(UfK0al-As8^^-*kP;g zn9{Vl1+ztscM;&R1*o}1K_URLWlT5FVM})mE4&y82r;J)syi#U1@w`kE)ie}*OUm+ zeWEIUC0x5;62f)Z0+eu_i2zHucp^k~*wP)p$fSD&#i@f5uGbczgd0o*Si%{sf#?y* z!;(|ou`W5mB!u(W0(8kG0xaQ*6CqkS-6_AJN={IS8^WRHNG~lj3Viah%^H%^4a$T8 z{xVQwa^iC>gE-~8;VEYhQJa_bAOL|h&b$ni!mt@ z=Yc`-=$Sm;lPKzFyC_~!)NM_om_<=X>jW=~Ix!a!MV*+7l#Ajj7XCU7M+=0%N$?)Q zw+r4a_zuAvg6|akpc;PFw#5bCtKn#m;Clq$t6nWS9>EW4INB+AL-;!c?-u@c!FvSX zB=`cs*9pE@@a2Lp6@01S%O(9{!PiOp1%hu9yhrfuf_DqPL-2;+I|Z*#qapu-@6{b~ zv`6qggi(|K$7pZl&kHccL)WqaUSJ@1mt!u7LXX~nK)7j;!ZrOEqX(B{x zo}|t$7ED6(6xafk=J6x~EY0IiglNs9mwS}XHUyQ>*@Nmm>y`RGXpU%aBEV9MJ&BNX zogEiULby&_fD*1F5nu_|o(Rd%*-e5<=b?qVzwq9^jnC+h;Qs-aheTz6hhvZ0hqI#>h94etL^rzpf2D0iPvcuy=+YY$e6AYI zLH(|}xBwhR;N?)S;Th#+2bp=3(^CxH=D>f9<*cis%GDI%AGmwUia>XD5`kflZBQw3 z!SvdJ9y<`X1D$rD!w$6DfhIdpX9vm&pil0>Kit^hcO7_UV4}6ZieFQp4Zeut+6_wt z??%*pRg^1S)HG~AQKCJ_Ya(6%9TGzX+%tpU0lFA1AHD>T@W>=!HjhXG6xtC; zfDyc|lt&;0UZ@^I9hn?@SaxJ`6sRU~WO8_P9RK)BL3!wQdSr6w=6z)4F{oPWk;&1k z<2a)7==mqe5&tL1(fLo1qvM|-NBciPj;4Qt9Cb%32XrWRb6C3bwsI9AuR9z@mG6WS z-a$>Qb|$KMyH@4uD_43g)se9COi|}pD$$Xs`v}zyBU+tnNz$t6Cz53LFaM3pgx5kH z!0!dDlkI*2Jvgq0vUZDNr>N`38)1%$n|v;UWp1n}a1@%@{FB(BehCJ*=}3%wd1iMn z_IjkFZ2O6Qybs!Ee&~ZG&e&Dh#B;^u*cF~|Y)L^0cBo@VI`;2iA^OtfespbM1{=}O z$Iulg!?s7JNPM^_U_4BqZ^2IL_1Jsf4r3M-Or8_Vh!9P_f2J%_oxcH#47ABC+%Gxy1cm& zdYJP5oo_u09HzRk4=Sz58HjU8IFf+V(tWzbV`n!9rtOSwzbkGU_cm@Y#xrSC`ML0w!y z!r>hDd(mKub+daL&qMJ{#a`=7#X)#@&>|39K4o$M_vpbq8Fmq*oT|Vn3-fth@nAYP zU>@j>o{f0WqIj<23hP`2!*H%*T6^p^db6CTAj*E8Vh#jQ=P8IVcO>ffSt^Y+KT#!a zZcwHDcbc%71w8I~P zqCTAe=a~|9@Z%=rmuC|k?H#L zkxhQ;=*Qm8T>X5QY<-U_KMfbGz|oHq9Q|lphVW3)pIyzTsp)gK6vL^vG3VxO5&Xvu zhQ4Vp_{fBjT?6O6&nf!1Jj*AMzZDK?}3Q@V^EIzvC^O>Ok}FCH5V&CzgL>*t`ilMx?8bKeuzk@cjQQ6`Sm)U`w)%!%PvlSIFUQ&D zDwvVkg=2jOL*@rT^Vy+!@Z+2HTR7|;j-3#M;qEY292B>0KW%FJZQf`14? 
zC+Q_jh95m}vo)U=d*SED@_X&^AvxT8^_mzAe?Bcfn|$f{RrbPz-Eq8#p&&QO{u!mr zHKAaPCd0o$2$k#8p`^`l)+X4_)=e>}_S4=0vXJ?7$o%k+#qesg;lb{}764G$dvP!i zro;0F0`0J8e$_Y3=cesf*2b`Hm$7+qQ%@f}A@8OjZHA-E3rAmT+yOvTACWK{{vOy@ zVl%Fo4G+!?Yzdp!ZegLD&xOs`!sg#X%5ZpzxhrHo14GsWYf%=HY*kSDeXBfK9`p1X z*g~2J$0;@MkoSeqA1FDVrU5LU*zJ{A!+`x#3}ZvyogweGpyLJJ2nJ($q3CuTC)}wG ziWfIN8AkB7zA$Y3CgMc(^ln947;v@&6^ahRzKJ)^Shl?v-1g2rxSAyBJ-~Ie;zrmI z+43(*d##jr=&{I{<{`(*Pp{`&vkdTRF~`5JEa3XbcSYV(=z&*K!An!$?a~@A-(&aiJ-AQ!4xF^VQ)#<^uF;Aeu2hDNt{g$oWJP-S)$E^EE zc_%erotG0q<5&axk5ko~qu@=9!76W1hcq>Il7p@hXEVo~nGAtYn-RkMmKnK3BFX?x zdi^cb_We6?fDz609@M!wle6j`oA1>nmHR?nyK_r$7OpJVhGWC9LBKctMzC!G^!)^!CVnlL+lXE#8F%x>oKVq=S3Q>& zPjdW}l7H0iaBR)gaO?{u(H9y|3N~Md0{vV_?=BB@pR3-bO`IT3J4dk@<9|by24k1t z!1u~LZS>sxE-k>vwZ(j7ylG&(t~Nob?^4^xm!Q}29t?S(DRIPkm{?W#{b3#^cHoUc z2eSoiLkrj!q+vkR;D?yR8@N>T5tb;Prh$owyhZuJrQWe>;2R_3L*A!#5=fZZ0nvq{ zn-N`4GQnWQy&d3Zv0b-T^b*l;V&_}20oRj;7tq# zPf2tG^O1TdK@=_ zSyzUUa%|9?<5oNl_$Kiz<;}kXolhAG4bdVEpC%loN8i5f04{ol4PR$47C9aj%y~R6 zR>Fp@VDtM9^cA@I<-)E| zpp3;}l&+5uBoxC9M>CzLgLwkwYTf|GWDrjfbn!AUx4-DA#-})^DSF)Xz0090L(ELz zo)T1h+#AsO`(ns#K4xyw(+S)@;oajMGB*r8=4uU~iJ&XGJdO`1(_T1$+MzntArv zAZCMV`|vDxz_HVRR^Xw=JA;)Aei(vHgW$|rJu2m7QjNEvHyg-T7iuUOy8O8-AjAI1 zzgjZ*V^y1cCvEb1pN0&bycO>sks(z1beafON{zQ8Ug+1tpjnqv$yxbgS5FESU5BF` z*P$!vFJslt(C@G>01I3+Pcyaz_k5laj$M~WMgDm`MyYcToQV|TD3;csiECP*lT8#x z>Zy1#S@ej-)jA6`{r75YGqg3Sez0e4A6|R7?aE?%Kf-b6@0;)sw}f;D+TmgM!Xb}= z4x|&(HR+DzHZ|&RV}GKi3KV!k2tBo{Wg&-G-GL6p)E&5oZm8%<*Ns0!?3g&f3>pSc zA#;-&e?alkE@1P%Z@6ywDJact{%PBq`-?46gI?GJ-ba;*0aM$zaPs!xH&zDPE{IzM z;b?E;0$@HK_) z-{u8rMJOTSjci)xv;JwcE}^Ct5G!2t*N9s+uxU6H4q5vUs3YfeB;>^?PUqm!{vz;) z*?F7SLUENeVLqTrGWGyxNnwoN%_t`zVe?0vSNn!Ojn{ogaHcAIbf1BC3-9Y(ZkQkE)&}V&IV)mBsLc8}vFwhoLQTAWVVGSnd(E(INJ?!n@2QOlH zT)&w9MNImUHFzEUF9Rdxs>W04_mMR3Dp2zfYGc?32)9-BU~q~VN!aXHqf{VXVJ#MgZU5eSokS z*0pBCjH4kznLmS*~9Iq_#->9U92_XtUSH_idWn4K| zjVo!HAR1qFY4bkEnlOfaUr=}K=E6Ys2iV^bG%w)jay*Q_D=X|YERKWx?=!;Y@6z7* z=_nC+d5W 
z@1xeCNO*I=v-tFnB0(?5yhmZ;kObT!qzF{uL~$^YmWY3$Lg(tCv@Q^GHNMf0dG^rI zDI30yp6--Q>i=f-e~bG6nEL;O`v0W*-=qF-SO1?@|DRR=cdGx-tN$;m|1Ycmz3TsK z>i-+~&nL8^Zh6&;s+vgEVoym`WLfRvhS{D8##QH>vUs*<#suTMOD{X1OOHYofkh$lrG!ajDPc;kln@ndN{C&y6og&66hyMLQi6Tq`Ft-DpFk3cK#EiR zN|IC_dyqYKJGY&N?<6$_TZlwp3rV~sy9748y(DbGc1di($x^Td>HG@+KQm zY6L7h2~qquSjBCF?NZo+?FC>9PD*17)FqKBQ%YFM>#@s~LS=gksc~f@?9!#gk(4hb znEg(mzP`48wrA;*vWDfgSCy@3Tpp>eTfVxizG`W8ZOv>CJ0MR@ZNyWZ?4Z7i?Qx3F zQ(fcHgdU60Q(IS6Ul9Q>sSIQOvMNtQ_0?4%Xoz?gRe35b(kf5w5)UF@v^rANpouN| z+Q$0ID$kcZi>n$U)io+MPhD*_I>-8kWJJrWm)4-SysA2~3{o`IRaHhN7?D~}RYm18 zPi@COJa1 zOM$CyD66hnSydk?TTv0IuU<9VQ=(j5jtWz~XnEC1CrvPX%mllAPZeJT3-pwR*^t2F z@u;HHMNv_+IHfQaH7;2)Tv?>#emXOPc*u;>Se;RFpk~p`;2ZE%EMAO4OuUF0#=KzY z-1&O6VVts28(8K_z8lCvjqpON$NR8FOhIDcQ1s2Q=hp@7!pZT>OzQO7x zt0}8zg&qP;_bjgU)Yhn~!r&#f%TY_KYnF0sq#opt&kWD6!M zi7lx>ea5gxm4l`uq5@U2X6bTJR5UCbj@m|VNma5SJU>(#zRYt;;Jna97kd1mOGEy^ z1S4F9SMFI^vAnUWfi>Eq!%Jj>cvL~o_B1T3UV;iLk4Aaau+FnowL3WVRf`)dt7fYr zIIq68vCb2zs$5o6ePv@6ve-~nr7(3BXk)4QmvP%{kMf|@8W$}`7BzW&En z`b8P^i#$_T)YhXMmn^BStY*E8EUSpr=xEF8rlrfT#)q>#=Qb{1Znf*`O5I>16{zEh zLcs8GHq!Q2H&oVFp}H-vU21eu=TN=R43Pm7bEo{NP3Y*`B$zE9z<+ zsv+=_>Z;|7p()Ngck0yBuUJ0qitkLD?pc1$Ri}MN6|BgS&0(C-HmKJXH4SLxcsbOI zsuGIolyXazYvlO65+7o3tZ%4B{m&X@wumHL=hsu+plXb6eXPxFbc-s`WK{O4o{)cL z->L}d)nB&`z5(5sC3AQUN>5kTu2@mg;HmoR(yw~HZ0oPSoTw->W_uQ-wOuwDkyERg z(=_L4?AK;{0;}qjic@dfl1u2>NzV3!E3U3yzS@JDSAjRFtI^cj8U$6;p{77MXbe@A zm9kWoNpfhVJ1=mWQfXQ`@9Bjz7NLfryGNNcMC#GL(B1C@`4ts}0dFWZ3j!AbmkOy5)1EjIjX=SVF z_)e= zaiu-DU_9k-$VmNTTuwZBcqZa89JuVw_RCFsNMnDv#AW}5+dluW z5=fVyUbZddZAymYQ$~(sTb|Q_l6Gt}Xf#YerEvJ$|6?6Qe=dHw2i~|#2aRpVN&3N0 z8;%0VQE6;2}Vx&WW(Y4ykWRc#1&#EBA%&+v+h*G(SE9t({rj}^h`ILJaY|)J7|n44dK(z`9^O0w-9zY z(!dBR9x)uf5r}pj;Omg~b%s&C25DUnTnn-qH5{d_hSBjo!`abpIEw$>a2DTU2zk;m4h8(yDq3!|0+4BI>S%>g-hNB!G)Jq>Rat0p(t_$gH zHVn^`Ms6?Oq-VP^reFus*(i$B|nw)nSzS;D#Mz;uj+55{IMs5=TxG zE^uj@?{F3@a5xGsbr|t09J%f?ha+AFT!lkj=8=zL<>R>e62q~|?Ql;Vm&bjPvk=Dm z(0T_KvY3u9(U8&wlOz5zn7aWzUiQ1e0SWb<5SD7 
zfHUG_7#`Ax@mnSI&eUH%N>c$c2DHpFl>-L3f^ z(!rU063%qOpua=tcOy;pWTeA2?PR`K-YwuO{kcxZ(q$R)>DZK9_k+G%=$A`)HhPtR z(02&^RQT0%2)XiwEL%Hk`cEzIx3ab~*3i^^&PT{Szt(&wvNiI_kYfx2n-AG8n|`a~ z8BKi)BgQ(RhdS0zM!Ko*pXJ7`Z;Y?>bbatoE|2CUkY5hLy(_-)yZqVsmku4pk z)<>kj74*x59;RsZlb(N}SNR8htI)S(l`FL!M!JUp}MNd+UY%aP?kX z_zqX^wQkb#og(?j_THGEUHDn_XN`iMdhZU2|7)Y5r{23q=trYpsQ0Y+zc7mUIUcd( z9}WGBpkH+u@=S!jSaulnXHy>u{b=M_4*K9>#Q#Ik7YhAJqbQ#zZ1gRop#KQ;R{8r! zK|dXOV!g;yGYa}|gMOdTAB%UPp7i!B!K|M*fxeH|-QdAgNIzNV+1?)leJhSZ@Hr=2 z{8~SZxyjiEYCUiH0KRrw0mJi^tbD2CDNFCQ(wr}zF&y^tNte$F6U|2R*qg?*Id4Wf zLboL7A&3gD-Pj^F#36^StJJUf43ylW>;b6QExR137#~L;n%z^LPgoo@Yl9 z|8%J4^+NyaQP6)I^o1|!_PfGE63p^pe{98nsQOKM_Q#g|qwSA(iu|J) zudqMfBJ?MToFlz2_Qw|eXyRvoyk6oTjr{D7?>P+l*&p8_^wd}C$x{Brvp=@t?-ufm z{0_5j#Cb@uyk{&c>L)#&VfZ)?S%%Bg_+*vOIZ;VwUpJbCeymRT&drvdh{1Zo{VIibt=V0gaFesem~=;K|Bc(T{C)N*Eei^1p4*L;szcT1kp z$j|=$aO7eCZpHuRD9Up==+{rt`Sl+{e$~D~zf%FP^K(%l1-SH)z4q^3V=%0xQPW`SD0bhskEgMC--39vIFY9#v{Sfq& z<7v>BPSf=BQ5NdSkSkMvPlT>tb*kozW=m%nJ?sZx4{zAULrWyXrw8R^G&?zl;X^sE z0bk(^&6oB2pOcz4`5lFP_k(Xx__E8H$+ru9!P9hlG5FP!Q7)pWZYee*E*o&ev@b2Q(~tbCc};zWPg`*qEimz7Vg z(nVYqKWv15xzW61YZ{1Wy1G{7H61xkSaIZ??d=`Dztq2 zM#;ze{Q-V`!bdHxp7eSr!K9ypwzEa(S4((Cy|K~H2mPc)S}y-6=vzQvDD;6t&@PgR^1XDh)19UFc>5QWq?wR=!OA|04L@ z5zV(Y>-)~+%ZH9z)~NY9v+`xO$1wPMS8Bd5!>^u<^33c9T0pz=YRz}iVU*`X;2XR~ z^WAY6>3sm&g=;mRGJ8NlGvv(7AJ+kTuh)Flqm+~NaxwTOMK#}O`csY*@`V0^tm$Ny zBgYAz8+AH=fnPlt`O564I8NBYjg5HP7%iU+zRYnD#|fT0G+zsgi_a+dBH&wfx8{2; z>-Y0aIqzk9otm#EYrSN~n-k+1w!1gLx9~Tb&vO`jlhG85f2;W}Lt9eMA@Y|mWZC-D z(g$>W*mkL(^!Oy0a)`#{oeb)X*=(CkS%Vjcty%Nu8 z*CWU`AHRFnYk70Cw#U@{4&=KDzotKEz9$cZZxi??J*@fuGb>-_`{UYS`=2!5h48B< zwV;!Y8FhPboDJxkoV;$AGSm9FJB zN&21Q-X-o2g#O3k-X(6QxDQ#b&~Fxao47BE`=+?>iTkm*$5iV4#)W^fz|+M2nz%l3 z!{S~cZk@PK3xAWqH;em2aqkiLesR0SeO}xFarcOuw^-hTxL*->wzwCIyG-0`#r-#N z?-cj9;%*Z6S#fuZn^&dNIZ@m%i91W&IpSU@?zhBUA#Rhn-xv3X;{Hn9xVSyy_KLe# z++&vL^iLMIKwQ7L3&s6cajy~gHgSI??q(_fM+M#??yKUyEAD=ACoYwIi(4S>S>l$6 zTPAL;xNF3{UEKS`-5~B$;=V5KUUA1`A&z?E1aW7HTOzL24nA=|DS=N);FA*gqy#=G 
zflo@{lM?u(1U@N&PfFnb{}T9AaambeL!_eeJ7ty2zEifev7&yladG|Bb{)PL=3?rM z_Tt5>moElczZ?uc(^+6p&T|La^EMXi#)t9D4yQHFeIl~u9cs*=)ShVE` zt6EhZA$}SBOLSBO>lD}+sU)z9Kz&se{Y?%6SHX;c0$T~LsDRBDMcJ;vsuh)Wt4Vx^ zfDKhDjShmzH11I#(pXl(T-A39+(2MGfrbW{Sy)0auE0orZFk0T(wC zw4R(YV}R<6`+6M@ypx_%dk+HGAv|t=Bjt}T<%pyU#^kUxMIa>WmCau zgl9)&T2!7oF?29#>m>Bi4YGo=Wp`O>-Y<+HyeC(~fGC^R+S~KIU{}0$&7D8Fp%hm%$K!pA4PPV_h+o<|9pW z{a9cR=7OfZ2_J+~X*$L#;eJM%UyK3GApRR;8jQEcE~P!9WF>4HSEWpc7`KdDT)nb- zag}Z^+WJV9@xRBRuKyUVgI14{TsMxhmu-9;lKB;riT{@W+t(}q!$>lRMoc759l!C1 zad;1#!P5SSBGdo>MB+;0*>Nk47lG^Dg7<0kssDrO|33T&34X_*oN50-t&0c@TMr6G zy;6ACeNgZMfoTJR^i=}G7KDPY71%&6CZ79*7=O9IG^9b8b|Z*?T430aQ1E*KdjuW} zBGMNNe3HQB0?!t>N#IKa?hzQvI*Q-p)$#MYC*!LV_zr>N0{=!}SPxb5unv+Qwk;IA zQ(#ZAh6e@i^lA7g6e#I?1pbo1w1I(OW0nQa)iC#~sQd}cZ{&oV1g6~p!f}CP0{061 zGl31ij&GyD9)VvLxL9C*<7Ry20#DAB^aMUb;0}RH1?~}enZV@%onDi`odVx3aPfH> z{|A9#`$Ogb8G&gBgypecVAupv@E4p~9@+;Xp8IPkFKmS~P1#Y@V!$+YJGJV)SQS@ID7=T$aQAl=?1w(%lt`xXV;MEqsN#nmSaJ#@i z7dS5P9|bnnYWhD5Tr6;}z~ut(6}U;@qw!6Y@^=V)lE86+zb^2gz?TW^zFw#IU4aV( z{ttmm1^$h|?E*h8aCx)le?#DSi-tcE*obO4AN!stU+E1Ro-QzK$*A%RS#Z0Cmk3<` zeGNwihRqp8-(kVGX!ucqX^RHQ81D#Ncc+Gr!Dm3G-}7S)pCNF^|IzTJ0+-&U;qM9z zyEZC5)1nvnUV+Q+(fFq<_+AYU3JiNVihm-$r!#%n#!>K8foUg)@}6nII9NzHEO5EN zl>!(4M&nlr+$3;J;JCm)7Pwd7-wW*it>)h*aH+uW2wW#{F7_Kx-tzl3{bYe@&xi8) z1cq%N1uqhqc7BMzMqt?dQSfa7!~TzgI|ZgKAkuFaIF66qgb!Nuk7)Qa6SX|F8ASXU z0{8w&!~Y_1U6+Q}3QXHVr2o0Vurs9KEdn<^rQtUOrVS#}=ixIh^ACGO3O-d}*d|i& zY=L2?NWoVKJot=;uNT;ONyERe=wH$BO9B_XuHo^===7T2*05J#+CZW_^8|)HBn8(B z4BJQwjtNXVNu>X^#lKg>k617^Co!~T-uKU3g>0~-Dp z3x80znR_-Ed1X!{5yd?GWVjO5wc%8y*cW5V-V<8eS_f zZA~%0`vs0;V=&=fi{7VU=dn6|*rZbU69n!k(eOC}(_R(%|3zTfu2S$-0>h4#f^QL+ zHmyj%PT;zE8s2Q-=WF;Sfob=O^f{k4j4z;1^wewE4fq)7P@_Y`#}h`I>=pP60+;_E zjsG%W7ut}uAK)~Bt$hJ!3T*8Qm?S}Z)~sf_OqfqOrz;a>?1+iD8`gutZ&@3Q#elK-H8~N)5X1s)N5g0bg6x=DW``a3h3tW7e zhId-@Wg33Vq8B(9a{^Dc%Hztzef6Ufnk$P!HoiY z7HfE|1)rtJAFzo9o*k{3lKMyb@BdK~WO@o&ySg|Vnchle-(&%qZgMW|) z|1=H$MH<|h2Cq+p<7x1gG`J@X-kAo!ng+j}2JcRT_ol%-pl&a(ak^e7%I|rU(hK;# 
zjNdEx_2Ty`ey`z&F41@czc=xF3qN#;Mjw9t_@O&A2JqX3AF8IZIrkD=JjML|v*Ps_ zTwxo|=D0$L&Ckq50CFkhaElHJq%2J&U}bwbxeAd8$08e-+OlG7>m3Omy$;e)R&^yT zqBfM(55uh2>k3D}$dX1PQqAH0T2>aI(s^J)xoo&Jbog-8!^ThYEU{yi%(CmAd@G~} zB0YY+0+}RBTP{*Lona`+!6tg74)Kw(mvcJw*SZuYe)(HiLN)>fh8N*VO(AJUD zms2H*6l5x&GN)aYg)aq}%7+@VSh$u~T)jF?nsjWs{5E{nXw!Lgg-)-wv$&c(Ohunn zHeAC%s4`5QmS0(@)o)mru9bQkDy2lS&{zW1)MCLo@#@kdvT`#DqD5S|w$_r*EUT$q zRa4iUqwL&k9WA!0Z^H3nOS%JNq1jW}6pM)8J5 zAW5lAS?LP1tA@hts-dvVt{MPksD{E}t9T)m1HWe^YM!nu$#8p%Q=!9ArZi|xWvzcJ zrIL&;EjaCiju7k6)Rc#9OO~)H$PxII^08j|u*@0i3FSr``Wd;-w6JZ(m&BxHHi<|T z(Q4i4f?3TqJv>#Q6jf0;lCDa=1uMohUVG<}hO%TCK?49}+u)xHAVEhY*>>;4o)oe_wWiM-U zv}JB28vPn(aSkwI^67Mp6SpSdx{5?CNtbvymbAi1WJl8$9#V{C-PAWvB@)*$VP=w5 z#Uy4e!>AbBR7Y~5iR`ZlGXrA5|jCtaXv38Nw?mhOJW^Kw}q_ju#_d`)(=~h<1~tU|Y?gV8gJYK+Vm- zqdF%EShq7M?3o)*dhWPO-7Tj?V{oR@XJ|4#La7O^;HJqkxew5`X;Saq7;B>|Jlx_cbyaq?TH9G6k=n;fmm65EovUT!;g(f; a&=O^nDz +#include +#include +#include + +#include "gf.h" +#include "gf_method.h" + +#define NMULTS (14) +static char *mults[NMULTS] = { "SHIFT", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b", + "TABLE", "LOG", "LOG_ZERO", "SPLIT2", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE-0", "COMPOSITE-1" }; + +#define NREGIONS (96) +static char *regions[NREGIONS] = { "-", "SINGLE", "DOUBLE", "QUAD", +"LAZY", "SINGLE,LAZY", "DOUBLE,LAZY", "QUAD,LAZY", "SSE", +"SINGLE,SSE", "DOUBLE,SSE", "QUAD,SSE", "LAZY,SSE", +"SINGLE,LAZY,SSE", "DOUBLE,LAZY,SSE", "QUAD,LAZY,SSE", "NOSSE", +"SINGLE,NOSSE", "DOUBLE,NOSSE", "QUAD,NOSSE", "LAZY,NOSSE", +"SINGLE,LAZY,NOSSE", "DOUBLE,LAZY,NOSSE", "QUAD,LAZY,NOSSE", +"STDMAP", "SINGLE,STDMAP", "DOUBLE,STDMAP", "QUAD,STDMAP", +"LAZY,STDMAP", "SINGLE,LAZY,STDMAP", "DOUBLE,LAZY,STDMAP", +"QUAD,LAZY,STDMAP", "SSE,STDMAP", "SINGLE,SSE,STDMAP", +"DOUBLE,SSE,STDMAP", "QUAD,SSE,STDMAP", "LAZY,SSE,STDMAP", +"SINGLE,LAZY,SSE,STDMAP", "DOUBLE,LAZY,SSE,STDMAP", +"QUAD,LAZY,SSE,STDMAP", "NOSSE,STDMAP", "SINGLE,NOSSE,STDMAP", +"DOUBLE,NOSSE,STDMAP", "QUAD,NOSSE,STDMAP", "LAZY,NOSSE,STDMAP", 
+"SINGLE,LAZY,NOSSE,STDMAP", "DOUBLE,LAZY,NOSSE,STDMAP", +"QUAD,LAZY,NOSSE,STDMAP", "ALTMAP", "SINGLE,ALTMAP", "DOUBLE,ALTMAP", +"QUAD,ALTMAP", "LAZY,ALTMAP", "SINGLE,LAZY,ALTMAP", +"DOUBLE,LAZY,ALTMAP", "QUAD,LAZY,ALTMAP", "SSE,ALTMAP", +"SINGLE,SSE,ALTMAP", "DOUBLE,SSE,ALTMAP", "QUAD,SSE,ALTMAP", +"LAZY,SSE,ALTMAP", "SINGLE,LAZY,SSE,ALTMAP", +"DOUBLE,LAZY,SSE,ALTMAP", "QUAD,LAZY,SSE,ALTMAP", "NOSSE,ALTMAP", +"SINGLE,NOSSE,ALTMAP", "DOUBLE,NOSSE,ALTMAP", "QUAD,NOSSE,ALTMAP", +"LAZY,NOSSE,ALTMAP", "SINGLE,LAZY,NOSSE,ALTMAP", +"DOUBLE,LAZY,NOSSE,ALTMAP", "QUAD,LAZY,NOSSE,ALTMAP", "CAUCHY", +"SINGLE,CAUCHY", "DOUBLE,CAUCHY", "QUAD,CAUCHY", "LAZY,CAUCHY", +"SINGLE,LAZY,CAUCHY", "DOUBLE,LAZY,CAUCHY", "QUAD,LAZY,CAUCHY", +"SSE,CAUCHY", "SINGLE,SSE,CAUCHY", "DOUBLE,SSE,CAUCHY", +"QUAD,SSE,CAUCHY", "LAZY,SSE,CAUCHY", "SINGLE,LAZY,SSE,CAUCHY", +"DOUBLE,LAZY,SSE,CAUCHY", "QUAD,LAZY,SSE,CAUCHY", "NOSSE,CAUCHY", +"SINGLE,NOSSE,CAUCHY", "DOUBLE,NOSSE,CAUCHY", "QUAD,NOSSE,CAUCHY", +"LAZY,NOSSE,CAUCHY", "SINGLE,LAZY,NOSSE,CAUCHY", +"DOUBLE,LAZY,NOSSE,CAUCHY", "QUAD,LAZY,NOSSE,CAUCHY" }; + +#define NDIVS (3) +static char *divides[NDIVS] = { "-", "MATRIX", "EUCLID" }; + +int main() +{ + int m, r, d, w, i, sa, j; + char *argv[20]; + gf_t gf; + char divs[200], ks[10], ls[10]; + + methods_to_stderr(); + + printf("\n"); + printf("Implemented Methods: \n\n"); + + for (i = 2; i < 8; i++) { + w = (1 << i); + argv[0] = "-"; + if (create_gf_from_argv(&gf, w, 1, argv, 0) > 0) { + printf("w=%d: -\n", w); + gf_free(&gf, 1); + } + for (m = 0; m < NMULTS; m++) { + sa = 0; + if (strcmp(mults[m], "GROUP44") == 0) { + argv[sa++] = "GROUP"; + argv[sa++] = "4"; + argv[sa++] = "4"; + } else if (strcmp(mults[m], "GROUP48") == 0) { + argv[sa++] = "GROUP"; + argv[sa++] = "4"; + argv[sa++] = "8"; + } else if (strcmp(mults[m], "SPLIT2") == 0) { + argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + argv[sa++] = ls; + argv[sa++] = "2"; + } else if (strcmp(mults[m], "SPLIT4") == 0) { + argv[sa++] = "SPLIT"; + 
sprintf(ls, "%d", w); + argv[sa++] = ls; + argv[sa++] = "4"; + } else if (strcmp(mults[m], "SPLIT8") == 0) { + argv[sa++] = "SPLIT"; + sprintf(ls, "%d", w); + argv[sa++] = ls; + argv[sa++] = "8"; + } else if (strcmp(mults[m], "SPLIT88") == 0) { + argv[sa++] = "SPLIT"; + argv[sa++] = "8"; + argv[sa++] = "8"; + } else if (strcmp(mults[m], "COMPOSITE-0") == 0) { + argv[sa++] = "COMPOSITE"; + argv[sa++] = "2"; + argv[sa++] = "0"; + argv[sa++] = "-"; + } else if (strcmp(mults[m], "COMPOSITE-1") == 0) { + argv[sa++] = "COMPOSITE"; + argv[sa++] = "2"; + argv[sa++] = "1"; + argv[sa++] = "-"; + } else { + argv[sa++] = mults[m]; + } + for (r = 0; r < NREGIONS; r++) { + argv[sa++] = regions[r]; + strcpy(divs, ""); + for (d = 0; d < NDIVS; d++) { + argv[sa++] = divides[d]; +/* printf("w=%d:", w); + for (j = 0; j < sa; j++) printf(" %s", argv[j]); + printf("\n"); */ + if (create_gf_from_argv(&gf, w, sa, argv, 0) > 0) { + strcat(divs, "|"); + strcat(divs, divides[d]); + gf_free(&gf, 1); + } + sa--; + } + if (strlen(divs) > 0) { + printf("w=%d:", w); + for (j = 0; j < sa; j++) printf(" %s", argv[j]); + printf(" %s\n", divs+1); + } + sa--; + } + sa--; + } + } +} diff --git a/gf_mult b/gf_mult new file mode 100755 index 0000000000000000000000000000000000000000..016595f9cdf607a9eba14769859c776f2ad94b30 GIT binary patch literal 121856 zcmeEv4SZC^)%R|)fel3OqNq`!gc{q>uO|zlL}<;0UAU{eiiL=+MN}kyRE5H>pcYBo zU7G22k@kVsem%9;wrFi1Y!wg{c9W0~LMZ}v`yRT-2xsEgt(Es)ACZ8r`oR1Tff!% z|D_zj^AT5qrwGq~1ZK_rj|c9%>p$*(Rh`L2{_pCV7)cA zJ72LLR@la*O$o;AO0dEb=%TmyC@fZ4bNp9O4}823|EiFEf)YGvSJ(+fy+#Rl`9~>{ zZ~g;tXEgo-eiU^U3jQW36cFRX(aRDpc9`w(vQ)hNZfNOH z;BjPjhxZ1~tDU>925F?whg5}o9r#9i>PgBK86E;6NTfsx=HksOxjKPi3sAFrSr@Q- z>t=!7XDAGiMnD__@^=lAPlfu{1&<}9E@p#^_@F7teqtfn0X-dRb<7I~x z^%bhnXojK&Y9Iro(0Gyl9b`u-&*BH5I^feHViw{@wDU>4B`!oEyp_QdXkYosRI2ub z=!{alBU>jhiXZwkl|tVq9Ej7|D&z$*Uc+PAv}fex7HR%iBF@}(B~s`YNH%hEqx7SN zqKrLM#-C9}Vjd#IC_~Y|qUc2w{RtYTmpe#|SHK@4xS!ymo^bTgcL+3&gDQ#Qw^Mo- 
zMIP>%9&k6oO99u|;i-?1EHR8z0Qm?0?k1hrMkA#nxwfKfR}il$YzK&WC*Z`m@1f0r zEG1qOkZ~I1C4#6`OF{A#>8@c-3fqbN7a^4(eLuV%E~J#i{u-xWWhCi?H$v!(git^HK?5dRPI<6!f z_22EQ*y^9W(4!7jSf>(;m&?-fBALH;IF#5fJUp(ytT@+-=n^HiTaUIOx}L()@-F1G z{Xi*f^EfRY^pGbQwfia>0?)NJP#wnv<7<5GQTr&YC0#bXQ+*i)w<&BbqEH`!4iIYn zZ&sXb6qc5^QqY5-hh^56Tu{yF6K?n0^;uMzy4l&)nou!n`FPKq4}u5FJnS+-NI z3iE-1dn;;XnZn#F)bT};(Y?d%lBYh_o&PG47`^3gQAP%qO2JvpP{9;PS6ICNs za(b40k=3~+;gl4fZ{%2(&LP+JPJWBV5tYTZ1$RqEVKT!U?-Hi}}~cnz#Rw%g_^ z+>NM`a97~#a@{4~{6LGMf=pI-6u(jBt9Y+)FMaraZ~k0@5MdLWF}P7rZ)bh-P5cfL zxSKF>ZcR+b2t-e7FjKn#8%!`&fJ|foWH-r?U}M>B^7rJ)^6l~zoe?xTQ2#4aZz@*u zZ?Y@uq)M#+MzkjOVfYC)<#6?;GQb?PHf&9-C797a9L^?h#rm%U>p!dk16Tvjr8Qu= zhrQ!r8{DkkrA{vPst=dR>e!;l_}*9;utt}p?3QDn=3`dpYK`t zkJ#GeuSD2{$KyqW$PwTSB9u|3o~co<6}Y2(Hm)-HJ1jv*1!=_eU0I4``XX@a^^nqk zQUu^S6W|gIrT78>u;{{|QxH_tkr-`vAD1H zUg&NV8bnYOdY+N{Gc2%x>%ttlzec%F0PbRx`!fU+_wu=A;GH>=f965*bx1C&w-hB8 z3V9}*)~|N-o6I_cC$ZkR2Jcw=EU3UQ=PCBlrjAsL98pr7RGaoWTfLFL&~h(%zBT3C zGPqeu#pH^X$1jxG4fL$@J9a7hQZ@yyPd7Pv`rl>FBQYoWF2Yu{A zPvm>0HebYN=R}LlPPx^ayIrge^>)kb#1d=ZF1eyxT5t&=smh;j>Rw`%!i5}V6udSm zEmZR;F%^o`b~)ATuIQe3z+KTSg%X^5n<3=$JP~h!MeQduD!!k3!>68z&89logio0r zHTb{di#&q;);3waMAW{->OW5&b~CWJL>=>~?97u>4W3}L4fB!Bjk##E5^S)gnmxgo zE#U$^R^Uk`+Nr%-vm>t%off)SQ)>MjS()18iHr)lol$3dxa;vRfqBLHOjd0_qZXjT zUTSaIXuILQxNv&6chit)7c!;u`zP zZ!3dUSlMdjVhhIN|(Mr=JO@wz-G4A#bPej5==Ry(9eL$<5vY!mHsKUDZ( zyBI3~OjaeB)|W!KmAO>k3YQ9qZ=H)Bcd;(EGrkKQj3vQ>@1Tc$;SZ$Hy?B9$k(=Fg z%4YGfiKS&OHq^x)wPAb5M%&qUi};+#Mmtaz8x3i{f{iXg+){;&F7v3a(lR9&h4dR; z?rCr~ygWV(n-%{|u~G5+XeSza9P!v><&?vTHXN=8PuTpA1W(ul@18PQ z;r*v6<9jkm&Qnxy=<3q|XYM*pvG0$h?g5vbvg`@#DFqhZIosBtOzn=p0B%uOeRE|o z2yaO|iT>d0Khno2ARq4qdZFplf?qrTwUyG3+u2H^yEVQI^LyrcT47tSffEIcAjKmUj zsWZSVV@1SB9x(?In4KvFQA1nBVnK`5EJc2RChJREBXU(b5@gm=s_96q*LbA=cCUIb zDsWR-{t;gF4g`J(UJa-G{rOZAzX~#q$HnbvnG8-YMzvv#T2iQ##Sd3fteZz(%n`)8 zP@Pj2e-{Ak9b44V947FjwIg6}2w2pm-e2s-2n*M9+-PV5`db0TxV#I{y@5G=91$ce zAWL29cLCWRnCMaGmBptc)~!xLOnYFaM}533KAFedh?v&EEsFYBS^NgXxYRL-iTdr> 
zzQ(WRMde!MfJ@>708KtiZx`T=E7P*%9rmg5uIrHM8PWkAq_+^I-;K9~lO_TL-l0G* z1fTGlCoGI$`${Z zeKmX6H6*c;7v8q1!`b9wov6dv$ll$w+ZEm&*p_-zRQ)cZ!;St<_m_#E#{2Nie|S?DDZ~ zo}=56g`$04>n3YTT9%K=bW=BU-)pT{BDs&Jnp_p@1G`!GZTj-rv|qt&qv12YNam_1 z7u)FBw8z``R)=ruTJW>~E|O)?C80MsWt2@F3Ob-rf!x~E;eiyz;-}(RpkqwLbwNs* z+UBX)IJ*q#1syk1BC!1VY@=()Mlai{XIhJN4{yiLp|t@uD9!a?(o}M7CcOl$DjgtK zs+ITcrv1)V(O%Z#Y^&^QLK|B)e&{a_Z}AVbw!5758S#NQYatPg`M}zqTqkmhzntGi zUx@N$jI3qX<;!W*v3t97ImwmLNfbE9`bIu{qdV()Uu`NS((t@mY(*X=1f-zu-g>@_C$AUHpWEZh_S%yT#o+Vv=7HPYHw+o&5_nW@qMEiKF+Qu5bIf*xy3syA0eD=6vB2N4$C&qk7*Cj> zl&2%E%3E6-782~|1iRmWN&g*DfQ;B-rhY(cEBne41o?UaD(=Uy zUX0%l)d3gO@zHo&Q3wsZhmCU>hmFl{zEnn-FP(7=)rd46=EgzUdGB^(smFXJ&Zr!` zuGhm-mby)b4z*{0-D`tQC!Pa&*puLQF@B+Jx4aD0WOZB-vY>`^7mo%(<^YK>9tWO* zc)o(?8aysMH{-b-&-d|E!syh8=ev0B!Q;nMi)R6zU*TDT=M_BlcsAg94^IM57oL3N z|1F*u@w|$s3C{zl`=|Kzsa2Q=9xVd9NKf?#(jIkYxe^(RHBm2Fy}n4SP&RGaqd>_; z>a($Sie_3&#AV0oZmc76Qwp<+ylEFC^X|x_DXvS#Y?{0zVk?w~jdrlu;A7Y!1dm~z zbUmtp@>}iYsMbqz zL~eF?SU(x>MZCvp+KFV_&sAHHVcmn&VAkLcgdLL2DhnIuNNK% zZh|mTl-kkP{kliJ*028i_n$0>BQ_;y_ z=21sEGt)Vk^%HuX>;OC3gWY*zKc7OD9l+ZYoa?aQs8+n%@k+ZgKHRhT9}{nxTb(t) ztxhQMsJDS(CX^~{f?ewlsBd|HL?FvpJ}n4H?9IbDt-oN?zR1`V8{?2UrLf48H|>=p zuF@3xkBup&#I&&EuM>*21?b6eyDYgjvk4_f+vR*C`nW9pcRd?ZqNw@JAUhV?kZF7U z_hH}bY^TMF&)FsRz#07^1Q<{ZH~TO3s1u+JrWK^@j8W}FK2kOvkezL*Cb{BpU=!Ab z*zJa?bX4QI;s$VRk%QJd`vX6R(D-c^1Zl-)jO|Y_)D+1L9&}qERWP{tu;+9RKaKqj zxvAJL@+-w-(* zi84{bUwIe10XpwOQJW)io&Yr*rd)2+ejXj&L%=x_7XTzs2rWb$Ky!I2pM#Srk%yy0 z(uXY|lWI(1(o<0=6NHqw1yTYhE7xnAqy`}+BK?1?uv>(f(4)LR{#0Q%3NaBAVxq$p zsT6X;nMxKLV!}0;R+C&#ybm_!a)OOoNc*0#c2=~Iqy&CQO1w!@;w?~PJNf`zP1Gpi zyzaIqE(4YTRi;5z{1u=S&WVF3Fkf}y#fNP?M9K_^lyWXoj6Ipg!ec$Gl4QzmR<#bw z{Lodv%Gu=v2`tskHcXIM!g7g1!SN(OEA0ZHy&*2uqy(GuMh4aeT5*gHsX|BpBvp82 zGA3Ttw@yyA^6?{^g^HUU(S=I6zJ4>ylNQJmIegeZE@HbnElx(WCfDHgnl!Ogv#Z8q zDXj~W@%CyL8!*}xu6jN2vL;QmVgj$C7%og!$&rVxKv)X7fGRc(30f$ezT2wxxQzZNJ2lP9)mfm&RENW7&5zHG5u5>X?W!;<*B7AUZk zSQ3BK0{NCoOX3$=V6deI8aplUBkcPU^Rxh#0eOj;T3{S*S|p}v0jM)AiCeY6g%r3! 
z3tUNoZ)t%s6mV(*MuAc-Fc8#BT%rZ~q3;rXv_K(-cj7Y|1ZXx=SrVOE0D{Ys*kc4L zEs4!q;L8?^J<+NKfVCs>x)vy~SV|KAZRDw`OgxJKM%;%&uF+r|#-$WST;v!B9pRFN zk5juQ=Mpmo=+?AAi|Dx~36SqYpo}>yaSy`!dYXMC*7MNO%qU_TS`*9PH|$I@_Pd%r zl)~PaL9-!yg`Z5Z#djdYBHzPb7i(}cSVqmQA@wOD5K~ktGn{;HRJb_~<&5v!4eOG{ zl!2r`FAy;J28FPo8GwMIhAEjBx;a_BsfcZ3TcJBSdQ4_bq?x(2{b)N|Jcn{)(fuw6 z-W>iRLU_S~5bLT49nHYR%I*9hd6^;bYX~l<8r{}U+!gl}J$|F68rijJPxv=fyMMQ5 z>N`q4Z;ne1Qw8$it(bseMI_GPc)R2F1F2NuZ#dW}ZQkEjf& zl7-id1j=|Tv%S=Ry8YwoQ79_@+*e5p4qKc0SFb_XNFG8!U!g!;58kQ-yRDZ-gHdN) zUaA#GR>5PbKu6Qq5y;(%H4LmH8^n1dFQ1)Q{RQNYyVJ#Au9vr|WAN4-h3#n?JHS*J z*XLyL8=babuQ?lf=Q?D2Iv#OXp_7*P`B%L&Gvb*RTvcwSEN|)f=<4 zE5}hFv=**mH}=r3{Dq(6bc^F{zJ%K3W}nJK-X>faxMY{mj*O~tp?@~*hqh%89XQ_Z z7`)A0v1jgZH#_bgvds;>$xP0m@#Xj|X7dv!xr<-kc4~dV7^dIS^Vo^Q%5BeTu zQyl7=S}n%eBwJ6&)^01#qoOWr{E_y-N7z;~Ye8RA0JhR~$Mnnh>+FAm{#afGnQMV= z&_Rmh^?XIe7sr{3V{Ycc-j>-XuTlCQy5l3W`X$|I{hwU1L+Fn`GS(`jKaQVCf4t*# z`r{+I{`jEG-jUh*kD)V;53;sw>{gzJPN+P%%j&mB<2Gp-bW_0&iF7(H^ZBZf z*2*dMO_|Co7L}H{tX#c@Qm8+nwXqAEuu2OS_BaqGeVS6vdMK%GE*DCwG`%=K@bs1C ztetf!v3)khx*5rO5uS#A&4A_4KpCN-q@U5{1=Xj)?aC?OI7vEf`DcQkss1zNH^Cj$ zzU84Aq6t~@Qm2PH2dYFcr``X}u6n!w>PXQA&SRR=+6HBISIi!`+S;ncpJ}A8FTN_wgq{xz)%fHszYLukq<7>8bF0=m9j87}+_cQhBgxdgXyYqNP{8{MJ zrDbSftI*7^#ZR8UAY!|~8N;j1Ul_41##pmBW5IoP3eoX1LZQ}25co_4x?+_CdP5JD zPzl#LV_h+aKhJqgS{Bp!Bbe$n`ySSfZ6G(q*R^;He}b&Fc<5^JZQ4WnJBpVJTLQx( z^DPixm}wpkz|Q{%9NW+nfWiQ##To2&1{}eC&frOBph{o=uRsPU<0$*bwI`{I{TEQ` z@?dv)V0Um|dGKVpf7=S|JHWn_YCT3z8Q0~B@G8=Qu2{<9E64*vYlA2!@LV#kz5Oz| zo|CL-#hiXv>YU6A)Fo9;xf-eqyV@ctJd;r0XUFN+rO_)bsezV)IeB>~&0x41)WOG> zhaK^dN*;1*ZMfAx1sgp7R56I>ltUx=_ziSGi6b65vBuIXw#x<<2~l>-7>v|nRv$e8 z@zRdXct2>QuZ2|}pH_C@HiM$nu>lahBdV)`C)dQxhK~g=_dJz}(L5&`C+lTDX)XG``5uK-)u6|0R&3 zlm`frbanN104GuqIa^s98tuQpoWLb5^Y(VQoh|M<+ht@DM20#?0x$4M5-0I&r+7=O zc}F9M!nS$YJ}vgpPNE6WX$3lNY1!8?ZIb=aO@3Qr*ezcci5Y5*63y%K)DdMgX` zbg)p!E=^a?tZWdAwEDs)`bMMh_BNqLr)woQ+)AiM5)R+93fOCG+ zZg&I^-T`Y~g&l>?ioJNuk4Ej9+jv$K% z9I?Y{`oR@mwJI7sAxZPe(*{T)g=T@g>|O83sx~RO7^L&5c^)-@F4~YRGsY*@)}AZI 
zI51o+FnZ2chSYnmxKOcnd9J`MB`3@`ql&Y$va8wZm0K%)Psb1$Nev~ieHy>KX4 z;v+|^gj_+$KzgJx$No@)>vgfoksNK=fxaiva;KY|*97PRFi zLip4%c8~fPG>F|;z53L?U@MIXG4J43$ds$SgVFpLSx^tg^k9(4J9sUig&bP14Bp}y zHdJx02e}k$yLWIsy|l~OZ`eV z>6zuUCI}H;)YYgECMr03ua|;%bMz`fC?B53K&V+DRHrhy#i! zVSNhBb^^sqi=I?VoU;y9F;y#7JxcF{xoNw3M918yW0Z;Xl+ zjiUa*N&sf}D74LUMLr_33vEdJK`-H})j*)jE+k62EB_CQj3OxP?>L%xTn{-ikI5c_fAfCvt-^44&W>&((&3 z*1O%ghQW3EFj%V$hTXD3eBx|ts~1XNCva}^47(P6#M3rs^gH;!*~6~Q==X3U5A}1D znlXF@E$BXzN3F_axHe7;)5whJZHI=3Xmti|JZn}AC#Iu*j?(Q$_k53|WbPg!0!PV2 zhbSbbGWaA!r2=YLqv)y3!N;XSkHPoH#AndZ^BUhah;RT==O~_l5^QTt_?Y{pnMOI3Q5ELN+k;SYyZ@EBc3!PbilXy(8 zexfmW-@%=ybK{I2yox?*RR$juylUBn2Vm4f=TImMpSYA>89GBC;GnE0?}#nKduy!8A}DA>jf-7dMsgXfRzPbJ7FcCldM_Of)|#gW=30^W8GYr z7>&w}xpBQVX^W*Yt>UPnp34L(R^w`Ioj_}b^dh~-9Hh^R&n9M|3eoHqI)6hS;*5*0 z_CYyX?$cOFmy~k?6 zj~>)#Euyx*GR*G4ClhpjC{Q>_hl<8lOke!wK%vVEDd0S;3?{LquXlMZLYnnhT0Nh5 z7G;ZCp60cji#+NfN$d5+1G<&u_ywGwh~8(CN0+1WEPcA!K5mXsu4(Fkw@4ju54N;i zRf6jyToZ-+4CPA1U8695Mo8KLtl~}D0WaNm#c6$W{IfO0ltPWM5%16nEb*$7m(uNo zP#MYtMOH|`w~(D}^{{3+xZm!fQ_g)h4_hM#-_KK6LvW8Z*;k)mgALgGT}#GW`1O}M ze5$R~+3E|YU@o&E$9SS;7VVy~C4hC9^SCeEMTRxl!zqJ5GVv#II_(xP^8nbUIJZq3P*ejkHdo9wcv35kp5%Ga|W_(~#Moy~4opLY@c3joc^x<8_<|i^mxnu=4qQfaC>4_`XV+!g`||tI|3v#PK3)6J*y)!6i>>$t z@$uC(SKaAE&A_jsM1c^e`RPFx=pkTB%e+l zlDq%V5!8R_wxL5$(Rbho-ghW0tM8sf-x>DY;ymed^xv$R8gG6V*SE6U<}&OKfGqXj z1;7UE*t7pY%~_ia9!2{M9tAtjyC9=e{tkQX&yVjz8on=FbOwBjf#*4nCnTTOCj-~z z+No|&e9xMHxaOYo&)2))^Z_(ZO9=D$C;A%W0{lY*I>*@0$vY%2dX9UYck+7h&ca;h zRhjbbtlP&Oxrp{%{QsqWS>s}nem?oR+B55Z!Czm${=%Mp)nh!IeSaP5(}TBOb6yL0Iuv(uewDk?)n_iFA9-MeOzQANc{zN!UoC}_%L zk$W+VD#1r^lfkYu%MP;fJJp>8#1{ihrgU{Xu*zk)tgSAl0)p4}nq96!HJ^RXkwo3E zBMY4|kHR}W=qU@u(>>$!nkxyVsupEfKPk*zywo-l!lu=Zu)BAa+UQd415n?6lTOW~ z(4Bb2O}>y7zaH4-mp8dzvNX9DTASRzwJJDsYI6S#??1_FazB%|(uP|3H2~ZSPQ`-9 zBq{VaC@7Vw^>yBYrFFMh>uzya+q>!={vGnLiKUL@7T6xk9n?&0=pqF_dylsm*8+Hp z)zQLW{p-;1lfn9(KuH)$hbmF5X2ML1(iMQ9iJZ{2kJvcw;XGR4MGAIp;e{6CwW~=` zlqcstRd7y$V?w!HQE%qQzs)EIB{f&lN|}D?Jk0`kBmOU~Cg0ktC|)UaD1~$EO5qIh 
z@MBYA`|-fO1Q&4k1}pO6C+`8 zy@75#WJ6a)3Vb}$FbN+BP~uil7rWLn4@_X1D3v@&VRAXFr7TUd!`dVdvSMA`BY{s7g2QIiUA!MR(8e zR-MhL95fCH4;ePpo_ zA3znWVJaQx=botzREifyj)?GB>v5pYi;avb#@#QNGr$MXP-2X!8(F8^$5I!%mWssv z`xcz?v%_+q@z%OUBlNgDEiTU#m#@X;o8oL*oXr$hpv4uK;(BRuy-abvwYc7=>^6rM{&Pv7LapEE68bhzj`!M_+H=Y{wX^c5$bhZl46BIixR5jTc&apIRCgC;+tcm>f`Zl(;7LrW1hp~MZt$2oXkvK ziV-$(kTr4>3~AgirN`OWel!&7p3JwRO#uDP1i~pS2jvOS-%X$kH4p|G<$Bo!>aT$y zAqn(~33QPLDiEM$CeXzih>W+9Yq<$@i3aK|Kr2k3Vhu!IZjkF$6X;S6MDBk8T4@4Z zrh)nh&}$~pLWP<-B>%3q0|WXg_M+&5I%;yQX`i)--H z9G;;kW|)2U7paRWBDT8-rvH>ojZiXanKjl?RL#PVBj8nEp%UDcGmsAHoe9A_q(khP z5X?q8q%aeLIZ21~$%J5L(jk2_A()?ZNKqyPvy=`wFB5{fN{5`EhKLyps%;6Ru9OO! zs@cmoP$$T#dJkNZ!Rw(E9z#7#>k6n#aj7nBM>xf~MoEQ$d?9jP-;I>Yhv~4OlE1ngi8nduqD~`M z0bNekmZiVMtqgrxnz$Q>EGXoUZM+ajf`ae28?~13S6=J;Ty9`KxgJG}lUngNdw;Zq zmd)%9aS`Vgekp`4z7@@7k%{>0Vqv;y;$jgmkpSB4D=|L@-yj%vAKYuM8WxvZ#+P-C zwqXXp6)RD%dVRSXrc^XWeUTUWSA2Zz5XLYfX$;>OvDsA~!T=PSaCBC0Z8Rd0j~#Tw zTMIiH+i&m7Rp5ZzN?8L9LySV6XuJ9zleJ~UY--Yi$yw^~N7q-AHW$0$y zCF4fCSl$7$kmkO^wSX*=bqEF^L1^mBf?xm=gl4`h2nHZQXyVI)U;q+?=DjQk1|UIb z+RK7q01|{|y(|a@AVFx-%YtA45`^ZwEC>c5L1@a$f?xm=gl4=f2nHZQXu`{aU;q+? z=DREi1|UIbx-&t7>#<~nqt<0LG}#r#KD z3~5hHO_K%x(QcmYjQ{#Y#KP~AAHOifrPqq*MU+gS+C~~7rVTkRc2Nx~zaxh7M6qJz zsC*p7%U*_M=^7WeG{r#%rHH?cLX$6v`%dZma(p95H|D~B)lQZ8_%z#>;;ydAzLXy* zU5j?}10{0RdK&xEePoRI+MlUmN8|rVziJiiO5IW6_)VQr9z4-on$MSXXM1#j^8^RZ z6FM_Avivlv1A@Uj(hcL(78-0&(@h`#U+AKYl+|!$mPdbJL!KOp^Om*){t;&VA{2(F z&`s0XLaeLa>ZenKt!)Pa7}?UiV&Mu6)y}^e)#4P%k^E)aXmx-5SARnqWvf9vBAK_s-3D_;8mrvz*%-W>W1}{KAk?TopUl}Pp)o#;lb`MHXkAP? 
z%8iC<9_K(@HBVZ6oDCVLFw^N=iB2m(k5g#!A;_h9jePm^U798JN#aRe_#&_9HN-^1NZ^L$BhBeGeABKII+au>Z_x*u;_=xsWc z^%lLk-^W|SIgxPIA|l@dSWoBR36lo?^dp1@#dM6&L(saXk8$)PIGoD~8uBAJsNszx zxHh68_bX`7Fq}wX08ndwz6WZq!wAr*K7>PTCFu-rbojScs3FfLo?tii)PcN28zs$#5KsjzyW<8}M=n zk~e(-MC{j$2r4Pp;V&}MQ0{RWL@c-g`FfrPiBn+Yqa$JMXz1+gleX^wPrgZbrccS} zF2j$=X*c!lBhO_?dc%TX9?kGsa|78AeEHd|_{{T6{p%jM|3P1c;q!9_3!dEs4NVQ9 zcNw8a-Ztst2f_z_k5y4@uMKPn=b!zHL9^YU^~W~641Dky@iaN4tcrr?;Oe|()_Ao4 z1M0eacz@PYe}duoF+I9s`hjX`ge8s_biu5RD29kU{bP##2wA~&hqQw@I((1Kex0yz z0gDXMXy+?*2#MV0@()uXbeI_fOUn}P4 zQMvCc(Ty*7k6gk6&*9oET;)FvZvw80`p2&zKi`p2YXHZ`S4wf0Z1{P&f5JbeV)=ml z><30T+tt8xaOb!r{uST@M}Jqt-BiUtq~Atek*nZ65hFa8Q_h91$gF&K#V2zw(ddI6 zOLC)0UTIz@o82wEcD1w=wg03OOC@;Q3?Af_k3u0 zaPCrz{}0?12~b=bzXm9xqQo>>%fj!tR@_y*I4MkPmB^R^%Kzj8JPAHFqq?7lztrB? zcMg9PZHANbV@axA3VB457Vr}#k+7>AK1n3mtR?vplE9@fIV-!IUaMN*cVU48uOr>h zNfA0Bkyk_9;$TMLKeFmM>B(ylouCDji5{M*P2gSl!y5pL-Qn}`$yMTkM^O))Er9Hg zQ}71}D&!HFH6#aUgw=lJ;9Gh0Egm2F2Dh&I8r6v$!;r(n8p&O3eiNc1!>4OS58_Sm z!>V%lR4uBAM@eDwjDYfN$-jcCE8?pU0EtMo)ibq`NC6aaGiCUm4@C`I0EKVvsDA(= zJz7cih~)RaL7DKMdHZ!Yz%bHM*!N1|c)5kaz>6F*+e&c|A=3?ee5L2O&Tnh~fXgJc1lC^!f!aH9vrr$f-$S}B)k zDN%|&u@cpw6sEyWB+v}%K57Qlp14X>lL8x1HQSJYP!QG7X`ru&V_w(3_`AX~N|WH0x(t zDWxK1nOyr8Z50!ZqLNWsK?MRl1Va>13%DRM{4OnPA})wQw~ghRw`vk~B5$FVx77sK zhnkhwPD5=njiW&{)FeG!4kbb#R{G%}8fKk90pRHxcs0PBYsz*YOU7GsMS8f%Hm|JVf8Kp*EStN3lrqb1lh!oLrz? zxL7SM*iP?Rt!d_PI(@@wa`;t*R+j_Gcm?u94lJegyrpy3!Ic_S@8NUZhlC z(E$BTfWH!;p9UCh0=z_k?)L?XBp`L%I|#5-15}y-M+wlR0n8|V39J$Siv}Pw2EC}O z39v{5j4}a65#TWmaIFb2l>m2X0JjP7V*+?I!1X4;s{|<506r6-lK_`!fSXN#FN3w? zC-+jzHHz-!fxas}rc~ddR9iIwKJ{%h>c<3FtpSFZ08bI%j~ZZ%3Ghb(L^Oc8weJwX zuK`Aya(qgF+c-c&ag>&dtV?wAUI1wtAFct+DDJ1G56}RmrX0N7d<`(%1n5ge9oS={ z$(uaTcjebisRmN2Mh!5*1h|UidXRt0@DBk0kMie2BmQU;iT2ZabXkaGa0N~tT_{Hm zk1kSjjBfAx|Hhw79Y|ViQYvQdFL2lnMYogRujcDr_&br7&%vjt);qhpVpe}1+uRk? 
zJZLm^#Y_$~5HdN?K*-_m4bj3ycf`WrFie4NTwIGsgXKkD>UV5|er z8#(RdBj~_^0Utu-uYQCF}f`30Kq_doMc9 zwv|&7m-aoDT>fu>ppKtVdKmt1?795w*qMVa_t9$2P;{i5&rjy8n|&$CbKPa z?fy=*Nsb+|u_Lv6=p7#z!&;nn;``-Pe|Vb1rHOr#>TZN@59Wp!iNoYAg2V9178o>r zpnYIdOAdD8Y_4Gsu9FM@0mo3e_mOCliQcDZk7&u9 zGS+z|M?G$1hv}PX>}@IZG1?Ns24Wq4rDa?BCqU)YBz&)@2}tjCv1w3|;XnWfE)+>u zLnK;6(WgS?)Ft%6uyhDV-5>%Qss`|t8iG5w20Lx5BosavcQ7J9M(cJ+7prou(T6P3QiWo>Fxv&xEL)dEN+E*L&kd}!YgmfdWox&$k4X`l$ zr%TK5=?RVxubq?M)F?Rq>#!vkWyx35_l)%KRmAZXrIh}~iuC6y62Hc}zX!j8qlCYv zP?yAh32Z{`5?hF0v)86d_-G2XN_7-(IBqJ@BFW2`5$Vt(7nvjNS|ok?$Vf?^pr}mR zxf+q=)CrLs5nQG^l|LvAA2tJ9JCjQ&4I}nYX0(PABmIFXwT2bDKQmgxi`|aX7Lb)fyYNo;YgQnn`*%vahw2aaP!8c93f9fo!ll*+wyl_h$w@eM zY6aY`Id6JHbM^$C^z4`5dMNOs3HS_R;jXC@TvkvANpG4Cg*=c8u_Kxd95m-rm+8)> z@R>ct3I|hCs0#0zld0e_oBun(V|FQAiAXuvZIi-Rrwg!Q_8PllP6^djyRR0_tUXD8 zp?je6j^)7+f(j#7R3h^+jEn^RA$lMRCGSHh`PVcm^nFEvO5nLfD+1)MN(!yU8>nK< zgUc!@v`mEHA^T(>Dg1kcwEGrmdu@FG#lx_B!+Z|ms|4r3GP{V3eaVw5R9G#Z=H9>+ zM%v?ODM!B!?N>9-zUZ|svWJ*d5*%TFxF4*nGZ$JaR6{*{`%}L+;Msgg(Ixso2>r)VG|tO_!wtUh!l>dgO3fc1~woAj2ZlH3;D#KUIzPoxT>D) z;O5BAE=>FJr#6GGk=ILUy`n|OA48nXdBVlklaUw`AFlCDjTsXfQ3SleO{K4o^e-7* z_hcTG;k8Cb*IiY@KPj{IXfrhjpPJ#~e=B$c(%;&}I-Oeu+eq$CS4C@JIZSlPJ~!}` zX^ikO1A6|5j}c;(w}>dgxwaC-)|R9RYGR%68m{^(_#!U8f(^)ycM^Y{gyX__g}Lfbvg9CP8iR92-Q^TLb)zfN)6-PVvZzB z9j=-*N0O1wN??vFv+3+)#-Ojhj55hIXC?SNBPChotc0-5h$MrYl_XruWs)tc7G_0~1a zC$+{CY($@Y2BVp0F?8`sPa7>vya1xBe|j;}@L_yWGZzrl{~vHp;GOyemmlb;HC&-r zFtpR??;k-0!Tazn8i<7XKV87x;!K+u1a<)iCdgY(GpzxEi33jg_h;R%ftCNWLAy$oJ5uqY!eBCBZcyOJoSUwgQ+ zo#;9rzR@j~#II1f=1)1(!J`x+Va$)jTPQQluK3yz93c9?O|B5H$EgH7x(G*)N@PHV zdA-}i-y_!Y*tHguU?P8v{~KjJq`_BMrCF?c-)8r zGvyl4}!W;ef1Z$7=4n|}@W{>QLdd;BX}HN=2qE{^HB z8|}o{w_4zO@PhPLSO0-FlG8w*?MTk_by?|6=U?0zCT{$g(Qi$3Mf6=pF8r71_|uDy zKX+w0jyiD+k$e<+j3l%Sa)prCV&V8fErM?na0iy5-o!EO?yeK9h3IiPTfkPu-1ZlTx`zX6Gx=1MDey;8wqj>HJb$7n)Mga5 zh!+LtrYJ{uZc6-~MQ<-$eB%4wSfXkC2~Q3?5Yzw3G=G}x2Xgw4+<|`M!*vB-a$Bz8 zLmA(L(=;(?$2p71SxkJ(wqB_gugHNkDLjIB)R=Z!Z$$!d^e!n`7~wWqsQ=5AXut5sW 
zN338IkJ`_pK5ECuGH~+~-rV4Q4mUNq%M&yXej&}?=im%P_lE~y8~0Vw)5jQj@*{T^ zNOD;B@4`ty4JR_p{&AX~)HU7UhK%O(?zjbWj)w3o*kx(s2R$P2z25LS1AMtFI0Ge< zYdZ2X2KS!ipf2qL3kikqK+H-Pa)%Z`jH>chY@PjIiEWrDI0oMbJq0`50!n7oEF58> zW`hl=rR2s#a|{Weg~N2eedOPRe7G3(!e`;I?0TO`T!>0|y$jHPqTU@qC8J*I0p3HW zY_aByk2*oCb8YTU_)~&x_<4m}zz(9q2YH3Kj|oSV-v))@h_Wxp=ec4uxLbEbNrd)X zk;h$V_0`+KJGL?LHBqMDEy^3Hmj^$c=rK`}XP6l~&lMMNe?yrpvNo|B>j_Z^SvY%C zd^;14lYmsZw+RVyoOC^qgX5&k65dKYW0deSUP3?aKnQ4X2SU`MHWQU{aQ{I)ocB=2XFBi6MD>NlCpc;rjZNb1EV}%3j+Ibd&py}r z`}awkAqM85t}OX{7FC;FKW4j^Ggv&=(E2@>J$+DVdi2%#elx{eFB9=W1$4}2d znCd;-_$c0M8;N~P=cL-2YDatxR+!11uoNV7%{g-rXy+cDqDGfb0vf09as?@jE+<{7 z(d8p}fcwq5Fk61;{-HAapKAMG)Bn`)bLf8}%{lZx5%28#-|X0m`aePblTG`jOnDMK z2>)tujP?q89KE|mW^cI|zcGXxYILwcH`M4Tj0=|&c){mmwQFf>d1z)iW;NR0MPA3A zoUTisAe+25>24=JlP$A)8Jb8Bqb)4yyIuG+Phkso1rB^?8CN;SV&e`~3+(_rwZ~|8 z5qOVO6{!YjD|qm4-pC&*QRb~ALXHdoVfrU7y?NmlL3qUX;*b;OSr?#9^m_oZM7xRf z-+mFb98p+6n!TX$R`5$usFW^2C7%)(j&K?lzbAkJu^tsst!f0NU|eyU#h;u9(BISZ zQ#O5}fB(dT@211mKMd+OD5TV7b6fE3t>Et7URcI=^j0eB{rz-Hl?QO(7~k?3D8D^$ z1$_8Rq3?huK-O&DVCqk{4Lg5v^op~&IOb{?F{_V28-c6~&_!-+#_8({n5E>`NYCXN z+#qv>muGN$^+Ng}0x;TGyQ`D>(|QaW)R!+M!sEoTi7p#r*WR1dexrwm`cXXU;|I#t zQ0A`XX<9IWX?o0 zF(9h%ce49)j=}^>P@O zOQHXusXCJh>H9EQ{pnNhE`=HUv}br8Qh>VD#h9hB5O04R`{^jm`S9fsR`qhT``73} zvRWh|Z^1oY#`)C{xHUYQU#xJ~q1&yjjh}ntAXttaBp)7))Za?J`x#vCBUYD|{n}2w ziUz?#-dBUqaa?8R-^F0=MQYW{bd)bnyVoH1$B5CxjcV0WHLyqonHxSyj&NPFh>(_5 zJ?(PtcGkl%!R1UAZgatPr9iBvuD>Aa5H9K)oBg&gi)VP<5P<>G}=gjS=#CrQ>eo`+F-mF%=h%e#ljno?bq8>)0 zmzcm!nT^I-eMXys1u*fU!S;h%Tyfma1^ClfvKb*`QXnJKO=*g$a1oQes`-i(;8LqYNJ(!{@HXMn+lWjX2)yu=)q;!YBnrQv7-d zOMJo;>8qA1YG8%T{z1kLZu$a!0^4ypbxW~f`{BWH3z~t2f;WHK3YnkKASHbvuWBiz zu$CTeUkW~>Cbv-8A_-PrC|^rr4H<9P2W9A&a@-$mzZ!(>`XokwTq;7pF4P2h|%HLjYeqxCx7kb98fu?)F^DwkFFLZtV8s zgVpJV!*M2Eixk(bG8&FMHHTtLtbSa`I<$3Tcc2w_Y%ZniTuKG1nNN53=(0}o=R#7^ z-L!wUYoy@wR4VtWcW&Kd_S%N7SxsHD3sg?rfdaGnJI%i{#MUw6a@F6VH{;8@gDzv|-e0`R4g*Fjltki-B7R zLVvoq7;!r(j{bD-G~)JC9R2CuZ^Rv&AUUi~ybUuLXa*fhfN;2sS3aqUjNkao;s#1YlfX=;PubE}-VV 
zaA>%lrF>a#XOWLCz~;Pgh?{IVMx0R}UOrcS)RfOtAJF++^|gXHUlS?VScspYK&SMPG7)>s39n4Xj^ogBZ)(~9r4mxSrBcnz7Fe@j2y;tPw)p{zn=Df z?{Q2!eEh$O7jYVP7ULfVYkd6USG!y(WO^XVFInQxuk-2r5gmV}J*rJ?f1(;jR<+E! z(coXeG1E*Z4j?W~_PAS#nM&NaOn_`8qak$X^%B0G;NZBTszpppd+y_e$eNh&X}ov> zGi}D~W0sE^8N>HvrQvA)sd^!5r&)#OOU#M_KVZTg0Jqr?3(Sohp@+bb^Ulw}ZFOP_ z=BM-*T&X|sK`nNeud%%9t@c)0kr?N>0zMf=nA5tI;6W(VPRQ(IwmCJyp<7dW)yvZc zmBw-=r@j#OU)XI%R{(_+c3z=1Mi-BF$g9vjWbcm^{@L35Q;z=~_WsLlH_}<6VQ)?L zrDy(JCd}+i=Kl0CO~c-w0*1jq1q_S-SY?O|{c$Uxw=<^Mn(?5W2Qj;dy8(qfXw!pz zcn}Lt0qx6!Ag~A)@nDG_JdXz}_2Bu=7~L+wJp;+PG2AMy8AwPILkAIJxMqMfJXj)v zR02V%L4Z>E6ts&V)lNa12oi1-%o9PvoPt&nq;?@#L#U!CYBhqDTCkT0mT18O5p-xl zn+V!1^zG5lP$oW0O1W)30(%%eBG00IkJPz=;8xgZZ6Gt%9P&*Ft<&5;XlI@H>~g;C zCvc9rJS0@*vW@A=~UysGUb zj)$VcU38HJmIR}zO)j&(;jHn=C`3p#E=GJK({Ew^^e`UQF-yL4|VBE_Bn9JwVTwW=vor9Y*pRmqY+)lh3+KvX%MU%hdx93F@+>XK3 z7k(cD%C`;|+pS;55X4G{)&bKUY@43`lA*XC+<9I4I)}8Z4FrT5MN`=E^@kJVgzlB> z)}!eP$tUy)T}s8EpCbY^7Pg^rxd}`8H3aEGNqcgGA)sr}A~Xrj=lG*>y!UjJk(_Nq z1sDmfYS2M3uoj$__}5rVHsFi0`VrZ@m~aQSfkI=Oyal6 z!(;~{D@)l1XER>rdaP;Ot5I|h)J6!{waD1e12_uvzs!_xv7_uTbf~Pe zIuT-6>;oF z5QH~m+>8j(nG=l13g{L_9~h)vhiF@%g7oZ1*+;=$<-ud+{;$;Tiy~d^N&4#_01b7E z1JjnPt*gF73X!!+@OX*;2GjkM+I`nSx&}VN{fxjtx>5nnThzZFZ$Ky264;u=Cqrmf zMs(m!OvueEYAol|KFME?Z+X9pgnclp7JkGpqqquVfj=gn2abW?t|09@M{-f}=hX{k ze4<>{FLnfKaj9eJcznVfUmr&OxUT`YM4OLPaJ?Oc7U41n?t_s1P=fxz-Z=CbBs!oR z@J+-ep9n<*A13IINX^>L+T)$5Pzql`S;&D&=J=!WWVRo88i_sZIKJ+!TMWLbizT#c zFy9XCj6aOo2&cuA*mPP<^s9@|(Jamh<=6)}^qb2a1!Bg=!Cnz=`?caI$xGs7bNr{@ zLm8@j2^8WlGD7A*M-?Xa^B>VQiq`UUqpec%Yrgi`y;^1=CVzjfl{1@6lB~QK+7THAu@| zp>nvm(nKMSDEN5=CL3Od%NZ}MuO7CTD#1{^SP62B@bTcLJZ}YeUnu?PCIAyJqo2GZ zMNIHd2xzS(V-lma{IrGmO;IyjrY`12Cr10`@my#W<?bSN?TFDVnay+OLlkU4{?uq-p~Jw>Sw2|KWn`wPb7~UIPU3&VK8*P zpyD(1QM3|}xf!655GxgJGfN5eoU1_-y|6en+Jn!m zd56T|BuR-SuT!iI%El8Obu3tS7p;h}4sA}2FA;~n6t7I(bbaKiuW+z{VP+(jTFy`D z3fq#k;{Je=YrR~1a4FggUWcUxoiz$6d=%~P;FX*tTKG+1Es|{DqTej 
zeeKe>e!holb7Xo)C&8{NDm2&X-_+lHJ~Zn?wevh~`GOtFn`H3At~&ulR+2S3BLyW6m+k$&{w)Zf9+aC7q$glPi&2PRH*7CErurjE1?PFs;* zLGx{{D@6$6I6(c2=YiBkp&Rr&~glS^!X;hlKV3Q4UrWy1p zOQ$W^Ih2l77EgXQrPsVc9eWOiQ`b}Svnd=cCFVGX(usY}p>$%fb10oy@El4fCKaU{ zw%q8V9=6=UH;9KRVdj^7%OKtKp?t)P_u5917pBb=H`;#hbBtpLDm0H{yAc-S*kXjm zIHpNR8^=dU(Cgzkt_Nrwcjy5c$6NIPjpH^wK;yVY570P{A|S@`3cUUAAIC)4b6_Z< z`#CTab;~)FPQ7-vrPsVc9eWOiQ`etu;TXrn9OqCvvClb_P7HPqr4tLDL+Qk%qVzM2 zW8z^-*vxO6>0jigahw?TDVa|33m3h|A^aZ*8|EIvF=;0TBBsZF<+F_MSMb|ETd~OJ z797xS8`~8OTa!*MeZTbt#4W9&H{qA=w<`E-1@5-OtZHr)JLfiK$Z=)kakdpFlk@py zKXR=ETjP<^a=OhN8I6yjjkg;6`7ua0M(sa8V(SNsV-?<&#Gz*OXs2x)jQV{QZGr7n z-x$=FvA0NXNDjrvj?V%b<%2?Wv-N#Ou7Xk8vbz5pXZX6U*?vU z=Q*R*q?PUu9a2;s01{uy&#%YQ`L)ZrwVKg4zVl!L(>5FDI!EIjxWIc#67RV&3n;49 z=o@xrK{;~0l1m65)Srj0uO?hfr~u6n$EDh3!el*}+YoWjO5ek8pV0SMp!b8qP}_em z2X#0@lLA_aRMlW!n1fPDA(Fw@FcQbTZJ0xghPV6u-jUOy zvxY20TLzTkPZb6SG!w91zSs$;{A@reoP9uhy9V0ZY0=qGG>9UfB*x3BR7msh(fE8H zrmC&tehcT<;V@TEHuE}P$>`tAs!_2mv*Mk}a7KDQ{;PyW54u&e^)b4Rloc=O#Ci=c z-pGRsiArS0Y^aR{m0*2*DE=AxnSS5FuVH;$-8|od;|yB=D^rW+D^th9U7*csbT6Bq z2QYUf1OGm*pJhQqw-6X#UR7$3L)uva+m+fTDv&A~g>5hWx&(s>#cyi^jp=@)InU`) z8a=TJB#lvg7_Dz~O>yRVlgVBiyAf`$y%;^8dpw&drhU6!*=5+I0mb%yW|_I$9> z<}HFb_MKh8w@YtPx&c~?(oGb0*)zEq$ZeNB!jKQy_St08NhWn|Xm5K)qhYjpI{WN= zRLAYJpZe#zLJGc$Ir~gD*%_E=CiA&$vVCCx1by|+ED!bhruJYbSSW`{c5^R{L4SuA z+U}o?N%j}{NIM!oj*k4lYLcy)a&wDp!;<@GBW>_a=G%yXsLqVAWk--{P}&+CSuhGS zBTBL(DtQEs9q5!7Hql|&g}2eY5p+9vH6LROR@`@D2Es>Oq}5|+^qHBL+k>Z>7p!*W zoW(R5(_Ml~;q*dUT@U-~hgx81eP=8F%xJaXFTS&aZ@#>ZKga0Lys8?Q0c-Zz?-J<$ zwD&gfQ5M(!__N6Zfns;Hv2t6gYej{MH7lqIwQUx5bypLO0#&b&wzf7aer%bIUvUncf7V)?W7rzIMzE)&A0Zr56M zp&F{Y22gt7U4Wc;z(W8#!}}QbGYnqdQmP;xW3@QAGtXi#c9`3rRJRL=M4 z#QinM-$!&}XFHtO5uu_5-&@G{mW2K0TcN&cP6VC4Bt_kCGYekncbxOg1$OR^(x$u7 zrh9-<2h~6&ElJ6S+Kv}h?SV?rx7@JN*_+OvraAzoRmpRd-Pq$u6{AiFjr!&w{{NxW zb#d|9r-R%hoU$8z99-x^J}NF$)lipD{bO9{s&rH==n8mLf>71=D2fBQz-fnnoCSpE z$%lWqIv_mC|ESVueLtG?(JGE6eL0_iyPvp|eX@P}>6QtYigFZm<2>|@>$b4pl2xa; zGYX9ycO)S(x}ZscQ?U$}j7k}=t=rp&c 
zvr}DCbDX(p6bfvMB1zVmB>CncT$1eVjO;)Ut9d!+3NVPT1nO2v zEY$a6`dq&vWOm`)ba)LH!@4*ds@YcoDCF&3|pxYNS zyFu@E?BN1*W08hvm#2%HzM30ql@B+l;Ui&ZMq%EjRd=D3SPFLON|OJ0a_Q6hNpf1T zDb>FIInb=hcp0khIwuu1`uM5@l_hb~BvXJ_t?EQum<6r(w_%7j+UIu6F4ZjHvGa8O zzYA{|PAh0FY!9}oukfL0c*aa~x0QZD^a5Do+ymQcC~s`1>h(plTYQ<>jlnW*!fP&K z<-*<9STIHDWnI`ZeTuIodmac;)$G8SX?x7hv4dXF@{Qv==C|B_JWEl9&pP7pFoe43 z_{M;_6XqJGGPHMx7yD@Tgg*y64WlJbuKTkXsuxxzk8ca%{%3XhvlC3bhq1#fj&bf~ ze}p4IIC;7Dey;Pq2i1uElCHInv#fas~d>4G{YQxWtBCQ(Th6v30KdVN`JD?NOPW@lIFV>gH4NDnYVqrKIDyC2nl&%c_XUhLcqblj&D0QvDhz;eAAE5MU#_te zG`u>qW`_l#H@WT^#`8M8fzLIbv54T%5Qz4Hv9-gSm_cx5v36ST|Fu zH$&HYR%yF&#v06<{TW|Yts((04d!QOr=lQ}@ZHG6arnQ8^1uj99l^r&f4K&)Gp%`@ zE==pLQ4m0W#!t(F(@xI)DVOs;)E~IaWu%CO=HOvM37#t!JMmz(Bo+c5v`qu z|B^%zU}wdM`s}Uu)5PXaY1{WSe9ctd1#i?INBe#q-{aZ8J%Tsdcd+%w_JUS5v^=`@ z9cY<84>k^`{O@SrXnW_eCEl;vH;U1VveFIQRkJqMzR_f{zuIo!2zF^)`v&&1xb_Wf zaa{Wbwsair`@UPyni2HOC*V-4F!@$P=c*1WBrEpStk`U~Y7zlM(`q8J98-?#c4o1W z<7YYAwq5z3V@>4Utu=4ZHJ-}fi{R8HNjf*t)$XSbrFGyuiD#l|Cs``a~}oCM>eXEg5b&ODmIFyzM1JETBW9wC#h? 
zTOVmVfUl>!2j?$qn;&(q&vf1vTiCzYbar72WxDa=MW(Y`*-9EVP~XF!_nVuzMSbJN z-Oeu5dt9B6NYU^Y4kxib(uu9I*nG0p)y#EbtiP&8!!y4y@YmFh?Y1WcvHpwEY|wlG zLz7%|^tr+4t&b^|=iK@%c~+=bU|jxXe9s2D1N(i5YcsVGSiyp z_i^mJb-NyT+V#NZV6<5hZ2pr<5GEJBJnfqm${TV;u=7i)>HTNwx{Knn`@;XbGhrrX zV&GilL>l7cGg31X--c&N#~+_rd;K(V{}AkX*~Yw{3z&~#12!j5@1Uxv+MB`J3kzPM z=+!wsn4Rce;DN3*FUH}RuDC-c^WDqL1+cBv<-<7>zj<-W=@<;4%!M-pxYx$J69ixX z0*1C-ewcyv>9d+)XJ8Hn6$m<;Uv<1sEi>zrj>o7pa{TzLWG>+8nD|333O85d&Slie zyya@#JY0AsC|88-0dbx0LzI4V!B+|r_Kwfs;e>#R^H|o-RJ9&H(mx34V~>CTW|jhW zlcSknH~C_IxO6pPVG#NNv#-~}QwH7u0DFaM2A0Q3ozonab2m74v%q71G|IsnoSj+d zSvDv8V$LK}j@&hKZY@xo+W+L2~@=7r=3%0V&^{`R)OJwnx>1;5nlC z<;)Z+R4!#yM_Vv^oP)!pXj~8ew`b66O+j7jPeKMM=K*yfM4t<==2yw}eI`uHsTFcq z9n$xkWbuBJPq-bp%mf97@{>C=@J0(#@HD=0mkElwX&%omq3Y0#@$vi!7A$k#jGgQw zeQh%U?ZQCv3AP%%`TuwP)1T&%e}rJDGM!DUX_Wu-*qT=u4Wey@a(L4T_S*Zf;EhVz zy^fPU2OS@cOn@V3KGp5xwFmUd04_rlO@u?4$x;`cK;V zJzj<}5tnPofwgCRb1uV>IY!^mt5RhTrIMf-uQS$U;WiA+L49h0!(8XY9zOL|ZzDh3 zo#FpZR_uKAGPC(I;`L`e-rCWy8v;6JGm6G6f=S#7n>Nvzk3&GmuToBW-rJ`eX6ars7vvD;;&(eWfdR3rl(IOA;c6HJhr#)$GD&WN)i;qHOF9l(rFqnBw z53kI?_r`q%xE|Rzve;!WzY}JGfz)nSJ3%7wc57z2oC;5)k)UAk(aS|Y>m3jCL48tdkRrQdC|`64=Y~W17+X^VYECM<_W>Tv*;xf z6-^uX3W)yKH|6=Nx~QgnAXfeEQBDfxW+jdMX#+V}`~6=pjkzk{N*V*IttrL!L*=A2 zp4p=-GnGy>I9;7p{SPP!=$NKM&{yAvn`cxfHyy(LV1LI(h-i^>|0cJq`7K}&LD?Wr zbE1~b;}pOZQZ;kn84xDSM?iV-A(a$W#wT@+bPqfYI-nkXT%mLmAJ_{|ttOf|Pyzmj zI;eZ#7mEJ)6W=2}YIK!`ztTw(Rhi*Y{bdy zzOrx$7?p5;dlj>S73J8@+b4cBf_p`+J`q=Z zEt=tm9Z7VH>%a+A^X6sUO*DtWPO)hU9Ox7;MyJ>njH-mNm(91#Y?=y8e82cT)h`m& zypXuKKJm}Uzo|5csybiM%7jIishq4jdAmd&Zw~AIeE*&He=t76j=_c=*nR9Y3m@kF zCe?*yL!qvBu)PeuVIj^opNQdKe8;LQr2fUOe@yc-*x3}c&T#vp(`VpAdrvgm5p8%F z{UZBJ?`E}ss`WxgLEccqpORve)B@huynC3Md$29;`%B1t5ksh+`O)n5ScFV%zUll= zcC>ka!G5Cj{3VYE6YU+bpV9j#Xz%DGK#~2ckM)f_r7m2bk7v1=ss5GmA9<`Y3T79hMM zZRSD6M{s9f5IM&k3B7QJFcu!^4mOQ=8eaN6r4&2A+Io?UCqRX-5}zo5L9*>ukjH~f zhejG+gauJ1-pJHg9!z_5dm|^n+{lsNKgsy~(I5;s`M8MR=p7k6*_sbHw)wr=Xw<|X z*$SH`zU}=ZxDCh?&VsF$R~_31U(+^&{K!-C_eP)IM@B;+_+o=qUyN`1u@3Q62!}FV 
z54=T#8(nYVg!v0`_0?}~Z@tWcGm{5-To~tG`6_Vy6$`v-?Zh$6SAnD77oFSan=`kq z)t8Zg=!0jY&--3eSEjm~&w+?=MW17LV9xXw{W--Mp6Kfu$Qt|}KEl|`8}afruYqlm za?E8{z)nVkp8wzmD%XQAbiL)qw#=JY{^;CW%pret#w{?q9i4rP+3Sx^za;`= z4|jA$cI3P6*cxcw7M@HqBoW?~N+Z$uZUrk6cXYxt{HXv8lSF<2Osqac_E_O2fTSTG zD?xtmb`XY7V36Y$wEE9MlY?0Yy${&i14a7+uj-IPfesByeGbW z1*7O$ry2`dou?pU**-r$q27`OQw*8Te|J4N)i;1`f_mql4h6zF3CRy&zgygSY*jx; zJC6(dEIu@xovdsH=%iF-z>L9%J)&Lvl<_rvl2u)5?0RqycIfVx*+J|&;|JfSSB$V5 zmTT1nLX&L%{f98ptZcN%b;cj?I%D@?uQRsgH5UJxb=$s-o93OAeHrZBbv6GM<>-1a z=_ca`6Aa8e*f+L;nFr$y%)sK#lgrNe@CU93Z3{0C!cI(_i5YWi(|!k}lxSb(!36s< zZY=M>zRc)bY;oQk!Nq=zHZRjF?Z9gP@u}}p(v2BKQ1)fq#5&qLQ$_)IAL6UoDkmP3@TTsEd_4_|< zgCyUhhJ_aM!JY`3aQFm$f33dDHcmgu<+xb&yX_i8k4Rj%(MAQTLgYMj1-cj7Ws951Kocher-!6^?uxA5RG*|t^;oSe%ve=Z&B7-Zr?#3E9o_KYA0yF%pX_tQurL$IteVgB3HuQ^oF}33#qnT3}E98Sj}Nx zb^l2NE0-$ygtIM?&H$N(-*|(2I>;^5WT2F7FPdzkr7qwx@PFfgEAr0YSs4^_mX!9u ztLgEajWO^0$^k;fGc@{FvUg3tr_e3Ll-PZ`hfa|Bcp7cb)0w-V7Y0&2SWX%3;$ALyCDqdrNQVEg`lc%rLaQ*-Ky^e-!P6MW&mpTx8Ri%Y2 zF<-Q9x-WXYhYSC~)&-uRxgg6Iy=c0=Oc@`McD@oUd%U1^dP{-%SqvS_gFE|1c(Dg5 zr}-Z7fK^&tGyA&^opj(MEUN|n>YMd9U+dx#bd>NKas9g6T;TR`T89s^(ThCLF20?8 z99cn;eUqb4j4if>{oeg*BY%~CU45z%#@T)Q_9Sb!_{{LzI4%aSMxkX4_Q1JWQ|lc zK`ivD`)HRKl`gy)nXO3`D8|E!nscDkW`DG5k!xuJpt~rwv9bBXBYR&)|QtLo`-_SdnS8g36Gv1g&}-cxwNm?5!2FdHpu~;Zj~iKD zc!=>8z)&H+<_w-`_B0LQqEDQMnTYhKME#R+V@1DtvlAMZH|lOu@u{0?!ns&fZFtJ; z**=^Y0GhCcH>hCa{Al4CdVFf`SU|%wuVw|JO=K!mwkhb?rM8K^G6nVi1a+4JZ1pI< zaJ_1?%2FJEQRglAEN0Gka?N9tQ(vDGK9fxCjxhx)^21sSeiDfrcJfZob(@(dpB9o! zS%$$4XP*o>-X7^x3bhGg@%4Dk+wcm^sdNv%8LjhRWZR3T+Sg8ku1UzW{aA$2Iq@ zYCm1m$RtE>9dxyC)Irxq|e8<-{*9P zOVN1W`WG$ZY3vXOJ_!+H{SxED2rY%80e>IVs{IXIhzKPXHH@Huj{mIc($URY{b}SO zri+F>aE4Mh$7~ZdrW3dqSj6f?7pr6KL1~c9tniPuI=~382*szybH4;~;5G!ImWbR% zQp7y?s$E|a$3yG(-mSPjoc+27J^?1yZ6R^o2P3YJ(72c~UI)FdZHu9{Mjt{ZW{e z8tfW$Cz+n(IZPy>Li(c~iJ4mfEvt3f94M^TZFN1UsLcqcz@u2X3)B04y?s!b^VGA? 
z=>4SEX6kV=?(Brv1#`hV@@`G2kU)|B6>fA z=xmwRJX_BlPw;zRRoM)9-|g&V>+Pr$pf%}-R&*^#-M%Lzg zCUIuRldVI+wmuEPr4jBRBx{1(1ArC#V22N3(e>%8h3zW ziEX6+EO5d1B)t83KjwTbTn`(V>T~aFY1TJ9xthSQ}~C z0z$`r$4JwNYwaoCZFvW8959EyJ;4p3RwveSwuf4?-O=ptdH-r=H;r@+_V9|krcL>n z%4ap)={Hr{>@e_&%Gy2@z6@K3u-^dp5oP+KxxVOROn=Vs;uI|xa)b|DQp{FeMq`vt zY&)5AepAz~10!{vbY={1Oi9!mGr7 z6bGnrQ9Cyv{Tuk@dkOiDF5s5|+MMv`{TBz}uI5RIV?k^7471lejISOeu1k7({*|5b zGN*Y(7S6I_yj=W|J?39*fb34?cEos(TaINwt2-gl1?9k)Wev~ z3uc&C%p~dTF-UVX>0I-QT#}wY25G(~^_y4Vj%f3Of-y*oH0dJqiYrO_jWI||H0e_F ziVBiejX_$gNyFw9t4Vr8deZfcB*kN{XA3p&Y{4(WtiKiSF@m)L&Bt7SJAd!ofL{iK z>KH3nJJ5X0^*`b7og9)e7}r6^NUZ~CKIVE(RLna!;+Mf5#(Ruloj~(3*KgtPojv$v zuqW{zBUmrce9ZOF@b}JL_+_vc@g5^sAJBZv^{?^w&VKwd*j~JGkpjL-4YRiP@E(Bf zrh}-f-QDYlfaGJ|_C9~t598O{W3K-YZ@;P91oX&8eiwvU7VOx4DO?`HCSQDiVvM4Btp6)W_?7(}g{;&QRV5;ZvZioP&Q* z4IjFbXXAFFCzo-D?;78=+cvX}{xBoSQ?{rsfGbg>jvKast4hCD(|BkK8;U{s~y`MBkQ=Sz*ze&@BX}xZhQ;f z#SrHGWzXT6;=Ud?9c801KF~?lMwn4i)Bg)z2>g_w09GW51K9tIKDh@5#UGf$hQQ z#5w1;;R7SLRimQd6LTKB7luVYgNm|=g844XeBBlnXxto1dX>c=eKa!o@~D#TN|v;V zh!|lh-?pR!?E_qQxBH?6=)(uU-Imn<*qiYWZ=clzMq~51m;sL|D)|HbuiyqhwHv=6 zT9kv*#u>|N@Y#a)Sy{UA_E~w@ltx8|(KVql+V)v*2HlwUSx-7r`>Z*L5wg!3Ot8;- z9==^9G&gC6FX1MT1b^CRbv3U;&b5u!3n5W5UV{v1gP2bPmtfO0(M~HF9*3P)kd0-h z^+inXnCw%kbq9K3%(nijgj5qPsUo_=SyBZ_s{KrDNtKG~Z!n6VAPFYF9eHK0$6*7O z9E48Ez**Rk`3Yqz5{1B`rg_dTwFHV&kUkaLx5u#4N;cEkXeAp;P8(VX%Rf!O6j{fl>Le^0QSL7k&(dgfc0_hn7$j zap1fB$wQ&&Jj7t&YzAUTW9k){!G_aZn@UC zGg`LCA5(aP-{J4TBgmAso|xF*;k-pcXZJLxGuNU`tI=@{9QvDlZ4Rf`SUXvba%9H@ z^&h9Kbgx94NY{LTdM&1>yl`XN1f~=#I&htL_$G6Y?~e%A2(wZfT}>RE6h^1c2t_Z+ zDvVz1E{uMfJLqtH5#s{CwciEuC^&gnTp1UfjO%S+!T2digGkJAA|GJYeh9+Az`=)w zxQ53=-Ug_pEf7li#+55!6)S7-_W0E6m!wG<>lgn}dS+aD=!1~n4=piFIi#9;{AKOZ z3H9Qn1C&5K#_mquN4K7YQ&LGRKWjblKnSMI%|9s%WXd=>Z}etaei|B0mE~vPVDEvx zTw6_#eq+XTUf(BQynOh|HeG#>Ft_8Ym-$w=T9!i%z~1+pmZDHFf{Q9Yd=A68Nm&n% z-cf)m;h&5z^1Tb@IHNEK_B|9sXr#;kB}LuvXB9P$le=23L39Dfj_qoF&Q46zt^I*i zu@b3b9Q(-a5!pD)GoZR_?m|?x@(xt_PGpj+caM4yhxu1r 
zR)jBnxQFoh$PT~D|3Sf*eoCyX=>?!eSWPwe2WEYQ?Nq*57-sG99oT~!|l$S_0lD8sK4BN-j8pNZ`zbb^nQkJ;|);TcD{vub8lkj zkCF&IbOhL7<@_#CN*-Q~bo`~&d19n97RU}^Y3|7XE1-|y|` zaEJCCgE=%;*1QP;P_~#^EbfF&BnJ#64PjGUwGU@tYx|982uDq;aelS7PnjfhHBF`Z z!bL&)Ze`Hdyu>jjjOGz*1-M|u$OFVOnuNzsZY3414U>Lp?U2)u4GY5Zr{O&H>KB#;IOKc2Vxy< z1l{w#u2SuIs}VGo=Qx1YbJRd@YxZZccr(=W0oQKe!YTfJuz^5hI@K_xumn}88kQD# zi}@13Re;w2AX(s@@e>0ogOL@ELlh8+QHGsGfjKiUX7jy3|CoH(j1&{)M$ zMbLM&5eyw|1ejbMT^8I&8-eF&Bgi@02=b3Mf}*31fSveJRV4Q3NfCITQ*H6NG;I+k zPnMujtHun|Z!pC$q38mRI&03nQop}kqTlCK;9Zc{?P|`1Ix2ucUu|!PCgipSTW{ks zust54eT0{}(#3A(bo_q?BTA*&^fCy1c7ldR8eBC3zNw^Sbc)# z9R>n&a*~gJAgZFlXYl|=FT4(D2ZxQ(M@ZBEATA!U+)=nOL&5r4c^f*tb_^Z4{~{Qz z&%x9PgFUXM-BE&h6;j9o2E9#(Uf}nR=Z+ar*UXl{@4KpJ!Zo`k z@Q1GI*>L5w1Rn0H&Vg%gOW={N>bY>`wgevSs?LS${FcBUyQY|CBEfeEzC`eyg0B^PuizU6-zWGs!4C<( zUGRqDyQZ~6@NNw&V^iecBlupy=Lo(}!siQqNbp63QHGBgC|Wx1XZJ^kGMPokPuU&O zaTmK4I__ZiK)qN9t8}iK#khCWRdbDpzvVfYy5vLRv|pvHH{Gj?JlE)l&%W6+fV*Fp zdaf}$ow^t)k(hY3Yg@%y3fV0hfM50hPJJlK9h)Uc~qBeg-R(SCZk zmAJIW5IsC`9>es=u|52*eVy=8Hh4I;Yt0uFN81DRD2nrVmmVc?9tY`BYkMFzL{bMM zU0@DfQB9;#aMaX_xwbeD#aw%whhnb7_7LV^2wY3CwNp?wbG>mMin+cx55?S&?IFy; z;18msJ+X+cN4W-P7g}L&mtoU4OQLpgJs)QG7Pr$Vn z)diF4d`zm9E{a@)3|O7S1-Kg`7@ctX3U)h-dxCRd>X3#GFs)_>7WaKYR9P6T#nf0w zk{WAIQe$mNYOFCyjnxXiL&Yz59KjC>-XnNJnz>u>ZfWL* z;5~vLg05!$%@KT`h9mic?-hKJ;5!9hBKQu$*9yK}@Qs3R6MUQC8wKAk_*%ht2);z{ zoq{hCe6Qg11>Yz59KjC>-a{A)Y@(HQ0pS7t9B((sw?QTHUZdh z?rjtR%_?43)rvRl*J#BPDY1Q6r;$QYoQGr7>NVBN(d@)wFtSAJ;zB{M~UL zNjJbB(N0@7i^H9t+#(9vr zI9*jMsLeKWCANWaf{8Kbv3(hfyoejCCqLNTufLGv^WOF zN73i{?-~T+Y#{nKpUFKudj0NeF^h!3iLvpxtS8@xEw>ntf1!fP&mNDzjm70-)OS1V zqnQp@>Ug{z0b}EFMx(~#1M%Z=()6GHsv3_g_Xn?VJl+sL9^a@Kkn#9H{CNCsU~px` zf8%&urT9N%JRZPMI$=N_f^Lf&katrXLRFnQ^E`~Y;rQHF;fO|9P#l?W;?mleeoE=P znwtPn=~ybY6TX03EtT4?v;{^A)Snnxc4)X&jSpcNwNvm)zXIPYcr{uCzEAM^5`IYV zYU~KSp++a@Qbnb^HQZWjsdT|N3O+~hZGz7ie7oR_1m7X}62W&0zE<$Pf^QVO)upxx z-s)1@1#d|B4#B%6e5c?&g6|c4j^O(QpD*|!!50bMP+fOyNa5D7Ju~tMzEQo`Msft- zrs39n!M6*(h;NknBOT(nzf&Cd_R;~pHjzKqQti}KtI_%;7$9xUXDE)ZX>PveRbcnL 
zO0PfB3#+5BB?~Xuiq#e>d$VAnau>X5s(*PO93KL;9lqB+OVpp}%>l6L4;bZMh96{< zyuAQX!nK1g#;F26$AHkY%KYVZ&l7qcfod>RMN)9Q0tbx0>RAQGfHn8)>dGkux?EjX zImJNKNgcJWr>dit?ig$BHBw#UW(vB##?2*kg{6npCmN~

Ux)g6$FRbSfq)Qm>Y%!_k~oikQqb=V#jbDg%2>ae9devwJ{3W`$)#av&UhhlEX_OO^USOd``G7KfBx??Rl!Pv}s z;ykqEY!8dMeA`Der#t1>l;i}3xWOE1j`Y$pqrfMhxLHGNxO6sEH|c&$~awHk$)i0b5lk_m}Y z!I28c2@?|4OWwanO-Sx6iJOpgKt*Fh!X7+vLc-vfkf5^P+edW*$NfX%&@*|IlPKy) znEW3IMN|_L&CQU-YwzV1n&`iqu_G{Un}^0!IucW zNbp61FOl@~1z#)a=Lo)0@E*ap3EnOEcEKBh?-0B`jfVUSzE5|=kzT>~5=Kq_U!%Q| zm%p(cs`mK~T^tsprP`A#U1d{rwywExI$KxWIGtUh>Q=nYF0y^J=85U-e8Je7CnwHB zX&#U5VQC(>?V~l1UhYvk+YpqkvxihU>y`R`XpTsq?P00KUfU;GXLkz5X09X7LowHG zdsxi1**+;cyHQZK&aRE~P|TIs9u{*&wokIo&KHc$Tuz*amYnTjG3T~@QgpTF;yvL=Xo1vixt^$8D4coU9v#rI1^{CpOhJE@kK%5Um z|M?z{t7H3};`dXm+uNqTfAhde{KxM*VdhhP{H}#VZ_p~qu|9$)?yIo&3m@p;{H*>6 z{vUYrkf`kMaO_w6aCY{=@WbPd$Yz)Sue6QmX?Oz*U3!Cp&sB{%sNYrFX+W`5;{yve zSq$Th{8H7ERihx(PK=r}FFgbNV8eo|>Cwr|R`pf}>*3^LZEGz@_Gq|Vx#RUuaB`?u z|EzMdgG{~2>1q0IciBY^Ic1kfFp0J=95z#Sif5f2P0B`#!r z@j!1p&>0VO!~^Z|KwCV}7!TCO10@8|C->q9H#YcPhn^jrYVEJ$*A!@j&qG}MV2R-E zu)42m8Wn^RsM-f|2=$q}R`zYfKSzsbYU-vbeqH1pR4Ln8FvRWzEv`;c>!#dHVn>3& zO4{lID-3b?l~G?{b&^E;0k4VJ13Dyz2)Jhk|A+X-X!*!F0Eb7%0ke5j96)GC!2w3_ zx>6nm6DUw!myeDQ-2{$~58ZN(jt|}7j*buAPLGZc-Mo*Ek0I4skB*N%9mhXjN!g42 z6Zq)-C-BknPvE2dpTI}kKY@?Ne*z!1N6QCvD0g#My7P__6(O%X97UDyf)d_IO{{h% zs(HIs89YWK`UK zVju5=_L+b4!4hY56*lo)J3V@>ry#m4rw}{Tu_GP(cd!tBMQlI1HZX&Y=oew=ij!eG z!ZRd3+!HVvCeVAZlX@fep0~l63BEbxZEtu3;bE`I)$%6d3UD{h4tvMBKd*P~1Ap`Y zVIIN-c!y5%9rz%!BVg|Ky8<})b|^6Gam5MDMZsRC-mIP7=r?x{%#Zs2|+khjy- z3Nw+0f7VL~={vAHFzW?#2%ErVZ+Fm(J@LDDzJV|S$K$Z;_!(c`<{SPDf_KazTGzw@ z25yaaM|Q0KGL2kKKJM(9!G{X5@A z3LK`oun#Ip6_%Fi(bE1PP}qaLU4bL03GO&JCD{x+CW! 
z9<(T)tGL!WS3y6VtC-amy@Sp)=P8JaKTk0q45;%IM3_76`u!!9#@hc>5;r#~Y5$#P z9|Dlpy|o~wnKv8-Y2|sIg6LK0PJtZKCal2eV$CUMAZNzUS6OtE5o^-nk3e2O&j0gF zi8}akEAq>;368gI9{qX!Ykw(6K7!za9}2Of0!Kb5XPj^dm16Sf{t2MI8PaXX@ zu$8Nyk1$yO6Ut4)1uJm$qYy_wS}Wim%KMY6=`1yU?iMkeaR+m5-Vw$xZZP!C+T$}{ z^c{E+&GrtOtqGWa^&NP@7d-)KQZB*h8XWg^D{lW#U~AC?d;u`a@KJE(9rhq%S$Adk5`-haYo zIW)oq!Aw*&lwedfoBq5%0lT?PKWtyL31j~CQPz33jcvY>SMB_1{53e+Tn;lbdvUDq zgOK@t(0p!WA>8<8{RR$u7eqfBgyHT2tT?F9$?GNd_v@y;@%F-lX5mJf?5ou#yfbZs z;h)1%hj@eGaq($;;h9+cQ1-$@VlO-hgWc6Xv6MuOZV`zJ`5YeK;& zO@@D!5GvQFLNS}+tWB_;t(#&{?Wf%UvXJ>q$o$(M^5N8G!-L&{9sp3;2XHVCro*!a z18uNpzUr&y?pX(wwJ~hlWo%yD)YA_~$h##-o8jp43L>vG>;xdHk4Tsee+R)=Vl%Fo z4G+!<^c0vk_E6~N?gI0b0`t9)G8|rL?hTpG!jSdgI>=<osgl-;yWgu~4spT}cz+ zIHl$t@$L!zo}A-p9K`ZTyuA`@7;sR;Ffrua74mKmI`;5JFc`}VMRwpg;Vx}ZJip4lBL1!CKp~w*In|M1J%l3DI+uynySCa(2hq#WG-vApTJztO6 zYbC!UkB28TjW||*Y9mX{GQg|Zocyk`fa{;!6@EjZ2VY9yU$qz9fE8@I-fcACuAupD z7(O(DycdP93wbh=Z>|IKJ&kWK?g?_NIy=}p;Yl?3pg9Sy-?Fuv7h?bPgavSNK<1cIp`X2Hgm#xF&{9s6+XOgnUPB*q73j#uD`k3 zzJCV}FrvBMjXKxKI?9UuGfB!W9Qwacmei2w2ju1zQ(G-!I0V?d|G{ zcRv)2iFQ@k9&DZdn2Lvn&syiQJ>PA$T^za!d7UR>vv-eL%oea6 zEnqlE!+@y44>O6^aH;5{lqjCY!KsM6S-BBPm9c8zo5Pbs-e+_YNSN9I(G^6tBD&sK zg2Bi~DnToL7{+p=9R+zfm``KKJm}qzBZ173`N7~PT zg|-i%;qBjqW>?_-i!{Ij?+b{NC#C7TOCJr+Z}?+~T^b*l;V(4129LEXcq4rgrZBRJ z`LHAJk465J9r@!l)1Tka8!}HUG^?``3JC?ndz-FBQ%fq#fwR=iK29Gc)d%F7?Vw$2 zw4xk;^vdbDqQzYNu)5NTqlZ^R(c^;A>(Op6$qMFu#noJ^hM^U>l?V>#aohxET^UBo zi9vI|TZM7JHH~K}Z~GnSe9BO0h!$!1G~y^d`u6RIaM3et_&S5p@Cm43&J%F45;klF zo8EPxufWYOmvn{7w!!AVGlUcPrqjjra3I?ZJ8)z^qaZRPBRrwNTa}fCwtJHg%2*6W z>G}vpLQ&jsG{<>10#6}dO`8xf=EV~PUAzp;?azC<;c3ok@}6*g`x>aq5Hl0FyAahL z_Xc!4ln=hm$ITu+oxtrA-u>PYbJNJ`?1XP$W0@;En>!w*t=u%s=~GzlKTG0oeKFRTHT5gW1u=KcLKC@r`WpXBIva$onw- zO5B2T&zPsKBk zMUPlqEpt)RA5vqRk!>;cgFS1*DE5NZ>+<9K5so__YQzt23F!{B!NKl@LmmSiNC&uU z)E&tkYSiD#{zOd`$nca9dTLkml^kAm2ijGj?!euAhw`3s-SPv(j)?=zpkeS7GPkJl z2NWOe0yYhQ)wTY|pftDpXKim9$hTMxdSMTEm?{$krq-)*^7ezTt_-wZ+-VUMMEV*Q 
z1M|_~X|{S2ecQTlB#sb+s)pG?-1>kR8eYYIP89>|9L8IZH5`i(n2NmL($^fnEez6% zkj>&PY+B}X{#mpxp{5pKt03>MVYg~vvv4RJy!OLWN6zU;$cs^&&cR~?c?e&?&fB~Z zimR{@^8qEv=>41}6=3{sMmPZ}Fz@2L+Bfnk6!(VU93^{npMf?E1UIr**AuwH4S1s- z6tS2QK5V29xv1AgZDs_6`igi5Q;H1v9XO zNk6(4#WC;~U_`EJJe7PONz>jOH4mXShJAp7*79BqPB9}XFbC8q6^NIZi>WHrmg3`%oJ{%F1@2*Z6!%jxAsisB7Ser~$Trwvv9fjK0>Mcqynm}1WHZnGO;Bk+)5 z&(2{C@KF_B2xX$DSFIrE&49s+ysm~mWmj=?mM`z!hTkFYpBsi|e2bN{^_cIV)w+JZ$C7cKjCejk|FO=^BJ(Si0VW@`J1~AVa89DX0^U%|s zdY}4zK>co1zrR<%53Aot)o-Wz-K2iEsNb#Xw@3Xxu700XzfY;(UiG^JzlL#YRn==wAoEms5GGtv?kTTcI@=Sj zD=$Slvk{n4(7(2>W@$qiJnKCb<*Q4VmX}qoD6R6$tgi9otd@u+-m3EDz^w9=)`4m7 znRCvb?KyX@ipYC*E?E#dzTo4jR8fL~sx_XOm0v9XqUUsYpYB;zSykm(Qcm{D>*{Li zR8AGH;&hdw#B%{qS{R=CTJW_@WPp6EsV!GxQ=+P$Vw}3vm|wcovucWd8B+)qDlCu? zdV@lgYW$KZMk%=q*VK5bO6!)FPcb0P1Q&lm-DCT)q zefiX-p6Z&gr*cJYRr!kY>TvneFG3J19E(R2T1B|VQ&U@Br;3qOhOr3pudlqm90c`Y zB@v6XoaKzjm#hhw*K1;nzNVqBtlV>&XK8tTxUyPTirSh=q+Q3#A`w+pF0U?!Kq|u( z;DX`{PcgzZp7PQ%R0$M(ou{;_rg}LllO*F=Qqxeqw6tywl3Q3;=J_(p|J170U-WqV zNXY_izSAZ-Mzp9tHI?%zq=O2c)PtLJ$N)mzI^VJeqYm7jd_6vM|%P^wZ* zAkb6m=Ya#%4<$M+3fAO!VJvA_wrn(6B;9H=RpGg$(j(xpowM7to+ zU{@_KuSP~H!>9#{^`+&@DkuD;4x zxG-Q`aw&fo_!ch+US;?S78Ux6jD?H*d=>f@T^_p92rOPu5W$vZ6MO7-~EX_2rQKvQlU$v_ZT5 zpQ#X~sM^)bt3Xj&UojeW9KCMdYdnRE3l@cn3a;{89=I@c=_MY2=!%d(FvTcPnrCHc zRRa_vT82f3%Ah#&c$A>$dFm@Fm&N2SI3<41a;;>SqxC}JL4z@`7uMA@)Oy0@Wfj$x z*EN(Qi}mH@S}&HOttRGQN^hP=IUu!$B~{2`yc*R_Rnpb;OH$~UcxJAsse>GsL8Dc& zeupbc!__+4;#y0a+wxJhWS-}O2B^)X-XL6xwqgqbm1rK)_E*-I)s>^#R@E$zFBhwh z%}b=vN}^P2LXV0Ob)e33eR*At3T{Y#RMmtWOYx6fEkvgT*2zIVtAVQiOV%g?HY*S% z%FnLUo^L`Ym3aySi-MQ>=Xn;?tf;N2uLQ%(D$A>uLgS!YL%&uv>)LP5n(e9j@~X4G zsRS#0q&S?bjY))dFFWnt8102RHZHAv*;Q3JWoOC^_A%8QS(Ys zlDZnrRDFMGEouswgT_!*St(srnG(krP-bT-m8QAFSmDeisA2GiOzOjR7^a}xMaeGn ztf^`6l$BN|mZMgLXK6)vmR5w4(Mom+Y-uaX(exlO@Pvwi>YXfGt#a7Q*z(Z2RL7({ zE%1XbtRCFN(`jX^=~$-atHT&|p-%`jlvPzOt;Bezv}$=x9s1-Isy;6iT?0K@cRD)n z(&`2bu-NvLeyFU56A2<2Mb7a8)CPuMBwfRP9X+TP%4|=4jR(TQC}v4{7-IoSyPo5c zH3@2(wQEHU`tveWIIBZrf0kQ}YNMNB@<7Kp48FLuDok2W1JxUe_j 
zZ^TIaV_Z%=S$L-6F&wx&DegZr=^;)0!zp9@e?jZ#9{M5{?-q`n`g6S7DmYaccmrHl z%%JN3Wo|Lb~pw>ngf_LDzD++UZ(J*PV1l=(?4z+v&QQt{>CI z$$bTv5C^L1+CzG1ZEeysNS#)Q5FhT#qvnV!Y)`?Q+BeQL_VGP}5INdiR-J6mA&4y9igmCK(qqoJ#aGQwlR>SD{9^$_pY2Rr$i~iln z%)wTQwx1)epCg;U1&_Z4kGKZ2b6M@H&M#D^D98J~2o&v3HHkD2J4fLBxIT>KW`zhCH)RsE#UGsW97 zTb;K~XaWZe#}&1g@noR(GL&h!~C`gJ!PeyG3ajq!D^vr(W_?+`g=f-`(w=y zn~ZwW&{G~S;(w;lzsF+YlT7c#5%D;AFdx~V9r}q*=Q9k$Cpnz(IV{C5cbo$2VspY+oa z)bnc{|Dtiw7l6J<=&wEuJ@d_LT^ogdF4^Rhl5d>kNou$3Yg@Pp9M5zWXq0f`&wRWF z`qe`JxpB~EqwZ83LHvHuUwH)ahe00{dfzzs`AHl<Wp+B$u0bQ~paqzf0)P7yp!Uj-$UF^!-AA3d&ABY2s%++Cuu@NPSSV3V0^dkD~uS zM7Z8R=x`XK=_fgycr%^Z(3zb=|J!tYITQ7=%0I#dX#^b)hGqImPDjQQq`w{XtA+mJ z!_cezgMNe1H>c$*u^nYgyy^7bu17V$Et8+l;>Xj zTl8NV2R-%P-4g$~!=T^5n}qRvIbHl(pG>&b*$QesZ+IWB_Dwq6 z7t)5~h&ypSO1-yXvko_b>Wxowew;9gYBWu}bwaE2Hl#y7mm*v(PHFJD0Ww!lO1RW< z%?}W6BaR94c|n4uq$mEA-;S^s9I4_@5dF{a(;ddqLCxY#j8{F+bfP^glifJ$UA<>W zr?WC`{Y?$`4TP)ss}A?AwCyZ0T(+>4+#gd;9OG_)fqXt!Q)YZp_)l!Nl+z;ocjBT1 zJ}0G>F9p(c*!6)8xH^f?c*g@RLVu*=fi?+uq~n1r|E~G7s$YtHkV`(4%U=BF3jHhe z=of13D@MbGhoMUQ1X{frSm#{(AqSH?ll@j%u`I=|x?&u~02P3WhM zBYutttoRStz9>(Q2Q2=_J09p4d5&ki#PPr`p+9*X{Bt~D(T^v7jt6=q{_*h7@xaC- z;Gg4xyM>JeR^((>n= ziuhvd7A*I<=tmnR+y&{<6E;ZC@j$zw)X0cm{8RE3N6&dsr_lfUF!8JWgFYWOF!H$s zI#@j^`A;2ha6E7&?jz>&p#)3eOU0wtQ8*r$iR&Nv{5~DM_%q)e4{Q|r@r-9U9;m>D z*L)7g5A)6OK<-pczdLRJkl5ca-1`XE$2-#S1Rw+Tq{u6^-=2*$hCi*trLSj+a%Or< z5zaGBhkHC-c_z)%8SV!NxA9+exO>r7)sw19jw$>R&= zm*b1OXXyCG+rMuR`XlY%+a=tQ_U{#^YW~LCzXyeWdph}uJXHQcGkltke?041?BBCa z*YvPhs-NWYj*FlD`!t~+Pyeg>ccDLZ9PzV%xA++k|Lot7gdg_rR{XCcE%l6%-zw1e z%+&JnABKO`zCqtF^be+^PwKZ=Zd~WAou$($NgIv~>+f`2=WG}H^y|%u`ardBguD9- zIvv95NzVT${bLy6IwV}hIOO&d&<~xV)A{aU=*h=3pf5R7(=UQ7M&~P4e@}(3Z#hec zi=<0u6g})mxV{`6j+O^TPY=o|>3ii32p9C~aB08)ITPnE%-?+oXPkXFK2yW(MYt>H z=x|ZS!6&7>67?g~n}PnY<2)VCV|@rs370BQ_U{|Ms>9)Pvwl*-rKWc~!gc2Ba9=xu za8Dv!zfXsolQvwcT%70+hvw^WS!u(mbz*zni0$lrgzH(L!;PmutYUvH^c&LF%T#&% z1mQf~s)6SdtVVoNe+?6yIf#>D4;Znyx?=yej(BV4M*5|}<*&>E?*^pjGLQ5llj&0d5Xl5pq3t)7%{ 
z)OU&dR%W0M4lmdFc|d}tmQQLw`VE9@tkB_(r2oAg;aVznxbfB_mJ`hI4Sf#_goi<#m{(lkSJls5oXIMD=WHg1()ckQBU}(J#S2<2TSud9& z+)Pu48&7}Aal$mAzc_6=sdD5vAt$QSc^+={q~t5LpW--SSDOyk%4qqdgi9RWjaEax> z^ghIY`(_>P61dfqQeU&-9qZpIU-r!#CEOQvsMK~W{-j?TM}N5S5c#?t^jTXpU!8EP zCxtIIu-Lv=p;9jf>22q}cjzg3_sP3o-tqLKxqo$?4&Ns4@5}oq^4=)#P4a$H-p|Rq zPu_3I`+a$TB=4*`9shCio-Xgxk>FNzqq)#K3rP%&Em3(Zx%0aD6Ly+Tvj);P5Uo}Ris*@Er02n zs-+-nSW>*C5_SaYj5ZNmT`(zzWu=u>z}6MPzkGFNnC>g#URGNN!$8XjmME|xTt=Wm zfO-P81nSDm>0V9uDp)8`P@@ANtU*=Ps8B5mEMHMpyN0xF0@jzS^fnNTrPr=Nq*qZ& z#_R4TTwf2P4T_x(1%~U&Din631+GBGjk-<&mo_M{N5HD`YGCnX6dT7tesxILVN5AD z{#B)042u!Q9S#_vSgM@gcPIl}#*Z8;jGs7Q#K73%fI*3RvEutf+uQVfrInUR;5QQ3|&@kjQhmAcVrHpjDXADK)YKU- zXQ~4IH|Hwj+fE~+Va19y%2<)nl~Ge{?1%M)#vE{1j{+$_+t`%>J4VIEU}mP#mILlP zh@!wF*?b){wD;#D<>Zoml( zIkBYQaWVtOd(N`bhO!FFzLznK)cP)A8du?`@f-N*tOZBK=bUS_OfXtekd0w@_J!e@ z5aBN-s0ceIIE@Z`q{8nb{B3)eze9h=nqMOQ3!1)<^mk-{rjJ_=GR`eF-kWeXGW62K zqyW*0DuM4zbQ-+}8KCnOEHx%Z`fE*dKWI9eoS5J` z^~U!nEvIRuST*dNRIaR;7+WVTtz22Tv|P6wZ3v~@cybbI`;JK{E*M~nh5G%Zcsai} z3CTRbWO{$k-}X-Behf)w(4LCKspB`^o`f7-6`B6+CUKczX00^7jk^wdyHK{q zF7?}|e*5t&58LJ^A#B>N;5sY6QxP`M{P@Zh*ds8^PAGhdz~utNmW86fQQ)?j8s`2X z#y2GJZv>{@3&#J9MgIj2zawymz!O2l@Vx?`Ebx%P^8|LEp~GJ;aEZX&kHzpE0;8%a zm>d4#G&TraB=BzpZWFjiVA$qR^t%M6eGWK{A%S75L&2YbK$#xwb}0BXfoa2o^m8rv z%NpiB7L`AN`R$yr8wUhPPkRD{iv*4eTr2Q@3EU>|W`R2d{)@o90`uE9;~NrqI@*Lv z@2fh!a|F&2xJcjKNC3rY7KV_JS6ad!0lhx_{nH1Ob@C{#east zMb~KfVu6ioHM~OL-fwF7+ZKI|hVK#>_E=PWXuc}`HYh;m=T(8-t2F!xv_vKUn>Boz zz&Qf@1g;hMQVVX<^ko7!3cSX`H*5TN1a24jX9D*M485<@k7)Wo30x#_pTM;O9}u`r z;A8Nul>B!He6qm30)ItdW4(_5DuF!$e@ozef&W9`5`ljsaJ#@y2wZ!Mj_);rVc$l{ z_d|hU>qf!ZS(?9+f79@6fnftj;X@XDyM~tuTziLxBLc%#j-qe3U??`0=VJoXh7RGk z1aACK4Sy1!2PvPvA8Ys=fjfSx;VT3#xm&~E5*YS)RD7mIFYrAA*WRP?Pg`(@hKB@( z9Um2b>J*(mZ2Bm8rogoCLw?V*;NNMuK;T+|%LFdEU*lH`+$L~T;9h}$B=C^H4+-qS z2VADNUEmUd-x9b{;LJ~Gerq>s`so7G4iUrq1cpr_1uqep_KAqUL15S_Qt%xD!)}p+ zI|QZ;Bhv2>xVKZoA6WF8H2mqQnjhLaBK=l@{o5;^Xfnmo3=P7(FYoS)Pg_M@J@?f z;I{h@2f_Dl``&Pss6u1!s 
z8^WJIR;LHsR|@tD3_Dl~UMz6QX&PQ9Fl}Tp{CxuVo~PkHiyjvh5br!r#}8Xt3jVCX z9hYhN%L3ER7U{n(Fl=rqc$L7gzop>s2~1mDq<>K0Mr?E@yw$>Aso_5hOnY6#XMD;q zPMm=L_c{%`0e=!Y)M(f634mQ_lh!_flLWT*1Dr0fwI5&(;7kKT#l!e7Bp!~Ec^WPh z7&g`v{tAIsn=nD*e9 z-unW31kT9T{N|jj@os_L5`MbCB|?9Oz#bXDe@Wn4;kQ6w+N5K8B?9M2d`$wwcACPs z3+yS;aEHJ}*J!xMqA%0%a~8e8?^*N$Pd;AfC;#i3{$vY&SM&2lfqNg;@B)E7*e61M ziv)(PI0csrOuKi)UuVJj8jc9uc$KFAuE4Mnr|5qsaLKKj{&!aRf75WcMW3tTzgYCA zYxsSEX|IpzXM9@cuXnbFrwL4(fW)6DFzoUv*efvX2P*h{fniHf!50V&yMhWX6u1*r zn(-|JjGWM^o^k~%l!8|$!A(i@W)l3}B>4X%!R<-#eO9*x)V|2qEPz(2Z2qaXhR z_qZHzm+NxJz{q-r9jOlCJvVI-ppwJD zlymWDX=wk^s7H;TUK1-7(gTqkzg~chktMAashrNW)+hBsQG670CBnod zBncH?oO7}E8n63ONz+K%{YjbaNXKkN`tAve8h$Ssan4S-TqL++?m zJeSIWpEK;5r|U}0Kfc8&-_a-&8nmXe7Qa>1i$#~@o%B6Nh_!EG%A>X=i`fL^7~z!i zv5I`uz$xkpa-%K(lw7A;1h?XgVUjW%LnN|jweDobtmc~RpU6;xs>mHnSH()fiZLl{ zeCLsbvUnLo1LcZxtwm!Sqk1QlRT@TOl1d`b*-Mpl64_x1lv#GEZSig0%7fOTX=1g? zYm7{eT13>$nllVDq;8}R0PG>iC?f#i;@dgFl!gw^(K<7D#}}{7NAlo6=n|@~_>H0` z;xX}+I^I7yDocEpbZu6wI+lVMwK~X+6L#oPh?E`~g&>)uSkRw1x9&l}jT}zTV z9xX_cFzoDT+T6pk5v!Z}CM`R09TR3IX;q9pYZ*nw#7%W17n&%(m_QS+h6tvpA$X3a zhTv_Lmn8t3UyX=K(MDJzNuJ~(vRDJLqGJ_}Yxtwn*caB=X~ZH*&<+W~TA)yf=y7+$Dj(v=d8Z=ryv3#d({xrDF9)aqY@d zzSMrRGI_5wuqn{qSXNw|y73I#0I_*Z{c;OZ@o1aaimknC9QfH=v}9Wt{ZhA&(RXQ0 zahlCtf=#=_OW|W1%m}KzXLK-7vD&z0Luxmdf@Jd<-7ExbaN}OHI8g0CBb>0cN_ix0 zB%|M9cF~b!6-y~?Wgs%XqzNb1Ib9X$uQp%VQKqeXBu!ynaWQHg;(!@jo^-qz;l^*B zQl4lhF$%WL6bd#3D+<)q6g;YLjDU4Hg(5z4qjArzc8S~M6l?TORr2(WrAH_+!4=#i zSuFPu#_f^R`$*L$PBe7c&RMK?bLxGc2^(nD_SXa-y`@%p>D{zoj&iX!#KOU?u +#include +#include +#include +#include + +#include "gf.h" +#include "gf_method.h" + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_mult a b w [method] - does multiplication of a and b in GF(2^w)\n"); + fprintf(stderr, " If w has an h on the end, treat a, b and the product as hexadecimal (no 0x)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " legal w are: 1-32, 64 and 128\n"); + fprintf(stderr, " 128 is hex only (i.e. 
'128' will be an error - do '128h')\n"); + fprintf(stderr, "\n"); + fprintf(stderr, " For method specification, type gf_methods\n"); + + if (s != NULL) fprintf(stderr, "%s", s); + exit(1); +} + +int read_128(char *s, uint64_t *v) +{ + int l, t; + char save; + + l = strlen(s); + if (l > 32) return 0; + + if (l > 16) { + if (sscanf(s + (l-16), "%llx", (long long unsigned int *) &(v[1])) == 0) return 0; + save = s[l-16]; + s[l-16] = '\0'; + t = sscanf(s, "%llx", (long long unsigned int *) &(v[0])); + s[l-16] = save; + return t; + } else { + v[0] = 0; + return sscanf(s, "%llx", (long long unsigned int *)&(v[1])); + } + return 1; +} + +void print_128(uint64_t *v) +{ + if (v[0] > 0) { + printf("%llx", (long long unsigned int) v[0]); + printf("%016llx", (long long unsigned int) v[1]); + } else { + printf("%llx", (long long unsigned int) v[1]); + } + printf("\n"); +} + + +int main(int argc, char **argv) +{ + int hex, al, bl, w; + uint32_t a, b, c, top; + uint64_t a64, b64, c64; + uint64_t a128[2], b128[2], c128[2]; + char *format; + gf_t gf; + + if (argc < 4) usage(NULL); + if (sscanf(argv[3], "%d", &w) == 0) usage("Bad w\n"); + + if (w <= 0 || (w > 32 && w != 64 && w != 128)) usage("Bad w"); + + hex = (strchr(argv[3], 'h') != NULL); + if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("\nBad Method\n"); + + if (!hex && w == 128) usage(NULL); + + if (w <= 32) { + format = (hex) ? "%x" : "%u"; + if (sscanf(argv[1], format, &a) == 0) usage("Bad a\n"); + if (sscanf(argv[2], format, &b) == 0) usage("Bad b\n"); + + if (w < 32) { + top = (w == 31) ? 0x80000000 : (1 << w); + if (w != 32 && a >= top) usage("a is too large\n"); + if (w != 32 && b >= top) usage("b is too large\n"); + } + + c = gf.multiply.w32(&gf, a, b); + printf(format, c); + printf("\n"); + + } else if (w == 64) { + format = (hex) ? 
"%llx" : "%llu"; + if (sscanf(argv[1], format, &a64) == 0) usage("Bad a\n"); + if (sscanf(argv[2], format, &b64) == 0) usage("Bad b\n"); + c64 = gf.multiply.w64(&gf, a64, b64); + + printf(format, c64); + printf("\n"); + + } else if (w == 128) { + + if (read_128(argv[1], a128) == 0) usage("Bad a\n"); + if (read_128(argv[2], b128) == 0) usage("Bad b\n"); + gf.multiply.w128(&gf, a128, b128, c128); + + print_128(c128); + } + exit(0); +} diff --git a/gf_rand.c b/gf_rand.c new file mode 100644 index 0000000..ace9b9a --- /dev/null +++ b/gf_rand.c @@ -0,0 +1,72 @@ +#include +#include +#include +#include "gf_rand.h" + +/* Lifted the "Mother of All" random number generator from http://www.agner.org/random/ */ + +static uint32_t MOA_X[5]; + +uint32_t MOA_Random_32() { + uint64_t sum; + sum = (uint64_t)2111111111UL * (uint64_t)MOA_X[3] + + (uint64_t)1492 * (uint64_t)(MOA_X[2]) + + (uint64_t)1776 * (uint64_t)(MOA_X[1]) + + (uint64_t)5115 * (uint64_t)(MOA_X[0]) + + (uint64_t)MOA_X[4]; + MOA_X[3] = MOA_X[2]; MOA_X[2] = MOA_X[1]; MOA_X[1] = MOA_X[0]; + MOA_X[4] = (uint32_t)(sum >> 32); + MOA_X[0] = (uint32_t)sum; + return MOA_X[0]; +} + +uint64_t MOA_Random_64() { + uint64_t sum; + + sum = MOA_Random_32(); + sum <<= 32; + sum |= MOA_Random_32(); + return sum; +} + +void MOA_Random_128(uint64_t *x) { + x[0] = MOA_Random_64(); + x[1] = MOA_Random_64(); + return; +} + +uint32_t MOA_Random_W(int w, int zero_ok) +{ + uint32_t b; + + do { + b = MOA_Random_32(); + if (w == 31) b &= 0x7fffffff; + if (w < 31) b %= (1 << w); + } while (!zero_ok && b == 0); + return b; +} + +void MOA_Seed(uint32_t seed) { + int i; + uint32_t s = seed; + for (i = 0; i < 5; i++) { + s = s * 29943829 - 1; + MOA_X[i] = s; + } + for (i=0; i<19; i++) MOA_Random_32(); +} + + +void MOA_Fill_Random_Region (void *reg, int size) +{ + uint32_t *r32; + uint8_t *r8; + int i; + + r32 = (uint32_t *) reg; + r8 = (uint8_t *) reg; + for (i = 0; i < size/4; i++) r32[i] = MOA_Random_32(); + for (i *= 4; i < size; i++) r8[i] = 
MOA_Random_W(8, 1); +} + diff --git a/gf_rand.h b/gf_rand.h new file mode 100644 index 0000000..06bcea4 --- /dev/null +++ b/gf_rand.h @@ -0,0 +1,18 @@ +/* gf_rand.h + * External include file for random number generation. */ + +#pragma once +#include +#include +#include + +/* These are all pretty self-explanatory */ +uint32_t MOA_Random_32(); +uint64_t MOA_Random_64(); +void MOA_Random_128(uint64_t *x); +uint32_t MOA_Random_W(int w, int zero_ok); +void MOA_Fill_Random_Region (void *reg, int size); /* reg should be aligned to 4 bytes, but + size can be anything. */ +void MOA_Seed(uint32_t seed); + + diff --git a/gf_time b/gf_time new file mode 100755 index 0000000000000000000000000000000000000000..c417ac7fafdef9ddaed2c79a4e99c57fbb1b8839 GIT binary patch literal 131480 zcmeFa4SZC^)jz%)Hn4%fT@*Ab)I`w+`<4Y!B3g4{7w+mtv4CKG!HULrp=4L^1xVao z&GfoT`$%hBtv*&;w6?`oA)?}Lf_XuNyeJ@82rq?Q7bSp(@FM&Fo|${^-rXz+w*TMn zd48Yf^SRkObLQpDnKNf*&YU@S?V-<(_OV#3y)2fh9E-)$56|F*3Y3ZCiH zC(0AtKX(7B)06Q0k&IHbFmSWv;-O#qn?Bul+gzV` z7vc2so*#h508lnllt*D(DmewJ%)#l?D<8P`mYMfVuk`tEarVToW1f|lH&@G~hxIb- zT6rKD|C>I2Mx}4|o%h{tgwyexHeZzYZhw)0!l1qJM}1)aLwPgrt4uXGlYWJdi}E5` zCJLvu*IXFh(Oxuk`W?63GwZh5Je-bShgRMOEgyx`{(8{Qf8U%t@4I#So%dDD%u-$? 
zEbuGP8b;yc<9FK~(<^5Go# z5t;OhW#C6)b3deVinr<0Z>a>?jK9qC98u99<~Eu6MC%aMrc@zH^`SY{ zTUx3jQ>w=()n7CKt>E-Z77}1lmU6=ccsL8-83N4E0Oy#B`V#@J&;UbBfHw%>)BvwB|K^!5-92zXC(Fz0Phm@V|U|CI=c$5_zBXFh5AcjH+Fj2ZQV*>w?lN?qkk10 zN4o>PMn*NtC_$8ezft~KM*01+mp=&w7bxto5}w!L>;gtsq}!)R7q~n2Nf(sI_DR?E zw>^X?XvrMQ~unQ5Ays?`=o*WGV{w*_uPRNiwx#u`xQ0!SAU^qIutgg$=Mc2=1Nca@`y{CK6n#CAdN)m@E<$BY{gKI7>@##10$+ z?@$#l?P(8eoSq=AVg)OouMV4)9(c#DvH9j2&*B&J=#uicLL&go~&c3~bgCJ#_YgcPLw^O$tp!?dltx0O3&QQP6>fYEZCTOIu)t zee42zVZ`^sE{g~vA$>#WX5_Yb!*@Frb(}+5L69eYh-%*=zVn7hUB-W2qGX98ps@}d z5NkP{OtOt0)oqbh*b0pRmF2WQL>KNs-+9@tSg*URmUuDJWx(4t z`1U&ixA%lcGfsl;Nt+-+jKVK z6!jfyX6!qZZ(`4Uy|sM(2wu_?zT<8|m&6@_J9@(FH2C`j@B9iZ7?Idd@Ef0qe@b}p zG9*ygqG<;y)c zNnFy3A{X1K_td7_k(mnn3Anm^#_lTuZ-3T_)D|f?0L80ZTF~&^0hNW)7}`KE{8dO( zBP2v32s<}b=<|abChS!%9=vRymmQ6rfs&nFsE?39D@US6lI9`8jf!|4@g$dM9FS}- z_H)^+#S6G7*UdUap(u@nOzhqpxQGKL(>NvuMyC4eD@1)^QD3T_h>zEUIpBt{z+qaL znukL8F9gi9BrByS&*zvq`u~pSxGqeMrdj9-ge#=t)_u}Vizw$6>g%6%b`i|M35Blx zEjYcFG;gu%fQOO{iEhg6q`ofYAu03|a1rHMBFg2GnVRx64?A+t7xil4RQ!M}@k%&% z5tYqCO~{>Mfw?>sE2_Ul+&uzs+sp| zMSYfP{Okcc11f88wZ zQu*ZTWqZBskiv<}*>?PO>E#z{<@aFGp`u*QqN40+VL^_8{8vt{jK2xy5i{^|v>P42 zC3ewth{yA&r`6a0LF5FjEEslZO8X4Xg-vJx#N|xl$l0v&lgsS~tdkJyslqO~5 zKq+({k+OOvErYI?Z;)@4Z<41s*J~5Lpe0rK(G*m8nt_N$WrB>k7LX$-xksM8lxG_k z+n0R^Zi{v9haB%Xz_ERXBDPS(b0XqL6tRXP{*3lGld(E_uM%yWOu*L=VXT8f-6GWh zYAy?roQ?leBpynMgOvLv5s^a?l@w7)5ot268H_jhxmm!p5Q`vve1n{?Arr_Myak~1 z0g8>l8{&UKh24R7v4k$h8|`!SKUzOKBeYmet*2|L+oTuRlq0OXTXi3a#?B0MSSd{z zUvGQBKI+tBuR5W`t4=RdClrPvzFW&nH~4N`=ut-~tW$}`N@QtSzRce{97=R69v(MH zR-CIubfFU6rbpWlT}NSQSr>BJeyZfPc$^kbpw8|IMC|3I_5K%|>#2@|7^_%VYx@9? 
z+E-yssj}&v>PsuQMPaKDh588eDWNv#8pYW{VQE=21w9CQSbBYlCo1@UFx2L=H`j}5 zl^N@tUCr@37P8Gu zYyf`u0{x6ezh8T^-oxYZR`GqkKLdvxzTtsGh5l8tI;D^n$b(Q7tyyK&KSWkf!&*4$ zkXfJjFHo|qmKTParO=0X=V_#6r%^hdr~;`HGc)#IvO2F2Yk<(BMvkSa9CA&c#1Ca@ z>9?pXwlT0xG76K~N{WmoY!t;d@)}rObeqkUw+&H+p)UXTH?+L)pkXlTBfLTMmNWILW+oF9Ijqn3>fwnbL`FWRRlBo zN87({!2T6WiF;#S=Eq)k9_?jIJnSP6TkB?ZF7?I&uX=y6td7qQPwW$lfO;-T*(OJi z^m2!G@bT)BTpbF#*vn?x6?V15#U>QE*wsT+w}VY6bg}8fO*ybvVe7oCTfCO8lR}i(Gvh;m71mDC`xW9KTA8_k6^mf!B_B9KaT4O3 z%}9fOyIxTz*cB@{c|tMhg8k{$B`&sZ_&TCKtY3I-@Z`*E;j)lBftOvTx=f(T`ht_B@x3i&&v*aY0@9H z%7|`7v=FmVyrH(Ezw24sMvTwcn}E|sG(2iQf}+s>GI9q*?nsv0&rt4%jNJF4?)V*i zVjcSPEXnVsXH7d!dT-r4MRGB>-wuFxQhSr+UrXPKaQC3eEwJ)~%`pYA)0T!pLq>)e8IK`D2XmYPlWD~?U&g)nXPp( z_i8UIkC6Cvzpc6pumZqjRfuVQ34~j*OD(T;seqKPcCkY))`gYnR&+3JY(X$6LO+v& z*i1mmd@47(>69D0@5#f8U2KGlJ!n(itJzpP`*A*>6WLe?%3@<7?Q7ZCLc}dm*w|u^ z>Kaz81R{`rV@o`Zu3?2#48*<^{0J}eM+Od;`DRxiu0p_99ykmUFdwhb17(%N@rfXR z;IPg2K;W?5|M4-C<=u6hGH#H$LM3|zdoDT-aQ4>Y6nh)El5^QHv6{4oQecUkwd#6h zMt7`aDsg>XV_5+RZ;3yS{^09B%*BNY$j5tuUTFHX;FtDhmPQ-RCD=zc)97xFZN&VZ zzMfXtdN)?cQ^D*VoyyZn*p}mAElO$2{e=tnOEU-Tm!+B5w(p7JWdT&D4Gt`M9p2K& zoUO`?s1hF3$5YzkJB_zbTH0GKv3N*EG$7W$rx~cXHpd^twnkr{Lm!@PJuy*Hrx!!M z2cg`-NFG_kyx+sNZ(Cf^$t|vrm}paz3R;fJPck9C4GbVR1>=pGL6T@YOpPn!N~=vHiS`& zh3UNF*lZr_=8>my1hFnu=N8AV1Aumm>R1jFc+%Ptu*(E2;!^J_aASmp>NsvRGyy$Z zKrt?F0d$9dE+0n($rX?#F7?NNZ1PX`s0)f?dnQAKs8bQs=AZ3R=NHHR!DFsMOtb%5 z=--QD?<2;gjzdhuXQ%Dwazq@11KIl6Gk_*u0Kde3fw%ZWAc7&T#eCsq2V(bAs^`5d z8M}wxf|ULiO7EnJ0D&nK=#7Av?TINA_y^5Q(C?0+z+1%F+TStdS@mvG6Dw@qV&;EQ zW*=c|^0bT9yGrZkpG)hi@TKnoZYd8BlQ2#E6GF?QUOFsx4k`#m{G*+1s{cit!7Yw0 z$7U3*zsMPpOaCD~7jcCz?B&81G3!!qp(|Y3%U$~AywkOIyPaK$R@3~^=%z)frP1BQ zYgdO2L+y8>c52;AlX&Hqq7bXhryr%!y@V3V;qf_g>3dR`aw~y(ODw*ZIHLi@C9y`J zh>GI35&goW3bo>{<;6*%PrzT{ak-TL@q2iZDXYO7PlG@E&=HFZ?13%VSt5DRBa(m} z%PEQEK$0Snr29wW6o&M75aA^4&!X2UO<1}<;lS${_lr`v4_@SVp|pb5NZ>za%8SzD z7b7~JK7?1V=f9f~dx3uI5Lgnogvoj%d?Tq8dkhlBnj_S}9A@O35{sk}pd6 zi&o0xsZt8#{Y5F)X{F2(u!{sP{E$pzx_AS|1+czPD`mV$Su9t7V8M`xuN0LG(hB;v 
z01w9y1=IpA2#>l%%i52_q|iG6$W~tr!Z^jh8o<;D{))9aKMS-m(pJWRXB zMOqv`OFSsQOJ<*X&jTKO8}Rrn{6i)8(rkdPxmd%QrWkZjS^RZDu2Z!(ovy)4ku5x~ zHy0n@<3*&4k6eNDCszsC&{rf$6Cc|^J~aDHEy=D!yj_8AtMtSsde3N0BZoHynykqt zDfA{n@0I|`*wx5SD^N<$TRLyGnlE31ndc=T9wP-^wqsY(`6Q~x4q^mw5t+l6U5fc4_9tW`zL$ zBp`L%YYDK&D{wC}0oD>=odz(Yc$5I|XaKV@#sKh4>_rW5si~+z1W+}=B__Za0?gI` zSp6DU-9mutG{6-mz|RR#rUA-LfF=TbTLWBU0wf7AfCDs&etD&6t;dwgi7_6FX#lca z*6DK#0oG~&?57OC?F3k=0mhjC^9k^*1~9kw&jbi+fH9^V9}?hh4$x5ik(P>9>pCW% zQYyCwFr)Y`weY(dfY$MP4qoo58eo(Ou$_uJbOn(?xBuaRe#_0Q@g=4DPy?9Fhw^EP z2&}YU-yDAfV6neo-E1GP*sD&nH_OTGJ@kWyeGAM`utjIyf!hf|>3{D^Ezv3 z?{GF?`G-23E7`~G+gzb-{*B4^Mb#f8I<(T)iCp(XKX3>_C6YXSq#K*$d8=jCh=SNQ z7fZ;2BUWEOH~WI`wm(SxjGN4)&V$nFH%Vq~)Jm7CPyl>;4pYi+kC zrKMQWM%%k#YqrV?6_xu?vcXll#=nhq!wyI!)RtY;AK2|{_)N@~xrK>~wR+mOd;3ju zl+Rd&{ebhePerv2fJ7(pGptpp8wfpZf|z>wQD5Z7yeBTD(8Tnl$?-Yg=Nq$SMBw{f6iZQNGMEmcbV2 zhKO$z2$>j?!pE9)IoCob-4a?Oxw~8AzCm)Ro79Y^IR9>DXo=JKop_VA;*1KK^XqZg zQJG(^-d$3zy6ti!_!k_kP8y<744lU~G}$zNDiqm3rp;Gau@Gviyf&z*
onb{UX zfthS{mA3kKQeIiT5+vd!w0F2dO|Vr$scvhtd*qc4x3$eE+f~}K;AKtyeE=&CwgpVW z_6m%;T)hzkZlio)BU?Xw z3n|2!+dstsA{YaPP;53+ekc?JK=vv2vE05(b~d_;8eP`)VcXfE$bx1hGwBY2XM8I9 zKU@D`M_hvbZ{G>CNwtr()aFQ?{|Sjr`!z^s@bo zD2F$l$|p=O)~4F3zhnD3r@e!hpzEK&iO}BO--k~#&Gl7ltks8T!{GlsaJ2WFI98nD z!TIPTWrpn$oc|;{Ah#Ytn4YQHFTIb(LyII=Uey10=y(_0BgPXZDCOy}t87|x{X&BM ziePscFzK(6aF83hLC9eR`-BfX4iPUsaW`H>vN}vWkk}p8mar}KJnCq!uchixoD?F3 zeset((+fr#g>toBS=ePkrGexC>B+N=vVaa%X$z6UgrGyEC#`0vme&@V#uEs3Hq8g- z`I*j-z0fXs0t*}#{}1KBg8V}1(N~ZW8vefqY1v_4UWlN+P&pO5jM!b!a|?C5g16c{ zn7eq`1c#@Q7KYH6HM;HEuY-PJlCu(NJj@LP(EcA|!|Y+@g;3B0F7JK6lyTfC;|#lJ zf8BviD?Oj)KqrjfMfk-g#qv6em(>X{)f|i(Qe8Y21epsYLU7 z6P~;AlwtQ(j_1dCZpY)pQ;p{dJio=W7|$Dc>hP?^^9i0fo-RDSkpDS6ui|+VPXiv( z+57^(|(hiz|fS-m1(tWet9w<|bgq~pu+c8X?NOxR_I zc5S>Pd^I*mt#bQTNak(f2a|8ostL1c;`FdBPaZke!Jw#QInq|Ls7U2xe}`}2LQJlhUG)IENOVhldT6@t)6n zoK{Z6{))BE!>+N@1i<`Fw3a&zZ3VD!JKH>}b!>ypL1(nI!a|Tj9#yvUXnJX|<-j~C z;z`!I#<-jO9|Yz#S*1tGbSZpgQmjej_U$exrsU)*ZHnggyVwDgxeN1Bqx)@Yc;ezT ze#SY3H6o9Mj@(t$f};<2sNUxp7q(e=TwvbYR-a48ELYT`PI)`;5O5QOiJ;Vue(twD z>NWPVnhSGWp&Q@!|BaVs6y#q<1yLFPw`Hub#`rPe+NjyAGd=^I3}(I*TTw7GSbV7U zOM1Q00d}+px^u*N3O2;S&A{6enCGxa!Oz939j|xel@Ip}{>Q{y>{cs>xYbF89`$-K z%%ouon`GCz1IOpQKf;h@tQRc^NbD^L96Z~X+rBeAKFP*8WKJn8^5pg%a@aL2iT-2b z3Merx?D*@Xd~E@GG}IMz5B30(oT&CvV# z>y0?qmDtbzwj=01JwUi zQJX_kZ{^nSqg-y(-oKWHIxBa?PeCq$LU1AC0Gh{Bl{+|@;yF;g(0Sw&bfpwinDk@> zYAPWmu7#9#Fg^LXt z>k3VI+yA;IO|)YCZ=x73OjgL@`>jA&3c7$QHVtuFC>+1rujtwBR~$Y0ZYRFu-Cn^i zohbUfg5^)7j;CMh`6lZ5Ri4HPj$F)P@nV{-5txLTJbsoIII9qW0a{?33jwPZ=v9Tl z7sM#Y)6W47-YzYW2PThi)B@GG03QE93w+yRx5OhxGKVGpcP)@>DYV4@papta$}I6; zYk?w56;4dGz++5N@SFGouqnVY@);7m4{=_&JOxy3`e$dlsp2FUlMYAD$ zgIfl%MK>eF!Z+YQPVn4pwGy6JMaS2OKurEHnZfd~Re_O{`Ud5UReTJkGQUMcQlK{o z7~mH4SkMeX0JhPT%!{*8S-m=+ZDbqh4$eW+8$&a5M%#flwrDO=(4zZeVAL3T7$Ll1 zL5Ou#cp^f=@H%m(A@HjR4g+Gh^-Fi@?fLVs(o`c`+qZ|FCFFhEJTpF0dhzDKl9?)y zi#A{a2HL^~inlvf(~(T(J1g1!a7Y-d^fIp&w_5 z-zCLw?3IhK$cHkX)xCK!!bb8S0{RLC;<{*q0{#CPkwC;*lap+Q$xz^6(%;cAel&7- z3Ke6$u=nESqjzLVWJlfVO@%NK 
znCmK6uPrE72e{at16$l|gL`;vTzJqJFBOaG(SOkQDD5$nUaiGA8)WNY+1hP|Jy67D zjXls-w4ZG-vljFNS)? z{Ryp&UD)K8S+KB&K_2PTloHlMNp($$P*SDn#rc7!FDqeftV@aRv?+1iy{3f`A+P5q?OEe)vUh4Ev=R%bT z=Cu1R>#DQ+E(+(L;ykD+t!+?dcSY^~i>%FBOpb{0jmFx`+3l(hfuKj0R+Ahfv#v7f#`%Dpsr{km2f$|IeaV=#ZfpblBCVmtTljN?s_{(5bG`PE{*L11ye9w1@S_%pFU&Oe`$Jv6pK@%2 zj{ph-nQ z=qg!gB0bZZ=6Ww#y-xlBD=L`E!j`|y%QiR11M6vtm8t;+4=P|SLT*t} zsAzPw!!~Y&&4G&R3z`6HAopIvOp} z?O6OM;e3Z1ww1F%b5a)Vn2)1E!Ea+h{1JHUV@qro2-9g;?~XR^w6uPt{kGR$tvd_s_^0(jw`mMw-S)AgC9kbw)?g0}0N1Z#EHqwR|u2?t%Y_Jzmr=zlMcXdNjX!`If)=#pCPI{}JG=wWZq1#^`C#O2mnkYXfk$8`% zT7oHynpL@!QYkFzftFLTM$oBt&&V7gou@?Oz+)BlE8Pi9F9(*^K!g`{5h{ckD30Fi zq`<8ly)qE09M3}_R3!-2sT4JptG&`l0Xt1w9?=K)9gEg^M)p;lt)PQabckL+L?!w; z>hBb^^sv5WI?Q=4eh5lTUjNzXcF{w-NUzD818yW0Z(_{kz9VJ>H})j*MLLmN ziQhB|6Q^lC+{7tt<}~U}Z)ppNJO;yp6S>|P29NNH=V`-0>)q~b!{Bm#7_3r?aBs{Y zK5@3S*$btw6F4_`MqYwG;%Q+`YwCCKf1`(8lGg8`cn<34C^cgE3R>(rLj67^o#9$I zEleXbs<#~)9-`IPaN`NHVkkZn^>dVNGP>smj*_{1hzJ}d6CEOum`c%6h)M<2utv~R z>4T3;g&u?N5Am;{q31PzXb|C3M4hB~0!p~sWWvYXFO6I}HRxT_fP}{tO>-WJ->ze| zA8ccYgliCs`_Lyx3s&bDd5WM235X14#d*sqI$P+>`fU6mz52<<;Qa^&m?y^>J$My; z)T|U86ufHgBTYc<%ic#UejV!Oee@Rto$>!}bb62}O=_CS#HD{Az7qOWj?{UW=)~xG z4N)gYqZ1fCCnL)riGLp`=u3`sI68d2fkji|Jh2}77uOpfbfb;5-grq{Z=5VY(j-nZ z(+RQS3w$cP4s{yqrS$r~UQ~-k0k877qMCQ>3-QmO?-#wfoizXINr~qk-I=yZS*JL` z{4_OlMo*ubA)44+kPQeo3nwo9O%veTXwIzS;-nXt_^gB~?=P<$hCK zMP^Yw)W*^J>(v<5w>&ss!nePx*0fse=O z{7|59lnxb*t(d;}&4NOg7gE5vPbng?rLT8+Ekc_0SX#Xte*tBSTAt>$oQyo`AxZ1> z#RIyPeDQ_h_>4}*87f9Bv$U0}O4(NCXW1NGSZT|&lN`1K8Ql&iL3&Sv;YgSpJwEaQom zS+sk`mH^ga&O_y)E;6ja9!@FRYvNDhblNRo<^ixpadzX5I8uVgh`>50(RPA(jK&}d zz0fA4Y4kXI(XtVfX07ztb=a!vZNBU+ya!g!)l?4TOG(orZjQ`plw?nOX*)K42Q`xGJQXqk4av{IcELC7xMbbXX+xH-Hx4dA~xc+-t3nHhq8q z44yYN;I{>lZN2cX6OhAoh+T$ACsPz^3;ASc8aGJvkM0cokw{dA|qA_MpH8 zIq;F;fHMGxQaZJ4PEESKFZWOLFSPIc^N-U$GDn=0?B7mhh*aPW2K`q#TjR~UhV^Wl%dk5DvebX402{Dl&;A27E87hoMf(gM z1v@S{C9PBb4twoyj_-XMzRS=38hi_Y=ShwyBwx@s4cBGbscuhvPndtW=AQG<_j(JQ zQxd{F{)xWExB&mqfX*`Zv+@p!i=N|N=bfA$yt6Rdc~!c6JK^?)4f*!H^UvXZ@_)E} 
z8ROz9{e1EpwWsnf!C&9|aZ%5<^cW8(-d}tA_Ta6zxbPM7{~PmHWrc?CD~117d^5(u zyZS_-t#1h5)Orm5sIf+(LmD-=FYdM)&Zv18u<#w2MU}tX;m@maUJ?JS5#gm_x{&V1w!SEdU zYs8Z0Nc@;qJHqbXHOvMhui!mU-(5zhX7E0XSKQ?C|!`jwW=kRTo zM@}B*NUVqLu?#acVXB_{%TIWVaV>ziSRE@2*1rc0KN+mU$(L@hUWQ^d6J}bJaw{7+ z(j_`oKVajyhjM6v7tYQ3LntE1P) zkvZ^y&4J!4+5xnL%{Xux{K6(mG{q4oZx^wkgsuwbmh(u%Bz!19@oAtgcCE!8n7}kp zD*0@M$tAFsvNXsJYlD2Y73=B-IR~!;a~kBr9Bzpu{C_0tU|^j@f1FRK&1fjAxzt)# zGkYMOLTeY!_#M7Y^2k|^#797B?M99g*^Iy5A9N<0@h3sq#kf3B`^^}^cWxFY*d~}@ zQ=rKj{}t5wN;td_)xa?FX8bItUW9{Uwl?<4063D+gIi+1MbI0*aY;Gbz*~nS?1@&H z9aLtJmoppqfbuKWsIv>_tgdfaU3B*hZ@CL=WNY|ZtAdNIl+fDZs>z4P@&-6-QvydN ze+#aY;>JqS;s28oev4{cZUF}<)jP>zBX$l|tcIv`oS%DU)Ke*57&#)sW39u1IxjYS zX#wtj!JGj;7)KCe%xGnua$ieL@DeH#{*Rh)%Fp)6eJ5ILo*J#kuZYZr^WR%#pP>p`KGx3 zT3mlq+yE_ZfEE|P72_Gra^G1}Szt}DXN8-@;8wfPtS;ygHgJn45 z#=hv6#cSLl7h{cpn&H->XT{5K$jCO;#1 z1<{tTqYRKkOAs~>QEamlPX2COf)O_PY-{*x7}B_3OO3Pf18FGKJl@NS zHUacE69}iU9F!wKe>Z_n)j+)j=yellkOr~|&>JSuU=5TjKub-a(=-qnZv%~GCeY~` zsE+{Enm`2_h`bCU*PABL85)S(F9NjO1Ugd#^%bDEOrWzg5GY5v-Zp`TXrO!nddCDB zs)70o&_mnuK~Vh$b^K*=R(zByT;fuklqMxi7iI(c!THPBJwrS z-Wc+MP<-B>!C#4mO`7@DOg##aHNyYkgV6cOE)57U22riLk* zwA32y$gg0b`3QK`H>dh}_*#dOhIX7w_Rl`35Qo)t;MA}Q?XS}~)z z^0p$C6O)Q7Z!`DBVC#bjN8Y{)_S86iCc*3iT^Io-TGJtzV{}MPIs`L~4(XK+!F;1b zZ0QipIyxjb9fG+>hxAT|Us*J&v5qO6<29PM$KuOMVe6Vt(1yx9p;wtSGObnJ_il%G;-zAKrcHh|4V#i@V0!FoRD+q*uM7M0^T^#%Ou?RsI#za<&I! 
z7+GlyUlq35RUX0s6q;~!Rv&FNB2hWp4PQ;T&>P)l@5fc(fZK+#dK!ipg*?fpSns*m zW{kVMqX75CP|W)nzMJu0MID~k1h?S9YjBs08}VWQ0If7K!VW3mjS^5BnZuW84wIWg3z>=0l@$y2+evK5DY+q(4?0E!2l!(&3PFR z3_ya=l$Qa)03-;_co`53K!VVOmjS^5BnZuS84wIWg3xqlf&|uJ$p}aJORH$I%Zq~4 zkN=cNtr^Agg`x|?2=}Bt$(3^8D8>^x!^1GcKMyuV@6ZckH(->NhyR;0Ck7ilHP9P= zp8D6@>lU1)+{mws(H2k*5u?KOo=X)Bta1j1v?r#f$%6lAH_x`jhWs9}aF+JP3lRU@ zT~X}K=R{N#QP_~v2r+HQaj}bPQ28A(lqZT6BL|;s0VuZk6VKCJ&n;CUc2kzBIy9V)K^OOo@P<#*=SzyK&14md+>V5at@L)rF1)R$x0zJd2lVFtC*JB$iiDL<5&0g5Sk6j5 zL!*2A5kiAvCPwHUXx&rCIQkJB&gBFR`4Jq{@Wv5b3(=7Kbv9@iiYGAus5QUZ4mH<4 z1ZY(6!6`hAa{BTFRhGlhLo>G|F65`;|HkoLjSBd9{&nVgyIDTcRTi1S0UNCExpW+j z0m-G~V7!QnAF!y;Dka4^JGF4|v!bP+JwuC~jzFgsUzD*%SREXu$z3aRu+>V1YzT!f z6zd5&u){97I^m;|O+~y~hzONS6CNrfr2=Si#GmrA%4@HqVf(6>ez{ z4s&N-b}Q0?FR+%QXI9cu;gV)H;NPYQ{&h#tvFof9w}#@HD2_Nr#I2*a7K+2@qBv<2 z1bBC6B6lG^G74^)c?K$)`3a4JTqMJBC^{BpZmP%29!TEQ0T8iYBO<7zY=^(dNJF{D zX%MmCTIB0_8YE7Ek&ljqwWFaEuTR>v89ezu;h8!mqq_``(MMCLZx8+^L(&@-1WB6V zGv)@eANb?b8S&}onWp<6^wmv1Kc}(aiA~VZ)DU{VA@s=GIbHlf_%4L8_q2fx;e35i z0cf@jwEn_|m!b2pI)n)k=QZ#gJ~h)+IT7vujJoc2-k){UpI|tCOpmUZ`DukT8lULm z=f0J#D29kU^9zdIi>zR}J=#GW9ll5ByiHiF{B;HiwDT1@ghXz0`TMA(PDG-lW0bEQ z5^Lorey2~+wDNlJzcXsmCg^j1l@ZUc74!3`{~`B@X}x)%fmWf%y-~an$>55TZWL3F zEH{d!#5mpF;{VKj;u>&$foV~j&fie>Vx#5c_j@?Kao<*2HW#0wTI1~Midua+Y+YAW zyTRGe6*XPpM96f36CsDM5Bvso*qnGT6z{q8UMAjqI~#DxGdH@ek8@o`sOy0~tUKD# z$Jrn)Yv@{8=IcvAq~XCr5kz(#bci5I=0UqJU%x6UB5V{PZi|YD9G{&ojEaEOm+L%= zjM>kdwfoF)XJ@;Hb-}6KPpvI<4?1Y2ZAef11}!~O+J2f#yIb88UMh>}d{VzJjhmlY z{Rj5Ee~o&iCKF8TYU4+3t7q`o;Zk<9;+RLZeewd_?-J<^4PGfg}&? 
zlHmK4&JM%*InmE;xS(#`hvw&u2ugI|#P8H+>Qj#KJ&Q8tUkU%OL=F0agv{2<)w?=D zV>!CV#`aflr+0i{42~gaCfFq>2f@<`jP`a)s(U5wmNGYPG47+=iX4Vdw%|bRGm1(j z$8}eDmT%IN)bpcW7hNsnd0fu@PUuT)u95eymh=7r#{}6gOrlAq{?B$FG_bo&v}A5E z>pYL69<#B1^vyK(p%nZAZ3*ItdNqEfrT8u;KK~#mr{c?P4M2K_i`|AL8aAX_B$*~k zDPsvlpeL6*~<8v>oS70`fNia-ut2ndA5m&Cmci$rAUC~=2P z&dp`gQmBqT6#z=gR!U3vqeu}0i6rN(gnk09|BvxWFF_A!smMVsS&3^8xD#ImEDZmh z(o(oS=J@d1Ir&YEg5#METjHq<`D*$exb81GhOa0kb&ts@cggW*p#A8x zBFU)$A~_;3<~^1_0u3KF3(GQ-O9Blec3*n5h7-FdJzB$x-IX4#;l)V(Vk%q1tfm6d zgqh(dA)`MudiL~paC)%q0r(HjbaxOZ_rao9viT2&|He5JviKhu1FBWvxeyQCpQeZ6 z=%YqE@eIRrpy7u=Rtj##JK1;EBBc8lNVz8%1os3E;hx|W$aZHWxyHY-b}l9-;Z&d* zaJ%L_;2q7`0QN+4Uc=RQ|Enh8^N5AJ0Qxd?ZVe>8X*v}AFXzES{~X|;IX5^{cW!{s z>>*Y-IFN!<@UA&I2pqKet_vKrOQAAE%7Jd16uKx?fDNPuP9LQzZh>ufZQcW!FTZns#tU2 zGC>M16(M+NJlagX`e<)&i_Aw#2^s8IXcC1R}K9*cpi85|CT_a3c(v8`bFY9VRAT z*l|a{E;^%UPi0h=>7*g9Y+$iU(nkGSPM# zAkJkb6!4E3dEmzz-%=VvK7B)eq6m0_o9bK<9#lBC z=J6aV!)uLJgNX>)hxnf~ zjS)U(K+nhc7$H{qfQS;9XDdW(bzzF2Ce{hP2jPUI=g_SWHi7I&rt;URP*)N6gz7{B ztCm2pSLjt9JNW&*lx?bm&!pw>1?Ew20@W%?@ckht4sSolNJ^e6*93=A!+5utBk81w zduB67(jn1uV2&#rsqCai*H>RgnRGO?9DJUZl1_z|L)fN8(t*%&5-#R4=`3hDQOz7l z$3V+TQEiT-6QJef?#3KRhd;|=##H-X!8PwE(I$?QBJ?1LQh_fvlGDpdLFkt`;R%f-CNWLAy-wzc{L~rc784fX6}0zV zPL$Hk?*4>Ti`?)id~;eX@!z6y&7WVogCQwM!kDXMnIDd@HGo6vAai!#(e}{9&R#W-;(ZXXhiT#Dl^@| zP%8>dmyb{n;?pRP24N+V@@9mX*PjsU6WU*AHor!1KE0Tme=+wy!?0SLe}PsFF(8>M zVS4UHJ2CdH7P!7SCH2+Sf1r)zw1#KfpEZ3=MtYO}oI7KJ#i<$n=6F|F-(_UOf3c1~ zz3BLJS4tD86UPzBFC~vfgqA_BAQD?F96zW<;1icSPzv@TE=q6FCedqf;U$VN_x~~S zr?n9Ikww(U+qdhHjP{+D*1ky!Ilwacipt8Ly055a-(+f_7fzzkK;S6Q)7m*fTAE*g zedM45xQlA@4U(4Dp$Bz$QD`khkIUHvwknX5uswlOtbP7f>3k~66!@bkTY<9;o~KY0 zYBP#@iWddvS|~?%u0{NwL2oZyOyT?9SfXkCiJOF*5i{uV6n~oZ*IE4+ZAQQG;aZEA z>?bGq_^#%^XcKse6vp@f0*uhef6+wvFDl15^NsLdWS1T#rs19q_CXerGl3g%cyt@c zg4?tn)k*G)hNt#p+@HW$2vUA!wGa1S1b-9oUo;yRVzc`%!ri!f+#N)@kJ6Ndj~>H+ zk*;DU$3>^BkeV5KemVnww<;sP=Na<9e~>3S3);XDuqgCz34Wp^oB*>8mN@_;Oby z21+K^DCFk??xn~d#TGiPLGz0b!_MJ2r6C(wVQ-pxQItzPN@ 
z-b2T1vE~ehIzg**RrXH!W1MWrc!lf14x++)d4;$S07pna0)^oSsUOJaId?3$TX%#+ zg!Y`9!(BM^)7!y2c4hp#qD-G#ls8l_4}PN1W1=L_NHcbxb5G&--P2iQReT%P6QU5Z zu<@w)b`vh{1F2MR6B2ZB{|X?7i~H#%d=P)$DB+j9gn|45JJ3K0qD3txDrLu`IsRj# zl!?3)XF2wa9+miyxB!=xO<=KF*x`5$tt-ie2r2J$iBF47G#=s}vuNVmy1e-p<^519mE`@4 z8l7qVlG5i?@1`j%?MgoP&^Hc}htF`|!3(Wu@V)1f6TFiD>BH1IaV38R-cwid;iK;W z7cF!p|L=HX8)%_fRkMf`X!u+$^wnkJc7CC_s}CMW0=vSvA8#J8A;4|a2!h-D!oW;i z;_qFtkc$Jlzh90q;>YcMx5e39^$t})>-F>-{OBre5=p8^j8kTovxLz_fi0r*De1jt6Xz7hRC-7Uap8f8`|Llfy?w1p*ow+pV}^O~?LaNs+CxJob{8+WK$Xb0e_ zK1jO@|0krXNY+DJ!GnMEhW|i`(yd|%Ir<J-Al9HFs#Oh>A+Mrxn#CSp z0MOr4^HU~$p@09L!BAvxp)My=IbJE=dd2eCnY{R|>JP8=J^*d4p}KBV>=J0duc z;!z(zP_`aI%$XE}L}N$1ehR(jbwYztD0XyixJ!OM19M;_J@I|e?^qc9`|+tc9zLEK z>$s9R9nJU)AL{#^%>JCKFn^80rc{wf9RBqRnCW?Om{G1iJPd@SZpB)mDzjg0ThXt{ z2YA1p1>+_u_yd}%)0q(V!5g{#|^0dpj%~=P-1eY_Bx6uXHNdmE|nnBj2`xz1eth`nYQ&UUJpTX@Gykw9I zCh>w2`0B~uAnQ0a7ISL6s(l?7)QIqA6E*IjBGmE-n)CJTr^I^uWqwjG7p+sLyoxWe z>5bGH{i+^DqZgaN4e5=>S$$fYfdw!rM}zH2_i7~>!4X|V)5`0%9`xfREo9ai5^ zT^P-T<|K9nge5*9h4fRFD5@V{W&H;kJGki!sBvt^<>a*mhV6$3$1P|E)`s3*x7Eu0 zga#?;3oTQYKniQ=(e@?aGiq`Zl`WEB<%ROKBt~A;LZ>IG%;oBR1#Ba~YQWQ;#-c{z zRLXLe$Z85fUwbymPa5B*`wP%dTIl|Q8#11!;^W_JpPS|W!f6;He1aqqdK~={wLbRH zPiZ(X)7JUqbw)8xNNYh4E-cZdhRpp-G?P`!~c1n|X!n;0Ej@#6 zNR%vW?e^h=iK&LeaTA&43v=JJhQp?FPjs=>2ZP`}8(O>l_yF^QGsv7+DMdAplI=Jd ziza?0Bo*04|Epai1zx67xzC!D>mIY$5OmGE)HPc{<@n7gFq6Mi`q5cp>zHQT`*-Ng zIQK5(3N$**_F(eT$#e2%>C%dE zFz?MYLWf28W&1Z`UgO5<0Zhc2v3fUn3*bzFeRZt!ln zJ)?XXZqJa9E?{Iec!wo*mJw&vhnH_uA2sEh)CY9FQGLxI4wykEX81-mZ=(Aqgfa4K zLztWPUxZOh!|7Z25F!NSENzR9b0l#nw>dVfG6SOR)mLMkl9t0*?uql49`jd^`z41k z?eOvcK3>FW*a?h(7_9N}k6+EWU&wTSgd6$e-!r}F{1F|0-qX+*^NH<`RKu7lO_f&} z{0lf{nwi7_#HEQIw*8o?gvEaxWFr|3p}U~B@HGGjd!$UuL_C&JvC9Z}IOx zaSKEDaX#0C1SWHT`k0ep?@s~4;GY79#ecLcNQVBn70|~S)ojgp(9VOHUBumhJRY>^ z!M;3*1*d@a<3SKu1oL^YP!IO!!7@EKz!|061-NG*Ia@=`;+lbkG*NUAA%<%PNW+7L zB1k0=lo|vml}|yt2vY47w22_$M!_5rB+Myj6+vnjf>nenilSB{Sf&Mgi(sJ^%oRb0 z7PN_=-9q2K`U+*@+gFs^#v|TIMdVnt?_oLD65I+Kt+iyPnnk_|p>>?^zqPSWeD<%G 
z?dNdLv@9r7ZMwZHjkjapKR_;g01#V(w(s=FoRY7ie%ueheF=OP4PSF5G&AA&o*eA` zRoiPE4@HH$!h8!X3C2=lms#I%!uX^WBBUCZAKW5E$H$Ipdg8jl1=7BS^*@(w$F{7uMXp&H6Y(dPf23b(a<-s>WaZfUxvu~n|G>$A|1P_Jp$s>|aCbsm zLKH)}35}A>8oeyWU+8KFeP1MW3Pk;Id_v%3B<*Dl&($W?X5NZ@nVaL}WPK*<} zSF%lyrYj_0(kI*~6@z|`2(U7*1&zy2Sjx{+iwh-fiM57+u0o5@Bs8DnkH+!d(@{qM zvW=)9@vM10rqM6k`bS!ZZ@KP7=VxC4rR1L%{-ev`Ymv3WGO(-=;!S1~&jD{(BQMiT z;0(E@e}GS>G1UW)l&X37uQ0k#i1 zRMuG$jpd^WNbD*rgxNQXRz%~7X|9NVik~*J^_*B(A&=zF&73VJ_~(Gbv&pi_v$DZ) zfs9~tO)%7xKA+%zi1gU+(b-w{fN=aN#BT(z!|?nH5AHxXq^BZu$0Aq+ z7`fql3f}l|Ga^W5PB0$JrCS(%VUTt?qHX?C(z736djnfb0tZWc7gXt_xv{p&GJy6;zB+tM@=YY{i&-^qFTaf9%8UB{ka!t5`k$*M?K?;Esl=}; z7RvZUrmA1;@K@tf$C8QogeJazhx&0}18|8pAIZQPI||LmWe(g2A^V{?{e!)6@GD4k zKsn$t#3ddHM*N@0>5fR%s?O>|ov2U>ol9A6!@uWBK6RK6iP01s`Ff;$P9(uT@KVjlQ2ovTSpLsWtAp9C477?j=~b&)uI z<7G}mkxucM#5V?<2g zj|gb3C1Vn!wfwY&_zm8th+0O=)J5Fr#Ax3Fo(qkl96H80e-RPsjp8}A3yHi@l=)bV zBA95@v06j-qvAVpoSZ4IQ~VX!eFtYQ9C?yCD!v~{%mv26DFaLkw#0^_aoCIBk;YY3 z-~*gp@E@72#RsD0=;tu7n+6k>>e!PbNm&U<8g#l;N|l>oo=E#UA-~70y0z#{!rBjX9pugL7WVXo9IyDL~7%)PDZoR6jdz{Tb^$c`|*09>+c1FbsyS z7gT(nKFU!DGB@JZ0RC|ND&wOTIQd}v6xI;?C;G)RV5LhC+x~aSMtxbV1d`bLL@72o=TX=u=$C4>8|lne(QSj zumUf`rSWyR{4MhnJxbAv6flCI()3iR{P+&NVT@rEV&qlBRGXF{ql|L(o?&omyl(|9 z^tDUh`uQHJ&64ROodjE_P@&mY|EB)t^I?|rJZ|}d9m@M;@q$Z-LM2Lu^j~+N_X?^d zRp=x56$lup)uoUX5Pd&0%$bGr33HD@`eTYxj?w0NLv^UcHmTq$%V`izkN7)X|L(1og;{SzYe!ATidQj)K&-ikp)=ha7N*j#@)C||qKvZe(Oi6DtA~j)AYWsmOiqB!%q@$mG=tgm zwfX*EQJ5IvcsA8kQ4c$MA_v_W_s+vm4Gal85CIskU{Wi< zTf&yru(*PMo-5s`Sntiu&w}mfVN|U5>DL<-BPEw>UvE^Ly-ZjM)5O}-s5E=QCL82* zGw5TMPFt{(D4lwZlAlQFRqs&8oDlgUT< zWj}JQ1Y6@V(lWZu93G30aZI!t`}uK5H%=Y&XxKIo7RM^QD~UtRim^`H1Q_*~m$vvf zQGMf3U)tUxwISIP8$U4vXp|2M(aqL>x-$|0qnFb#($ZG1zXVskOZ|FK2#7}qoQUF zO8kO=O5Xo}X7=n?7DDNJfA76NPkz}mGtVzbEMa`@wo?|w*jbtt1^zr_lFVW6?zl^te z`naNFm4?q5od0{vMy~RfT?%!9R*TuZ`l>8|g(?~7_i^8!0&Tp8VDG?2@5;lFc3N{wXh)p5#8bnG5`cdVt-f2dxL%l_x*Zwvd&It9y))C;s8-2qCN*XjXi{1G3!L7 zQQjQF|6i56Y%ZD|IK(r;X{XVLK!t4N6QV+}hA@8c*HIx^>4Zp-40u8nA=vf=nggZ4 
zZHK>}1jOgfhrgN~5FhP-Liv;5Pb7cziWA9S-A{n+0iI+ZZr*;nGY3mi&Vp{AhOu$w zW{z8G*D2wSLIdX=DL~9Fs8V1&w&7AiY4f#}`?`^?xde;dQhJfIT-pT|bGBWaY(;Jo zbGD?lfSj}Ov|!Kaq!RD_1@Sc)jYx*cr1)Mda+cF<=l1YG;56u*w%(O(y`SqVUP-rV zn3?A-<>_WCFxr5zaS#j~iy}(yQBex@!!Xk4Y=ddv)Aa4}RMgtZrMs7w2YG`j9r|NPUc}=eDyat#8 zMyoXzleu7f5gVpxz1+*1#*T0`YiN2>Vj6u(ak>SfD^_CtsvqP%pE3j zj`JwTBU}N(&C3;!bD!@Mh(?^3w5`~XPY5P4m)VIJ?jsPs^!uzltUYrNu78_1YaZ3)@*17M8S;3@nIFBe6XS68>t0FKc4Xy^{JNUnv zQy8u^V4;OpKBj8CVpsELx}Z9YQv8-mUe*zm_lDXWSj#wIy~GyA zzL^lcS(59?(q`w3cbNBjvMv$5A_=ZK%)g(X4nmy-)}joj;s0{h2WDvE3Kq=IOx0*k ztL7cLu&ld97+FDmVP=RB%)0h-4oZVM(uYb>)rnAm6L3&E6DL-uKtk?#Z z9l7k}_*sCyZ8HBWcp9!J|={o zun^s%^}@TU!W~UAy3MLJds<5Nt>(*TZU`SlftAHZ*$8K8|aWI z&HGX5kqylU@%40f@AQi1>4tSxw)L*~#{Lz$wH-$&GtCz-*R8Pd3tdV58u&f%9k;%b zN7UC}(P3=|-@|l5GDh$(oK9kYqzy-9ark75t&#i0*nbtBhIf9U;4jpHxQl$k5s4b@H}vuCJia7RJdhA*K&ZlDv!NGRMOI zzYC!!7b1EgN}@XA@b9G;Cb17sR0DrvVa@&1#J_EY7+FiJ3;?}Q7yB&iGls;>M2X<|o zJAm-_e}}1UyBlg?UGlDG!0MTdNd=;w%da}l7c2DoliE9oL$5F+md#MW1-{5}Dy z<-`s-v<^x8Cb_h4a3`jOqt{O!7h<23of#t8n5py%Y`)ztU+C3gF|0w#Th>F zRc}2%+kMad6WHaBgB!0RTz=N0)sEn9RM4nRXd1m3Dsgi*Zltpimw-ku5l#j^+ARXd zuxewbQpTd7G?j7|s?0|dG`7Y|rIV&o&IFX$4X~18(=ULJP10xs22!5%lE}|AkTOAu zd)M5pn|wy$1n4bsUrv!p66E<>wSR z#0u_)fp6c+0s4dgf@-+`SyF%kC=!eUi~2hijAd3@)AKSB%QGU840TnDk4Ve&D`@Y>xz)LyW)HKCYwWap zh#M1^QrEI`BW)j!5We>AEP8WIZ?xq9Y8i22k05eP^dJNPqF2U<8Uc6_EJq5v zUmfVY1YzC*G{yX*mV19%XgTYSd zeQeWY}nfU;BqRUWU=I2zQ&$!1OP1V3iChe{t#`%e*WOHvY&syUQ3m4jwKoe5v~z|vD)11}1LYq+JhqdUAMuwVFK z49?a_>yXN}v4c)A5mUNEod?_npVWV?*Oyp7v~N6B-|e3Gk>D7;Ye%F~n5nWUn0? 
z5YB;S{g9x;yQABSlEX$q7o0xKg$Fvkp#ewmjX$uK;`di0m#g^%u<%vlWIq&=ZHq;D z&Ko*B5PTgPM45R5b7Olj@4@wroDOv($8Ud<>A4NB9d}l_iQnKH=shd89LdIn$($Pvheu9mkgTYL9Pok4DtDfN4!OP(X6A`pDB!LBdnH$B*gc+rPT znYJh1r^1c)_i@AgwFG{3>)Rq%S#W3a5U&g4-YZ{z%g5N@6?-R^e!l#c9+xqt!8LhG zbHtUE1n5R!gUk6zs6AWQ=CdFnnCP?Y_DmkzBtNHE1H)YH(cIp9@Daw`-bk!p{U+!Z zDaTrN5%gpP<@yH}sBG(AYk%L4W0_mpKeVG!+{TnU^**;T?oKG(HqO0M?{XVs?+ioP z!+q`H?S;1cws;!128L4&N%&q=ER{s#yA`xZ+}DP{z}GxbOcMT2K;rxm-V=+L1C(GP zc7oi_Z9oj1&M2qNh`296mxEddc@Egw4$ayI=rusE0eX#KKXfmd`p$qqJ{iCFk;MMx zH88S{)MiAiXQNm$6g5lh7}Nle@OY-=Q6C>ryNk zuS-%ev#$H-3TDP%)!#2_3MYmXh_$tV_}_W5@Om z^veu>#1{9>5nbtzfu08E_TDbl4`Tg4G51}fcH>GBgnk+OkoC(9UB8~b6)H~_=V94Q z9UhYL6kS3E%E_o|a~q+e*$*%O>+#PczlyfJo^0$ zQz6Mcl(3LuF6a|M7Y>{*?bk|Mwh8h{8OO!0-(5E$c_d=Hi#jU63gI7MD3GJjMz`L+ z1H;VL9o?3$h&9{-3gq~K^rbG5bo;oYkW9=zt_5z>K5j0Qw+L-5_im)X;4K7r9fh`g zJ0KSzUj}SpC{#z_AqP;Q(w~ATL&>G&7BFLt?a^SRi08MS;QFm(U8F~)QUWzO zF2gles1RC$l?K=moPxa%!C+fJmI(Q=2ti}q4u>nW%;B(w#~=?Mb}Bj=up1{HmvNO` z%8dgSvRL|ES||qz15WtGQV#VUeFIh2>=V>+qru=K(Pjb!`A`n*Nb%$2*>IZ(yv?p#3n;MzMkKcJ6=D0r13+ygS}^zB;hQ zUGQS?Pn4~BVFY8sknPrKDRUEZ5;9E)GdC_*-{-mL^0BtYX-FP*@q(8eDLCV*E9gWO znH>gA_>@>7W1-l4Hn5)-H7BrtDF9^7$moCnIL2ymMs2(neVN;ukAWUv=r;5`TjP&V zdb*+Ba*0?bxvZCP0v@=mv2EETE_7f<+t!!`H00I_c4A*pGwe{(1RqW85Y)I8JUR_s z2cbX1hCYkofV;Xty2pqrh>npM>pzmA*ew)fg?S?f#b^-WC~LGt2HdRz&>2U~`j#Zd z9;H}A+F3pZV7C+YZ6QoVSBtiTTLG&Olo6I=Tx{}TH!R|sd>E}q{h$vzL`QoVB^H*> zsksoj1>+S>TO_hsjAi*9cWRKgd~h>JvIrNcWl$p625vkJrUp-vo9o`lRaQ~z4$(_t zJ|VPGR(9}%7{*F$Ey$I8he#7wRB{-!^03&cecz-1#tN@W8{7|zEU!Ju^!!j%h_5-l zll0C|9}IosKFl!WKf-XI1dA0t`W;qCTw2uKDAE&_Y64TSsTzDy@7&g(908KBi566F zaDK3O!A_T0JNl`3=IvaM5h78(vW;HLE^$oktr6h+(}m_0=;{%4!MtjtC{hA_hV@Q- z7SpF&x#zLbD$VBvCXuM!aiV}lddRh)CmA`ogEl=^Zep2SvXX2wD-Pe)H$%SA0C=I%+~mj?07R!jbbQ>_F?k{b0kTxrH#7xqMryh z7=1H_4KkMO0nzJ#DZ7s?><9Zq*Mp(W310&Xtm4lAkGi<$zD1m;3k{rP3%?>6ZftZ6 zhMQP^UI;uXlajZZdY-9fAH=qgxy>XR0ej;I^sZyQf<}K!4CxqVW8-P~N#=`=9sQmVH>0*HF(xy3 z6i}pU#Skk~?L=#k%-p~)r8vM0uNciI=5xOWFnSlFP)dX!Bq~zwecR-h6?a`kCEPJ9^z<2!Qw=@fy(A0)^< 
z`C-0dROk-qwj<3rCenuE#@oo|a`dYm7+hT}kt ztX@6;Xa$>v)S~MlQj+D5;KvO5)e6B$8T6}#!H`nF8n&*J^s54ev-uEw&%%9c;fNS>GD0 zk_}1U+J+HkIjGPP`@ISJ)(p(c22;N!7s&G0ulG^k8p;O+a+vS=VSF=D;hO#+o6*M| zKdE*N%7CeM4Jr;9s4O5s@7nnSE`jacuqM~jyAt~a8TUv22Kv_8hO<{l-D~&4{nzSS zTX#%-YtVmacg0k#VVnB@v92|4N*$lBwO}r*Vz9K z2R6NdPrkwm&EO&$Ho&pPHqdh(sNnk~yxj%AVlQW#FOp|B^6qhd ztmlOW+Iu@`;x4qY5KH;o;Qel01cu=}jJ5Z7EbQT7sQTE& zXxGg=36Ze>V>y5YR&)yb5RL%x=FP8hGCu`f94@0}jF(PHgNgQv>&Jun38`-)b{XeS zJWuv6XY+^fx#ZVy#Ps7OkE6Ii4MXibfb>tGm+yI`+nB*G1JpU;F8DW2!flNwA&nW4 zym5M$vmakQ25eV$@%}3ZsOg4NOz(m->%_ zS}aj#>(^XI)Eke2S|U;B>(?wKYSmFtYb9zxzh)^>Z_A9js)49@^i}Mk`u$D#MVwWu z@jgnlW`OzVtM29R{cG^cXb>Goi`D`#AAQxY_yQ&|*&Q5*R7kImM(I+5B*7Lg{ z)Ux2j?i+z}9}fBA;|v9f+pxKe5fCJ2VHZW{V}<<4_La3^rf`RQ^2k-~(Ja?M8^p&u zZrz46u~uKf=Yg{!tE|0$6dfP3lXv5GU?f-b4%bbto40OaAAKk*B~Tqvt>CBnM1_&& zd^TCWuiQs&O4nbvB_`oVK}36@o;|c|EH>o2fh;!N8ef46J7(CMx3ub=J*^&EHh za@;A9?D8$hd(v-vVw=|(Hu>Uad|>3UYA^~uF&A)nVO)$eV3duln15!QuiVTAO;|&z zQDx4NBLlr}4qDUgsWq)5AtqSLw^`GkmMHh#&9P_(#_-;|n^VRg^DzFg^|LxaXnY+P zSKu*1WPh;dEm-gqr}6zpaXwlbcPwvellocNx{3N(dD)aoMf)){p);ELS?>VeQT4N) zdA#~r^N}LzKDReXKkER3beNpsoPZyNg)?c5LYqBO)BzG)Lsz6ECpD8t|Qc!&c zrT9rT!R%oHRcfX^whmZQ;5$1DcVT_{$wE~m8i7qs^_=Zu3lz5?U97|UyN{x$m1Jhp z(>fcCAsd08pwslU#z;7dp4QXM6wo=4!l}???mPQ(Y+YDS^PN3C7n2R_3=a3UM+c=N zR-P?Fx$v`SB(zBpKD77@q=E19XZ87vX-GlC-3+9VLDefD$58e94@g>;v_xj&Gj3Ix z4BqfZ)11npWY(M-Q^fA}E8OaJ_7xuJu=s5_ zFurYn@Wmo(X4{lY-mKQVc{!+ndS!R{r_j&DYv7PI3T zIOI3^+I%jrv3If*?Z|;CH%VB@QHeT{Zu%U2tyE7r;l{BEEGZVX!kl+txxUBs4B|CF zt<-v3h?A2dW8^rWab<3iakIV1_&QI};rb%x1@72+7u1LUtX&Ch>^}?UZJ@#UMbrj? 
zSmT60$F6-JDgzS-7dGO8&-t7|h^5V_l=Ah97egynZtuN`x!2FjkTdo#{;K@+g#0iD zA-{)W)iC0)=<12a+SMS`g_{l#0*MfFIQbOAdJ0U`N^<+f_7e~KpxRvjlh8mWl#|n< zH{0^-kZ7VUzX=CN4~*qfYr1jcxUn?fr@nZ(@Re<>_#UBe!&fi;{SL7$2M)m5_vQ1^ zD44++A`G9yaBouWha30#!4mG__#)pmW3ttNLa=+#48DPO_eBCb_-Bzcu9MrEZbEV% z%l2(zf6mNI;;nc;>G0uII^@u#&<%x=689NDBmkHJRANLDZBYMvk;VuHJmA8WB z+fYdE-aYL=8v4gDR)jBnutWH2c)Q!?{@j1j!-U#GuL11CZmNF3Gw}$HQ@JK$nzhGu za1XXDChpcFHv-FL%t0p*b%udA7hC5zb=Sko?bbWw)+H^}U#-9D#y7|JOiayqoUxl} z0cz`x_i=9SJ)HazHNpUy01jAL{}~`rA5ICh{3T*NBiy!Pi?M;*g6KCRkkvQ|m9>qj z8I`VC{RN^Ni@y$rvqrh$hsyQv)M%7;wPIU6I@v63Gg30qtfk;6wO`h+-KJOs<9oNJJ%3G-K*m4`Q!_?^xwuTS( zd<)d_{mB7Ncc|adn@@FRjT;dGZHtw~oHpo0vOqyn9}dM;yKo1#wnsC3xN2IB`>VCx z$}a9miZ2)nlD3tBU-Jg%l(@!P^G(=Ro+9Yq){|^snKxl^iB;SGXP=imN2RR51i1~jYDl3+nZLdr&7DM#nwfd< zK`SXdrKU5J2jy@72GT(Oagdj)YD|34P2(qQ@uCUMDKyQ#!z7E?!TAY3ya~Bh3`dNm z9H`d8X3(9VN|tKDTg;%bJ;w>OT%&rrB6;7!=1pJdbMD>1geo@fG8IdMWo zpt6b+N}&5h6X-k91hBX|u_D+{Gy%tnCXj!k2^5}a0>vkq00;3CVkC~|DG4}V5q8|Jgxs>7g8?I(U`Hx=4>UpSr}~$|e1ag#psm4Hw}WFsA@#|G>riD7WanUy z&F(Ac4@~4p5xp3p@d50;o+>$cpddDna&vf|vR_OplET$ka%f*V$gH2#`D+j4w@-%+ zCuq=xDIr`vh~(iby4M)}J`PTMjS*hGsFQn#I0}3w&ilE@YG zMtwe(MwskzH|@R>tgDbiE+80fTIB}6b3h(H^Ox-?!os%wO>0H7yP!jCu$^z&>cLr$ zP`_=(1x|e2>9`$-&)S?gS%VKU7H`?B_#ksCcG~U{J8hwXj^1avHFiivTK>GR{jFip z4kU84YP*)Xe{gKQhl zg}@)FmHtSV^oJW1yj#JW6}(TuxeR7}?F$*dRl)7jA8u1{hk|!0IIjva|84~@RPa6p zFII33!w%CgQE~m2Swnr79(v6WD?hv5Fm)_P6!ah z^_c;RI4Jxe6q13gHwl2suc;bVYoG}}IHZq!B9#;oMh@XjoG?L(%eDf1gF0z-lyOW`XMkLO%r1q97<@NkQk#ZYGoZtSp;#_ zB5IW43i7O3dDw9tZBQO`t3+8n2+Dk zNa1JB$3MX4@+k4$4(Djb!j(QBZ$ZTPe4NRM`FJ#OK2DsTbKVy7apC^_EzZY-iSzOG zf&euik0#E?*8&2Q5&w2~Rl)W$@*f;T95zJfO^c%g!~D0s1gw<>sv zg10Gnt%7$cc!Pq+hSX*Sj}56U3a+X6tqN{e@!J&Kq2OH#p0D8D3SOw-eF|Qz;F=h^ z<5LQ|^qVUqhk`eV_lj`7f;UTlq)@?I6ug*kwE2cs<#@bJIUenz19EK`e{QDOsi_vT z^(!$!+EU0^oL^Jje9hZ{?s=O*e_{}JN1;m=LC_T|HB@%xLPO;)1XET2f^Imz0B9RR zZ+V%ZKQWjSU@;yr$$gC9%Oq*N07=5Noi3&+BEG_ikh4PleMw zXhMm?0b2~mq$XllkWMk*MSvqAK&&P5%>bm9w%ovgt%hT0;l)HiA#>>9XTMfsrGwUt|;?hA7*PRd`i0d;0 
zV#I0WK#Yit!g#=+GA_jSY)Ph8a;P67!6BZ<5lze=TSdeVFEnz{@3K5M32}khc1qq{L zK>}ufw434tj>r3yL$2h}PD)URo0Z^If;!Tm1hW#<;aUY(g4$e*C_!zmMM{+5Dpc`n zr9Ygn;x{O`L&2LB+^*m)3a%-5tAh85=~qppO~JdRKis9@T?*bUnid0(g7-;(xK+V5 z6~9Hn?J9n=f;$wvLBaDCyjH;r6}&{jixs?B!An&Bg$iD)^3PZB1_gI0c(a1r6}&~k zH3e@~aCsXI5Z~{#(pUJ=ZAE0T8y4*F0LfYrpau{xd}2` zvTlORE)l$yD6@;r5Gi@$GP_VgOv#g<5FjLv!wiT?9=jPLC6CwFv=&xDqoUMqIHO zk}9(c6~rViKOsQYoEZ=!&TfXJ$!tx5q|8P|$sQrsPqOAjmoOza2O%>=5_kNCu*+L}YZoK>64u&@6GfQtd>d1?yh)$#LAiRUR+?rRp`zj@&#@#FXX zQ1dB1e%Hbw4``L-$3B85pR0(S7e3gt>1Fv5{O5S{lBhc0VL2eq;q2&!;)mxf;Y~L8 z$JCAJ2)>JrE_pz~Wvj*-)NQM6(;!%?@qq=0Ec$Ure!dvVs?iYQBu34oH?XGUfdyOW z>ESF^^%h3!NgYTfa_CpiF2LWNPY77O0s8DeZLV1+3TzcO-CQ@{O8mF(v<6R`() z$P5Ya%nbey^Nrc^@lyZ_PmBT<^MoisrJVo;n88b?JOLumpputQj1JiaPK*xOb54v7 z+2Kx%4%tsnj1JkoPmGQ}(OXZ9j&7O8U(ci*#r_6#wEYd}X#E?|(egK-qxo+@N5kKM zj@lEY12UASIbyQ&z7mlj%^eOR%6CBs@1P_WClkfGU5aw?l`EA?Wg;9rlf*f;N@OC! z9-*)>BE`9+B!x}Cq9lEM`5#;-Gz)bY|9h}cw*Li;;4lppJ1vTnqP9ElggPo7@|g+E z+}KgzEYv*wW1djI8k5^pAm+WivwIL{JyJml=ZRgk2kp{7b3qfQu>^;BZXRpg?C={E z`9(OPjuYuPzk`kFYvSk8rGgn8M86zUSKJKS9vG+6gPnlkP=Vfwlho^R_PiO&Oz_R2 zU|aBA#D~5nThn_;%fr(++szZ_?t-orPdwuQKtBuvc!$q)9sE4J-J|bz+dR1VcGxrV zIY9~3ML}PtJglA9;MRA0sdVl`9^ii0Q?SDpftpCoJ@F01bRFF7nfRLCheP1%Y`52m zGx583yo)#<%X84{_)S;ArrZ7ugb(#TYS+XC1|E&Khqo`igi5Z)@J@mYcFPX#(qEnU zh97ENsHqgUT%g3%y8{|Itt(}SbBhbtx!o2hOR(%(_KMyoS_hU4afE2{mG&qHPLS1m zkFqzFUf)Bi?4np@pA*-2dF=Y;LFhs1`}eN(tZ=C6!a1mvDrX?>A>m2_ZcF#b8aFNu zc_!`(Z(r_>)p(J?djPCyXt?!Ed|ijq()JxVR)tE&0;!)qS@(BLwnFzPj@&ytJeC(w z^<#0MzDw^E4F`{Y2vINaQHT7*)sKT)xcj+*(R!X2ZYnNVgsUG#xcU)Uh;U!Q zpKPJ?#PYdARm1o0W6AaV0{DdmL)XMTF8y`a!Pn7k@1xopkN&ai;A<}9bmYl;@fyo; z-NW`ogb~PDZs?xUcl&m9BaBJyc&p$+!j}J?@i-IU{Z!r$*-d2NLM<`eZy|!)@-A+5 zp!;_d`o7*}n+2hH=6)K?58a*zUDkt>xHXpkY! 
zjV`JYvAhb_xAZN0*$A`dz(psy437r!L`vpXbgCVP3!<++~IH>pb_%n-yFqN8{X(QSGP{siczX8EvxQ74T4 z@zdma_KmHsfw#@_N%&2;+guJcGW&3??{lC2saJnxU>4l?X8kTMd;5)Vd7-%5j~xdQ zJ9WRr{C*wUm#8n?s~4@O%D!5u!aLhk82)Wsbx2ego{*l@7oLsH51}vIr}Tw;p)j0A zx|POouXB@6A3M&cRE9&3bW(aMKT~D+sNTC`>v^Rw{Bk3&J25?E2fJ5qDTU#$rKFcY zU$VRk?GyUKy&Y|6hygDT$^Hqo%snBmL6zY@q7TgVHD6q3IJpVBvt?KGiu1HI}z_=MTRX+yRdgeFQ>n_=kwT7>99{+Hmh=Pp4nMy_1!$@Am6&`SnkHLUDMJzR#z> z3`N$xD^V9?5?D};`?30Deaw=3VAoMaIBuyq2b_C+e<0;}8hWvPlBlmFHVrtWs$rPV zxy$F==C$mhMKCDK^M$wLI^iyDwYo-bIbZ$jo=(RQj%I`qF`-*?vjXTGE1>^zf@nDs-oO-M^$l#F#KuW-;019v zGv^2K5D>KmA+)#5#8o0n7~qx4zf+}t|5jXJM0Z^ao@--vG2UbMy|t*U-`BoBy9jsT z=6WNzHVhpEZ0Wt;$Q;P~IXJVuO_+FhL%`_hS4Az}$k=B^I#hfXnZo|OHr97>>B{G{ zo`J*OonnvEY5f|$v%z}s7j;lPxOr*e{m9G9!F(Eg`XT25Xh2|z^v`=w zNsxcIG^EIT&o6-#3C9eh-2A}b9 zNaF)F{6%_u@2Mt(H!u`&iozRN4m0tdc;YXbi9bg*{e{6UpMFM>UY(oNNN5<^ZMqp< zEu}4^=ZTelf;>v$2b7xqphZfw;(WJp-B_4t(dRrROgeG)@V3u5&1>9>etTuEx8R4i z##%8AT?ng0a6pd35?IV+7%_)=_33sI#{$NNtVgTbt)!>|$#Y^2?R&zOt>*`ET{%2EJ~!F+W_$)&mw?na}cv z$7Ka_{LZS}T=d=LE(l|#FiP?x2=N)Pa5UL^J|d4GU7?ML7!TqNf_54Mv%3pk3ckcO zO~LcFyKjQ1^sz9WwMAfi*bQiXvJiCZ&*`0VIRWbv&I8T?edEA$wk8j{2!^7Y+VJ6I z;vT`GwkL|7qE=wtv$@DP-xpa9L2`R1Y!qO<<=N@c|LnH?7AjSv(DO@E6X5yYJY&uS ztn;^A1DoC1MLRtOUj%+k^CuYMdW@pYXSF-G=|vAfl&r;~5MgJdAFUCpuyE}dNL-zG zaV^;Ss&Dee!@O7xisQpm>>kT5_eGwkf)9Di<~-noP6O}cDP1DxcvivtFq-w|34+(!9k*IAog&Yhs4l~(co8XA0MJ5xxA6X7(P<5HBAAhKPl!m16$+b2WQs$(d_=nErp5W2kFx3Ae+p1pzQ{U#ExGfYdjFIYfLLYEHMJ6UpksEOH_VXVt z_C&5|iy`>K-N8A49O?b8DV~(Pjm#QIK*Xdfc&-;#ACN-uZJg&6DUjzd-+DH9DrR6J z@t&Bx=KO7zms*5O67OW!(qC~;q;?6hv;bNDf{z1s(ZMF-QaEVsL7+^W%MqUwvp89T zXQKs(@8{sH-wwf5)PVJXs7d2-u9Ezizw2QxK>YfHTvxjWzJ}&rb@n8560W7bcvKyyU@0*Izi|I7v(fw{C(={R|4pm-oSb-HKk^FSD zn_=xMHtu+DJtq^~BI%B}`@QD8PK-a$2daP0oeZgPJ2+fu;=fZ+4HZHpeukP?&QSBp z8Dd^Z%>?1LrPqY|Ei3$(_MIVY>}Eqj_NO@C;MK3->~c7iz6&kvR4k5*{2wvF&_7b& z`1z<2IB7?b`ojc&VJO|Y32N>GCpp`Mn@abvn#qU3FR_Ny`|!289TP#j5EU4VA|t^q zBfK3hNt_q}1$~T4bxCfc(BXyRJ|X4g_9Cm|T`YcDlT4@1Qv5qLyMAc)Ed<6O9m@sA_Rqhj{B_dbha+Qv>!bbw 
zeq;2F_|-?>Al!c8z8t?`T9{RK=%alM%9)Jct(ye@8l%o9ymBjZHge7qo-UdDHe>WR z=(oV334qES{!g!AtnD)QZN?>g;ddK3yYbRt#ONd3sC2~2b2wfe$XXHtg0cBz#_#f0 z9+2hg(=Yi2Q_^#O%9KV$@M{vknn5{>`P=3n-f8xOn0#b^2p@pyk1e5rGi{CkjwTNc zoMF4G0GtZiJcuYKGKwvX(HAoL%Iz#Wb~Zt;Eciwv$3ch%j*|pRnwz#6t-J8A^yK3 z+!*~ZqYO&W-Gq>!#R4Tx&_76joS;$!Ai=Bo(sO3wW#qWr`-50F!Bf0Uc7NNFT+XMjx4-iOe2CNajldB~Ioh>5r572LvGT zU-PBsti{X7`5AuooHh70$^0pUMz7>=|5;sv#*OGZpfOB+{?QuHm@pnx4vg;Uv<$qY zUved)#C>kl7nwPrk1C+IU7&XAIp@*W7W0jVk7sPq9AAf1H3ufl3G87ZfW_>YMT~ec zUzdJWOnuZ&_>ECp@T-s7On1%`bmu&VU$czAXVB;e@jI|3bmVKnRb*{xPc3q3HjHLr zyWHmOaO!XB6km~aVMx5&JO20|< z8Soo9cKqr&rwF$u+()iXp!!RA^wIC(*QLW826WQ4Sv>{YY+>%JUaFKwyJ7YHt_Mjh z)I?fvA|+w{WQ_V9A{(Pynd)Hl{+s~PdxyY?)4N#uO?s>0H*!kwtLNM-+{MCuC4S9X zE@IH=AKcN#vqk_r;(#Uf;&(?=X&}#u#;~@U)v^VL1JysP8Ib#VOh={W117 zM(sd=+Zf%67bhaHdI)|Cv(~Mh8u(X>J@guiB>Taf`k8WgIqi@A8 z6{rcRvW!8a%J{q0Hyj&sCm9!%5~1>d5a^?eg=eEIz!oFNOTPt<$pWC{*cMPGCESbX zl4AgJ8>2?y#fjQ;xXupJ)1@RQNuUDtAxoqRZ>IGe`t{CYl0IrHUit;k;MZ)9rx`SQ zEq}NBhhxLxT1=|qn@wuJDRjiK;&iz|mL~5*M+#S*tai^hA4}OtOT*{*pwi=PE)9H5W6SPG8mvvlwg;7b67*sr69YqN zUSfoJdb2RW%@q^e=ea5~3P(ce+VTrR{e>$q%fgn!g{}e2&9bqI%f;4bQhzNn#>ilN z1fYB}P7~NBGU**k`o)-eC1)VBDZ&SagbY;r#knT3DtF>|FY@#1hq#W4muEc7V@F{I zVDQQj7GFEVZzBQtFPBVj{Uyu8LKy{{*vZ=8}AA*udZzpjMu zC1apo$)_I_<*pqGQ?ge^rc4bTfsvn&kxA$X3;=%v3{94w@8`Ffmm{uhEVR6x?T4~} zUKE*fr>mjA2-d8D2AA8`I2;@em(SMN16R>Rtk^#DwSSPs_Qv;JRNtT*`l3AGZ(HMT zWbJW&5ZclZe90G?i|U^PvGlQb;%naa!`ZU;{|NIxAC&5E`#@&@hRhV%bEk~i2k+rW z3w(JI39FaTABXCM`9gep_l7n>_sL(DM5FV?6rd3M$1I~v@-(wVR1_ym^bx{5`e&Yr zUq+pX(wvCDQeuIGpD7Zg0zOp(+dhaw8yHr>@f_xvM00*AbGSXR&a$I4j$Tx7&=z_O zj*4cgqcbMQl-;8TY_jqb9}BTp*s%a^{ax|#9Oen_Fd(@$4u}}S%C2tDF zTO#>$LwDixlP%^TKu!X4!#|QdSyur)W2cIg#Gp;{Je3A=zF6$AtKSa z6qjt6KtEC!)R+Z$JIYH8Xf#{sWjI9oesoK2nXwCqHbd{;gm&|EiiYcs{u-E~07AbA z?3YnppGm&xxSL=|>e*ab-e1H>S|CaQ`V;C06s1&9cl1ZG2(ZqK`j1zQU6vK~(i?q0 zMuZKHr2b}-VpnBFt@lQI@OeZK{bnkDtf|19F;WD7(vpX$nLi?GLVr)GADC84te!Wc zt7AF+2NJ-1G_ihwEQm6z=dEZ6Sc0%y;fU9x6o9BlLCeh|DY+j-c`~Sd4(y@0gfc*% 
z{;K{w)cZoD{FWUg^d)4(CdFf*UB#&@Uk^B^~elJ8ma}ddHY=LN3y7B z@mjC`33g^t$1;DhBjARny&14^re|aGlwHD8@M-WJbTnX^-CyFv2r`hro{HQvxG{)| zW>^_`@>8S?_CtLPVlX25Gl1j$P1Z*jZw-#;GP!pEU-NYz5`xOy=1*#*4)Geyl!(d>8vUIAo;n zBpq`|$2ruQJ{PtHy`hV=;7GCY5X_-I#COG>LM(oGJFMq!j;H5r>-k0>w&rd}_il3?mvNr$_SMVX9(t%XJVsIy&rrnT~Li@1{#yJo{9*hzJNlp&9fZhT+T?5a03cj>8 zUd^Eb&1t)f+f{HC+V1)h#4)x;9f)MoR7f%ewScfDOh~*OfY=iu3FR;%tRa+xiK1F$ zt6{U#n1h3Afw3U9mJp|;7>ZG)?>ZTa#lRG@KvX^o!X@@JQ0a|a@*UX8>-iQ+;Wj*W zC`*MoMx-yYj$CpHfMB%)xg z7Q~q`@=360SzYi+60a#%Q*0K*fug|3$s)cA5>YM^ttJ^P`ZFQobMBbVjwh7b;8ViN z&Fr!0(KM<++bn1!S+SQ`WF27js)>m}4;qKuEknpeM`fVAt~5|tQ(f=)`$gq}g*EdXe_vZ#SGuVFrkfm- z9P?|+>m5j;vU&mTQsMEJFDR`-a;0@h-3g4z=b!7iaEha}dcMPX{#1zsyxAhVMWxFe z^U48S<|wOKG_SH+WaX%-m^MOl$j3F!;i{@K^uqselTUA*mi)|9(j-$9M7gyG^PE4RL4)j$oF0ZRE2aJ!RcltDk zq+yz4da$CRyl&DAL=Ti!2U7Bj6<|(%xnq*kQBhOJ>Mfs;KhaTI?>l z`r7ic$_hpAxsJfH+H$t5go2Fm_@6DBNHnseqO`KAd_EExKVO?(I^VGbt}5wd^N$dL zWs`0wud7M$&a5s^@XcOQ zqs>BTra4fy5!#~Cs;ZhY?aCU*_5kYAmS?ECdzg#=r)yIFN*j!PW*OUG9&{l3HDMouxWXdlv% zG0USQ3n^4nSwFXuoiQ+XQE8yAaw+<@c*}Yekji;gCCoaLMC z(cClV5a+5nF1I$zcjaZocinKkHfxqgyK*LfXSn9f@LsRE{IiQ(#oDadZoZ0Kv#<7D zr+Mbg@cS^ome1@Ohm<$VSQgcl>d(bM;Haq<{hq-UHRx_jAe^T;L@SzdaFU~{ymWE7 zqkK_q04dZsf{=6b%PUGDI7DYcJ@J_&01$Ulz|SwOUpN?cLb@@kistxd`-=V7JFfOz z=9_t?!|l7q=k|mqAhf#pdh*}HIt3nYadR+}t4ak;P z`AYnGY54OT6BgCfp&lzB_$$e`frX`kYMJcZ+KH+33oe=FxHMQ*6%!F^z;Fag(Fe?d z3`|6E=&r0Ut1AaPR@E#>Y?m01O-sg*oxjvkRa&JQZz*799 zRI@P7f^_27lMN~@brZ~pPba~)NeEIIGTqGA;tnO;m2(g=KAiWyI3H5!L^F?;|(UH?I_ZNfe*r3*~3=V~il@WpVbG zy@M^l$pdAQ9J%XdQ(`u@2=k5d?=Sei;~XhJ&ZW#a$7GaA(;S?mB=%i)8AYcQGv~>e z=TY)Zb9k243K1w;J4TnnXWG*o{?c13t1uO;D=$S$N;aBMQ;nd~T5t-dnl%o=%0ie5 zW>O`L#!)#-UE?19j+MVlG=*;aFA^bd;4=C%2;%gy%^?c%Br5 zs-Tsewy~!zDo58tje#aG2G~2bZl%cK=wa4}l%+ByIT(Q+&J#gRBAyhs5|3?KzBEu? 
zJs+LI6D+H$oL^a5jj?n=O&!MCMS`C(#}SPWIa+rP2BOmHAm-nkHVXMrSq&#LL^cAQ z<2uAfDgJ@FjN>&%Mp-H6I_hgM-zRg;D-U3dXKmMWzOgJxOq077)nJ@11H;7zDoi++ zEGVzWfLj^BY()&Qu@1egJT6a(H+3%9OLo`PiH>#*_}tQ}0IO9SzZ8?!D)}=$-+3Y3 ze2VL8AWRm;W+P(SADc`$CSVrmP<;S}SXjPv;xrLUw9{+~uRv@77Tsx@IWf7mrfxnx zOWE7_hKRY2Gq&-VU4r?p;lKhP*a5zob(IS+VX8uniYe3*#5RGLBuerEGQ%Vw=;t8d zNDA}?Fe=pqs~{Uy9*S!0@qO@k%fT(JKa>IeJf4N8m}mRM(}cBs%+0qY<*}ta7A^U2 zKui8(8df~Hct+yUELqu!|Jf-IaS|Uaj*&zB-yiw*^^>t5GizYH5AWuM%pm$Ms9sn? z*Xwju)3u$h5M7;g{WD!p)Ag@(Jw{h6UH?hfGjz4k^&(yO)3t}L)pYgJbq8IC>Eij1 zg~LHrw3@Clbn$z}!gJ`lfi4_1(4yQxS$G*;m(n$ru8Zk9m#*{a;=!lr1i0`P4^n_V zLrpsg|BS$g?=1XpyN=ewHOr}5mStP6)q7Lv`&dN ztosx#tIw`kYq6o(^)1a}|F)*t&(ti%XTpEBmQ^?o)jeL*`o61KTF%k5{Cv$?JXy2U zPS&zoQQ@sqG;84nfL*9*-9JFQi!`m)qgk4#k^Z0#3F>ru_p@TivE^*GXhT+7OT0<^9J9XMZIyg{?JZa_XyBm5c7 zQn(SYEm~F=_Vr15fCE^{?GzaRp*kZ{p zMim$5ShAXPES5feZf^dXC95#cqScPEWVej9Xnki`vYXGgXl>_MEZyf=vhA?am+!-S zwk4;9TUa+(ERGv2StU1Ew2~VwR{KpBiyc0|9 zX)d)`YL{8E`j%O=?%OTH@<1vd%dJ;ymZf%!edI~GyeB*bVdR&d<=Cyl^REaKkCl#7 z?5nb@WPwq`#4BW7O&=b{Ba@bJdk+tqM?PusMk0K5b_l!Pe8{@1@!zE4q50*L7B2_k zp={!#NwhUX#3x<*@V{2!&sO8DmX^L9SfP_v4~yj7WxYE)4?aGr^~3yGKkNTVmXn7L z#gm$z3MRfE|7#RJa&$o_&&-!>B|5FwICmGup zDq$1qgZY3(A@F5B--J^5tz)cW0A*%pU8MmiAaOQQ{l|V z5B&8Ce?RgRPg*_{-SKjp4lkj|gWcxNhkl18^x_Ww-zv%Wr_0cIM86->JJ0>4S&V>%(9qu}R(cb6VV`flJaJdX4Oz`su6yM{o|uM+4P z3jR*u$LeP&_#ArI9|t{CfWJ%O$JQHZ?WW4b`kxQ{9)*AL5XyTm@cR`$rY-Wx&|c)D z&BW(H4m@Ksj>qZ!=L^K^eniIm#xdw(KIdYN(WUTzmx(TG^0*+%k9gPpM&@&KrhJq< zBL2O=Z&LVI90OmJANXq(eq%fb#C z{Bt2Eb}4$ebQezsc|?A@5%^>NOY$er3?2)gYo6TSN&GH_o{8Vs-=6~hT7`c}ru3ye z$ysfU09&r7K836G_cEWq%NQ?tJ{6ODYkAxdPY%T!pVabOVS-Hy4O^WPvEGGz7;ir0 zQp08$?>5xAc+%pf%jE|UuM=uq_`If~rRAr>S>G=Je<`gO3=#he;7{n3_^%8h{c|x@ z+@t2`f zhO$202Yj!>=OH`sq|ukozvHmUmH(#9XK_aUO^ z41eMf@HrmG@P9Z2e2&Ls6g@+kPjEb*pzygW7f&kxsbJO*$KzP~$Kr3|b3BgGKh*KK zU)BFm<|`bJdlddziq5087sulm{!r5Ac)UxcKNR{o9(NuG{Tz?iD}2fw@noof!Z{wt z((h2{Y2_Vc--zpw64lP!RuxZbK7+*LI;816$=@02vyMb^@qGif`xK00%_`odnevlV 
zi2WRn$E`gwAGZomD^~(O*CAaB|F_3TpLA~qe)0P<{VO2@#gkV4^zokK@xqT}yf0L= zG`d7Oavy`^@zjqc{vR^ot8ms6$Ky_gKa}|d$K$1+42>R^o8z$;w#51D##kesv~nlQ zd&c_|@%j$ScplWj;PH~zlZKNe8kJw1epWRWol1rdb|e^5BFci z^CH=*?_(7H4^%p7@**L9j_(r`{!qqWF}^GO z@k2_)u4F*4pyRd#87Z-sEu zlE0(ghIrR~L&nSa{m)8C8~={N@;!!l_HQ1G&h&Wu5N{#XisCWg7EfBeB+EzUHxA=r z+nFNTfFt%HG%a3weR6zX%lnFW@Ckd!`Q3|nT|C@?hqqP5la^mPoi89>|3n#Ya>jV+ z^1?9-b2Htc5?Bi=3*Zz$to6~}9Zza}GJrkCqih?oC;N#Dt2BR*;M zaS8&H_jz7Nyxa?9Ja5MQ(&ObJ562H=yuYyT@fkvXe#GnKEh0QKGV)h?evOFN{X-dV z-Eq+Q6yhy($#`#OjF&$D`IO~z%Xn=W`8hdWo~(t8=LDMlGQFYB6B-o$@y-*PRJ^^4 zp7eG}rknluDf|}}N&5PS7?1q@DgJY>l<`Jpil@SfKMsATN9D6@2>7#sKUJmg9s+(N z@UK(&o@3y%-25K7RN+6F317B%Jf$Ug&ql|Y)rhEq3f2^X+nJeQxA~G8+KXl@h@s#s~r7GSB8RMnP z|JM;OpBnG*tjyTn>GAR)t4m1L{pGuqX%sh_z zdF+&a)@-2FXtoP)LUe{p^T@TCrnWI zS7gj5y&gGFD6E(Hyh>X6q?IduoZ>v8hle=vM3^j}w0PPl<^$L5pOKIV#z}?-u3Lee2!7R zJc*TYJgxm#ncnFNFD*S4PP+a0FH!gxWWrBbhcI3v{uiov?_`XZ&fgqQ9lwzDSu)mB zay;_)r}*zz@z!R{j|s~Cv~h4Q{iVz&qkLECFnx}vO$tBb_@nSM)j#Wn=M1!8$$W>p zAHjID@!zT9WoPV<$>$vyZ#Di~{!Qlh;&H^=jCfQ3L&p1$jPa7&f%$PSv-Q_9-j#5R zCyih85F8)hS-%{cJ5{{z%UJ1ruEL2wKLP((^C8l8FYw1aBI#;_TRdrWv4h3ek))DO zrv0m?M`d~|B-&Bw^#f;@!fzh}{%ycn`Wu=4p-l9ubXXsc;NROS@n2Bknff*HU&nus z!mk=a`Xhle;de6q&@u3jRd2Pwm+82MK&Kx#%?f`=-?fetpY;|?-#r9+SZ}S0o}u8g z-ntZiNv8A@&Ou$wy5&U(Jf%axXWJDj{LKALj7Q?!_XEH$QTRjYXD=k+mt~?yg|nZ1 z0sIz~zSpV%Y5go+ZcKrUS-)PEdl=FXPg=a>`91lV`+m(&%6N-av`lc$N|RVzTbpD$b9;SI1b0i;hRP3f#Bi0PrduqTWgo$#5>y4w_4);CL-UzQtzLs zce8r0Qt!hG->UqlsP`E4{*HQ|tKJu?ccFS;uHLiN`)2iCsNQwzeVck8jrOl#U)8`@ zHSkppd{qNq)xcLZ@Kp_bRRdquz*jZ!RSkSq17Fp^|MnU?x5?xl1ufkGPLYp}ozEh^MmSW_{- zbQw{a7^pPx!P_jn<%`N{1$2$_)|ZQXTj+(}ENEe4+_l1MlF=%>!9YD>>*=YF)7U2b z^-$^~NbVHgKwa5Fky2O8y9gDg)$LN=`9b0Brk7L$CZ4Rh+9=d}U4YxaC(qTYg~m+n z|5$3Jh8gW43(@Q5)@#4CEYdbupt(l-vqkDInhQ@wUCpAo(D1TY`@I!9i0ZXRa*AfU z=4!vnz4AI&-2&~=lgq0YSJu^3Ynx8ivKB2w%|ks@pnUGa@>0=)mbu7kL3Jr;+Ilj! 
zkhE;A<=gn_=1=ok&>=$YS=#R{g39%lY_0DsKw7^8NNHuYw#y|^Yx$}oQ1ukLQ46qS-- zL$(_50~5CGwyY4y5)u#*x{)Aa!RfMHXrZO0TMOkQ-V&-pl`be+slX+gRtyi5_b;nS7;bm8454#TiNFhH1L$SGKip2l~dN1;U=cy~4p^bM|dKZQ@}3$ToxiN)|)Sd`~sF?u#}fpaaCLUNU+ zuEagk+?}|WnrFIoEwg7OxEE(GR#sYE6QrG1^!2YysPyB0*bKe3Y9oH!%uR83HTU;@ zy?;?qfpnXi5q>diTglp1xh6NX4rc`$k~wqT)@HY@xyg;*^dYPsUeDFJn9o|w=iyVr znOrQXRGattulcNPf%WzV#wnxYHOx)ihL%~_4rB!73k1~1-fF)hBkr4@(b z4ip)K1B(2`FQ0&OqiK>R?y**Cjk>p5PhGrZNusr;Tk76xbxpHpxjn6OyWGpI=ek#0 zp-SdTZB4H7BqLRJayh)qFQ4YJ^emTSyHP_6X2rxk(6-pEX%8}1ZVQI*z3tHON(L^6 z+LwF09hz2tl)H_SU9%rDueEW=yxoQY?hdqJ2b$s>r#Og~B~)B>w@zLGK{jZr-ZdGO zKF>-={j#)&dtxwgH@BOZt=rMAp;&Hb+)eFhF`Q8BAHEotuEAtmA8yCk-bshOG?icA zatuq?pdsttYAwv~Gg?-Dkr9So;zTuZ|GIGD$ zI(e1b2Je^GOk#b0x!*5K*Wo%i=g%qV@6<=ai(?)G8Cpo5G1qv8E|ONZXooJ8=cC2c zh@wBQ$f%ss|A#8t1JVRU-pCld? zpC=v@UoD<`qt&-j+$p|COszET-#?3c)*0^=m%n5DzPKX(Xsfla^Cq)@Rt&8*AMfjIHvY1>bBpoSV(7m4 zcsB%p@fLBz*~*KDDDyWi#r}L`<3Da z;&tM(_)hVN_yO^l_^;y52QA({aj&>>vbDG8H)fwMrrsUfd$JhXcb+d4QwNXsYsApR z^L(oq`gop)#nj59|C8eK?~M-x|3{6d9c1mHrXKz0h(~r9&lf{)&)e6Csl7-4&0^^A zdH$n#!QYJcimA~@|JH-8KIrv%K1K}fKF`OCq2uTIYvR$}#@C4}dyRh{{9iKOEuQ*{ zaoZHDZ^3KEGsV;hWP8pQLod+tGBLCRJ+Bi}N09!%3gw~nNZuY8MBW+v#rwp=;>JU? 
z{YX@!|8z0*2|XVtp4wnMKiHvNNdHo>BhP_+Td+e?l6;@o9csKw46Q?N9}`pekp71t zJ(BZV=V8Wk#3R#;FAzf`(fh9zQ!kO_e<_A`qUW7r>L}9wuDJIoZQ zx9IsCaVLZ~SbmL|T8!lT#pM%>E5ZMCmX;dN6^~tJe5x2alRo}A;)-~oc=!^tuL!)v_!=_ifRI;my*T8Td`BGeOE!QT z9S$P?xIJ5F$D85PjPDggGt}E35|4h`c$AF#Lf*>L!Tt%e|4TeF!!#f-Ta%U+4ev`)|a0qrmigGeMZcrRPvc(XrOw&L_F4Se3iJP z&-j+$f3a~n_^bT3;4k}5@zlipUkUuS)xTd{9x*=naBDyHUD>`P#L#^8e5{zdwX~lQ z_$=dj;@%6)|9mmDUcLY2V(Qtl{I%lJO~yBisiRB#&&1H%^}Jn7ynx7d@NC8>IbSd-9B>ys_wHU24=Ry_EekII3sw()Bo1%@fvj zEt&I#X&F0zIB4g}29p&9h3%|&_c#`v3QU7qiJ5cj7J(@)gGXs3MRnNvBJaZZbgvT~ zCz#VSXlYaF4?`T^&f`}PUUZt(7w-zE`E%z(WwLdAb0rw_<#0B2p-L_3yJ~gRn%Ek*KVz>O zHZEfwI_|bzye3o7(XmJJr?s_7xm^*e&@~P_!nSwom%KIGu8F$u0J(&Yupq}4c$q0Y5>(t4c&!Zyql8)uWO_|&-zMreRjn8 zwMCw}0c~F4x{>dC)o{`3=(4DsZEe2h!m$(@n=_76&QCt!kQWxNnVzuUcs00Yt81UM z65>@Y(Z*H{`6d?6Ro!*ws<=V9siHfYnX<5nO0F(!659$S|7lDNf1MWUUNZjSkOPuPrV-_+Ir5YQsw zmr30zCjHe?FvVnk>S!x!4-UP&1 z{;6+Qx{oFr)^&=Ezsry=RPR?F;uW1LNLSZA(UN);R*kH1|toLpC%>dCbW zmnC(wFJ-Npg6Zwm3{axa6XlzbC0b2s%dX8#XTcH8(5H!|iRIi=Tj&^mKFAaezi>0D zaM=Lai8YkMqokSlmUA0Se5PG?U15z(;fo7%os#de<++*}d-1yB{Jq$#ra5U^AU%0x z5}&@BlFw=Nq!e7f=QbeYH?&xo#?IxG(%)~2e@c1c(`mWRpS-G{R!o;GINs_GYZ|j- zmRvOLaJ|n!O+(h5Md(`7c;AY`*?>QNkoReE@mnREKk8RvKC5~FYCSlmg%Fr?Y9e4F zsyc91s`1EfTv6-MJ(GNjux}by*M3}cZM8m9zQUzU8c&wiB#m>j;WJg8&yQbWpH?aO z8D{l(0y8&u$tl$+E G{QVE&|E|{n literal 0 HcmV?d00001 diff --git a/gf_time.c b/gf_time.c new file mode 100644 index 0000000..a4a8a87 --- /dev/null +++ b/gf_time.c @@ -0,0 +1,195 @@ +/* + * gf_unit.c + * + * Performs unit testing for gf arithmetic + */ + +#include +#include +#include +#include +#include +#include + +#include "gf.h" +#include "gf_method.h" +#include "gf_rand.h" +#include "gf_general.h" + +#define REGION_SIZE (4096) + +void +timer_start (double *t) +{ + struct timeval tv; + + gettimeofday (&tv, NULL); + *t = (double)tv.tv_sec + (double)tv.tv_usec * 1e-6; +} + +double +timer_split (const double *t) +{ + struct timeval tv; + double cur_t; + + gettimeofday (&tv, NULL); + cur_t = (double)tv.tv_sec 
+ (double)tv.tv_usec * 1e-6; + return (cur_t - *t); +} + +void problem(char *s) +{ + fprintf(stderr, "Timing test failed.\n"); + fprintf(stderr, "%s\n", s); + exit(1); +} + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_time w tests seed size(bytes) iterations [method [params]] - does timing\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Legal w are: 1 - 32, 64 and 128\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Tests may be any combination of:\n"); + fprintf(stderr, " A: All\n"); + fprintf(stderr, " S: All Single Operations\n"); + fprintf(stderr, " R: All Region Operations\n"); + fprintf(stderr, " M: Single: Multiplications\n"); + fprintf(stderr, " D: Single: Divisions\n"); + fprintf(stderr, " I: Single: Inverses\n"); + fprintf(stderr, " B: Region: Buffer-Constant Multiplication\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Use -1 for time(0) as a seed.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "For method specification, type gf_methods\n"); + fprintf(stderr, "\n"); + if (s != NULL) fprintf(stderr, "%s\n", s); + exit(1); +} + +int main(int argc, char **argv) +{ + int w, it, i, size, iterations, xor; + char tests[100]; + char test; + char *single_tests = "MDI"; + char *region_tests = "G012"; + char *tstrings[256]; + void *tmethods[256]; + gf_t gf; + double timer, elapsed, ds, di, dnum; + int num; + time_t t0; + uint8_t *ra, *rb; + gf_general_t a; + + if (argc < 6) usage(NULL); + if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n"); + if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n"); + if (sscanf(argv[4], "%d", &size) == 0) usage("Bad size\n"); + if (sscanf(argv[5], "%d", &iterations) == 0) usage("Bad iterations\n"); + if (t0 == -1) t0 = time(0); + MOA_Seed(t0); + + ds = size; + di = iterations; + + if ((w > 32 && w != 64 && w != 128) || w < 0) usage("Bad w"); + if ((size * 8) % w != 0) usage ("Bad size -- must be a multiple of w*8\n"); + + if (!create_gf_from_argv(&gf, w, argc, argv, 6)) usage("Bad Method"); + + strcpy(tests, ""); + 
for (i = 0; i < argv[2][i] != '\0'; i++) { + switch(argv[2][i]) { + case 'A': strcat(tests, single_tests); + strcat(tests, region_tests); + break; + case 'S': strcat(tests, single_tests); break; + case 'R': strcat(tests, region_tests); break; + case 'G': strcat(tests, "G"); break; + case '0': strcat(tests, "0"); break; + case '1': strcat(tests, "1"); break; + case '2': strcat(tests, "2"); break; + case 'M': strcat(tests, "M"); break; + case 'D': strcat(tests, "D"); break; + case 'I': strcat(tests, "I"); break; + default: usage("Bad tests"); + } + } + + tstrings['M'] = "Multiply"; + tstrings['D'] = "Divide"; + tstrings['I'] = "Inverse"; + tstrings['G'] = "Region-Random"; + tstrings['0'] = "Region-By-Zero"; + tstrings['1'] = "Region-By-One"; + tstrings['2'] = "Region-By-Two"; + + tmethods['M'] = (void *) gf.multiply.w32; + tmethods['D'] = (void *) gf.divide.w32; + tmethods['I'] = (void *) gf.inverse.w32; + tmethods['G'] = (void *) gf.multiply_region.w32; + tmethods['0'] = (void *) gf.multiply_region.w32; + tmethods['1'] = (void *) gf.multiply_region.w32; + tmethods['2'] = (void *) gf.multiply_region.w32; + + printf("Seed: %ld\n", t0); + + ra = (uint8_t *) malloc(size); + rb = (uint8_t *) malloc(size); + + if (ra == NULL || rb == NULL) { perror("malloc"); exit(1); } + + for (i = 0; i < 3; i++) { + test = single_tests[i]; + if (strchr(tests, test) != NULL) { + if (tmethods[test] == NULL) { + printf("No %s method.\n", tstrings[test]); + } else { + elapsed = 0; + dnum = 0; + for (it = 0; it < iterations; it++) { + gf_general_set_up_single_timing_test(w, ra, rb, size); + timer_start(&timer); + num = gf_general_do_single_timing_test(&gf, ra, rb, size, test); + dnum += num; + elapsed += timer_split(&timer); + } + printf("%14s: %10.6lf s Mops: %10.3lf %10.3lf Mega-ops/s\n", + tstrings[test], elapsed, + dnum/1024.0/1024.0, dnum/1024.0/1024.0/elapsed); + } + } + } + + for (i = 0; i < 4; i++) { + test = region_tests[i]; + if (strchr(tests, test) != NULL) { + if (tmethods[test] 
== NULL) { + printf("No %s method.\n", tstrings[test]); + } else { + elapsed = 0; + + if (test == '0') gf_general_set_zero(&a, w); + if (test == '1') gf_general_set_one(&a, w); + if (test == '2') gf_general_set_two(&a, w); + + for (xor = 0; xor < 2; xor++) { + elapsed = 0; + for (it = 0; it < iterations; it++) { + if (test == 'G') gf_general_set_random(&a, w, 1); + gf_general_set_up_single_timing_test(8, ra, rb, size); + timer_start(&timer); + gf_general_do_region_multiply(&gf, &a, ra, rb, size, xor); + elapsed += timer_split(&timer); + } + printf("%14s: XOR: %d %10.6lf s MB: %10.3lf %10.3lf MB/s\n", + tstrings[test], xor, elapsed, + ds*di/1024.0/1024.0, ds*di/1024.0/1024.0/elapsed); + } + } + } + } +} diff --git a/gf_unit b/gf_unit new file mode 100755 index 0000000000000000000000000000000000000000..dd9fa85e8e2e4393cc531d0f94ec66841a9a03b5 GIT binary patch literal 131360 zcmeFa4SZC^)jz(QY{&+pcR`e>P?DOqp{;Eed?DgXF6_cx-6#|gtuI*7XnljiuAml3 z++EG|x=8zzYHNM8R$FR)TIvf#6wD@=7vv=`3J4a$OJSEq37{c>vcK<{x%ckf&4OV2 z|Noxn^C=&0_RgG{Gc#w-oS8Xu=G^s1KRKRju~@S$mZ~g^#nKPY3w5Ps?3xp)*j zQ>TuX$GLy({?T<5oBGe18q63XJabUM)Txu)H%v-R!t>XqVbQ{N z@rid4PQ`bdgvJ0+K3(9Wuq~OK0#)YV)Ty%`y8rg+_f4JU^WX05UB4IRSvkI$S|&ZL zeOkod^3Ogz+FZ-wSEQj1->WG5D6#@+8h6<56u67Z~6nXk_}F$-;yT; zz8ADi6i#Wc85!QuUNm&--FMzM<4$T*I==B?fp49bkHRVcdeP7S!0ewsaL3f2K2SM5 z1HQrq0^dNbVH7@j{qDSbYURwI+<&JiHNAc#7YclJKNN5Zo9PGsy-h&GU*fw>Xu{N~ zE_sq{f`Gqx%N$x9!u+2WCVnHl#C!B_>eLxCr{9HoO?~huGiRL`Ux`R=u>hZ*SC2CP zPM!Jycuuh14ByON^()cpr{klrnKy{v&N3mKI`zJvoDAQ14c{|b9txZBQF{wg@ZIqf z-%pYe>GX?g_zJZo6gKxmGN*W(I`#HhAe-?o9iJnj@rjWlb=+#bV?dK~$?LiH8n?@% z%am5FU(21ro~cK$|2exq$hc&|5FeyM25TDtd=F{P@);psbHhk zl8g9}QL8122$GxQwM>7(FFkHNU3doJInlqGJyy&2G62s+Tp^x(JU{f$n)$=~etP>4 z?|A6GI}oOS=i{MzO!?kx>Z*Uf^@s0F+`I`_;_s%n=o+jkSyQwh zwy&Qp#J|bNE>D{1VP7cfkoilnI92;CccGyxkTFOsQs5s=sOgT7l|?R1sj22Droom`i}4YkR$r%<>Kmq1izrpGIaNO`6)kFYTu)M}Aq8h6r1C)Y z(VMIm^-aPYI9@n!XyACc@4V{cR9W8u@X>QX6J79SA|cW!;qXE#gN5z*k!xu`p|Fo! 
zAgj89qM|>-Qk`lP!<&GNRSMe{ zU4S@mS?BBxiW;irg@nrS^Db?@qZd;K7E+9eVnle@DghnD15F~(S0o*2Bt3F3I+8eA6JCBrA2ivX=#@mZ=q3852%Z% ztWdiY?28Q*Sc3Y~A_Zq4IZvcgB8?&XN(x99+}ySz3d2$2=z?@=*)5Pt;E^9TLUBQzRMIfOx=`{kT)V92~~~X z-xNs_(LqxMv)^mf5O@iF^om;u;y*{!+spPuzsF;w;5=}s#=2VC{vz#wWZJ~dl-pufn#~nf+NTlIChq_U?7?hI5tFDa07q&j`ZZW@v@w= zi`J&Ocj$wd3SN&P9s4MfSoufD*bT|qh70lDL!`0oyVB0c#{ zq*X)2W7LYb0Oy?Wdlch#CZwU|7CDeWP5j^(yrBly;VqRgKSrS|X(XXfN`WbQWJ)rE zMpZP52p8*#M2EV=7v;)<-Bx*6qo8AWbZ)4fbx6vl$g%9u7AXW|me;Ld50U`ga*ows z95^~udU6ut14nIA@EZPNmx81Di$e;Q;e`w3cS+e1eFaHn_MR!`8N|FrH5?X^*&6q- zeWC%7T`ty1Y7EyAqb+4NpaI;QG&9uRBH~0lDrkq|YidsZ?k>15FDt5E& z9=0hhL+&&*G0`MfC!*jLDKrJ?74?j}7ZaYz&B}=WQs^o4p;Eny=9in~o8?>NTji-O z4S{3*Ye^tV6`NvDBiBL`nSM5r6V4_5EpCdv39-`O8Z`PTwU|Z{RltC1QWq%~WGTxb zK`cD3tkrjiiyZ|`BYVFghi!ubo!P@$6Lo=}Y>k47H2OIrhvbzmR~B zAXabNp6Fa;i#?p4W{Q!9XC-7VLN);jbv*y4cY%eJ=0cw4%{uDq@D*z+c|%`=*oplG z!Ua(eBPx35=cs?59v*QcMNC43vFr%3!CkqA(Q~5UsNY8 ztz%r;#g?L~MuIPp#gu@i$(SDrGO~?&E3wC$Nw)E_gGA6-hzC0**@iPmB6<@~|8gtw zD4$&UoVS9ew&+zn&lR0umgt>S3HA}kMl6+$wu7Y6JYF6;r7Ky9k|$r?v8$5G~4k9B@5wVSY_G*!@50eqrxhdC~B^v zmX|2ZU8|1I=aaGIsgLwz(^7!MmAhFP;Ao+LwX9Aq#OgFO5Jh=Z zw?$U{gJty`g*7D{GRuwq3YcZJqA=7Vg+9PLPa`cmhtlyxl}Htzp0?kT)wzYCgcN$r z$gwn;L$1k3OjdUte(RL);l0eged(gpF;$Xh%7@&ieTK_#GJ0vEe(u5lW~~ z&$K$P6?F%GHm*GWdq~$;|4WF)VkBFMI%6Hjfu&lR~X8FJr8 zxgRldUxBh?cY}LTSle@=1?Hw*LCL*HF00q)#}^8DX4a3`K?9j}K}8`2FU31{vkPd! 
z&v}YAqlsoYtRy(8I(9o-yy5?*sX2b8HR0S?)TAULa#{1dvt;)3uqAZdAG0Pz)cj8{ zAJU|Z>Dbu2v#dl!3Dnz=W&XYj_9@oy#kPuYg`E>EJUii5C-k^jtJvxuTWs~;E|>L4 z3kFdoRr!mKp2b!vl*g+K#H-`dLN$vL6H=tMW6|y|>zTjTUDgCe33}2qtqDRt%MLWW#)9b7L;rpadFhi6&1VVvD&z558=VwNrbwW{V}vLKkaHteYb%(;7YD%Y$xb zowGgEJ@4CK-XN++&9eQPT7UxksJXJ!_LQ8cQxf%ZS(krjVx1D$Zwnkx_#4#_5fd8@ z>M)Zql35~lChz~8wr8pT?aO}vv^r}~o=W|XrMh6tV!Rmwyo$21GqBQQgo+&n zdyWAD!JePpDA&_+68cX75WGCwNZf=tQRLt8#%Ic;Jd_~^Hn7z$7ICrWKmuFL zUjyg7ivo#q-_&Zp`|({7NEG^Rl^J%F&8&?bg#2iUJ%6?_@9XpnBy7HG0tvhS12=1S zv5hUU%|FHFT6=$Bbb5Prh-;+PNLAZOXhnbM@PWf9uG~oT(z7D0(JPa@9uHk6J0!CW zGF$Ir?loRkQAgs}{f_D`zzP79RUxMJB@k{UF14c8r2xP&i$911UPg1NzmR2uH;;HLfjMAQ3@>i zGq$Zknbs5iF1Bb2t8XeV0O2jMC(s{!{fD`@PyzXPFVG83pBDVmzTDDeBY_KjP796h zmgr{8@2Tr)g>7_Wg**k!-r1!*ql9f)9@eUqwLVz5@Q^h9%tNv?-BKy-kKkoKG>452 zEO{N?vbx#Zm1z+rJTTXTjsH2kebUlCaw!zMkP(fD_3v*%)!SNPzUzt8#rj-fm_>W9 zC&nx4)Dp<|AQYb%(_>1wPV`n{GIa)+Wvqx8#Uth*0<$ytF-X+VRATgd-i-p>gv&S5tKNpd&%vvqgl{01Ezyf0(|BC8W-`rQ zpgxKXVbo$_I=3V`lgGMw8SQn~uN}@LcKuQ{QG>3_L(%KQQD+H|0rQTCO`nga& zuNw_bK!*w_#^vpR?()y!6GF?QUS1qM2L(W3Jkr^&`d`8s z+~VkRD2UMdOPqCb**~P`>s;ZBvR&9BW?bqmbcJVSyURYGJ4kD{+u0p&GtD1OZd#OD zn%vF2bhWq`rI(>}YTa+3OGDw81Bq4U(~r{RUP1}w@YpQ5>^&(=xs|}&B^KYyViHs5 zE{QgwiYO>{C($o_an}t3-PIhO6#5wa6&{mA`JcFtCz-qkyzvb9GnbBxs86v4lSJ~M zMR5El~loRBMC$}N&EBZb#gP7t_w)FG0Odt6wbwq{B9OiRjdD~ z$uCJyT#D#e9Gz#Phi6(NxcwPL9bAuMet?=2mn}p&Zs#W;`#cIhjv(zp71j_RY%o+6 z@M9OSHj57MDrhZcUHN?~6FEG{fkPk~p4p9v3ST@(LwzxCg0Bft{j{iW@+c|vjKF7$ z{}mly8(lvEl?Yc_J=0bZDNseovWrzNWVb~>0TgSYI^SSK9?&8;A(E@mXsnY-UsCL< zw_r}Yxfl~&GlS%%3D8yyp)0di7mHyM)%oqm&@8#bzG5qhss5l=j8555$WOFFW}y&p z$p|#v8nb|=fb{{4IB1H1O%fag<*(5Lv9|=4Z)#XB)KUVAJ+>Uh0E<(D^`~Y~_fa#b z^caaVB={4gG!@e|UQ|I8V>1`?BT-BpQc_1WYAI2SBQ{YK^HUAWD=AoV%vkaTmU$YM zsw9@e*ck%LFb&Hc0(Ob03l2?^O>V@vK&`LTuv{immdMp>Ef^B9RicnxkdD--^96Vq zhA5yGa6$OuzFOAAb)4}-D*%wI)>g%8MBcw^dAFJ1`cSj-(rKvOLgQ#C4Ylh)Sq!z8 zMDpNP(&&aKHxrnKS*ZYrH1N9sYs2gfdaY=Na$~`t>2>l;tbw)C;lwf8H7?TPc+3&` zT{2tM#~$FpcL0ygz#kOCXAyMG_q00G6oc+5kG&ztwG(5V=+RGummyntOdl>jF61au 
z#m6I{8`{)*t&k0UMUoWpaf3+m-&&GA&>5p$fgY>0U<3K`%u5ptjCds~+L6ATe#-aL^S)=?eEpCW^BgoDni3-deqbbDY=b1_Q z`8cVQX~=(!BqT@&BUN&}KM@NkJ9ZVNPoj8iv4;b*$Q-^RAH<6O1=$GjEfe5-1o)K( zAd6i+$7KX~NCT9Z022ss8wY5X4&$^`=b2JXqEwe@fPp5!^#mx=0HaNS8wp_703;yw zn$!?rzgN_~+yr=@02?%bxr*xuutEcv%{R6Z;3W-kxe3)S0;n3`G83R5Mox652Egjq zsMT-++^7MrHUWM@fN~8`VFElufbVF42`0dL0-VVK8bv?H@gVh-$CS#7F&>R-0J6Q- z=`)-F>ooxOQwHEt0xZ=4V@!Z+3Gkc-Ft>It0qAU!3Lj<4@e%><ruQ^??Q;iv%6- z#{fW|Xuqi?_9nn$f5Cd#L7u>?-e7N$6FYn92Mv1}g&o5dooz@=bdUyaH|7YhdOqgX zVy}8fvB)eo3PL|l>${}BC$O&oE4n(at)QI%E#bHpP95)1=j7Lt+-65QI%W1{sLS79 zPORlom|#0PLyf-uJQO%?m4dXYm)QqYn{Khx(pF~9ZXCJKIwLNN@sBm%>`T~3IQr%D zI&1CdbT(r7hccY2*hd{ZT%jHQ&58F#(H|i?w940oTn|D&a1=tNE-`3?8=K_0Yh>00 zM0BVemjho|ef`|*Grrqi8~+7JT42(Oq|$Gq%-X4yE>+2A&5@p=k?0Ut*k+Xjow@R` zI%fjw^?8%HM3fI7@lM<1nbr<-G4HU=VL2BwhSD54lFfHwgB{N93bxsEcyl~Yw9jkp zuqLFXSkXp0dSGj|+6onw`)H!kRkqH*gZ12`t1q=>7xf2r`&xa*=gV&s&0Va`)3MXr z?*>Q3wAI*8_-?1YB-VN84Ne)Qqfiw{5nr83FF~tv2gsFZ;mhcb zJ z*H{Kypd0FZ7lV-T!O8kq6E5d^=%iai>m+wiTg*354)u_l(G=(3#|$lT3cnL?vNoJi zL34f+4m)P$SE%=vR;X^f+yttFgVl+HHHv}rIEN;h=S_hk8{UnkugyYAShz ztrpfd3!%VFG`Y&!{JSZytX=~WaSR=uu28cSTnwykYm0ltH4eA6-N5ZCYn}gRP5n(P z4z>+U!VV5Q>V}4>LcPxi$GBlKj zH?xhywvj@trDHD!5WyH~2*qX#<%dEsfXY6_K9W23$j&BrQIpHMF>D(es+-?}WTsJq zdd8-p|16MURqPayP(q&Y~T@!DT^wIK;TtCK zIsPSltvJJko|6t5jruLKjs6$vNBG(apHaUP;bU!j523|pata#fO`zYW3bfoUtV^{$ znr-_9Q4Vi9l~0*otX;KLf6w+yPJ0K(pzEK&iO}9Yn9HY`mWHZz*6O3QVeo$%INoP= z3@gs?+4<-rWt#0#oc|;`A-5hyn4T%xFTIb%LyII=Uc~=8biDKL6XOXJl=4j2RenQD z!$N{RO|W|mnDn>0aL_{93kW%^U>Eqn;}G%Ef_w2IlGS74fyC~#wuWt?7kJWIDh}wR z5GnLw%9C)+A7LQnYP<5V%Ys4!i8G}qhZ?w09V*fm5=90QL!>9IW~i3e7P^5KB-q(B zADHK7IzRS8yW|PXcUb&a%7OX$h0<`eg!@j%_L48576KneDJlq(m!|Y)dg;3B0uIlrkly=-H z;|#lZf8C8uD?NL&pcBULBK%^LVtE7TWp!LWvY>=y7mo%(=AaTGJPtfV@O&4~rFdL; zCg8ah51G@JWA{~o=f`;N!sEkJjb{O#XYnk?^Cq5pJnQj%jEAgZyYXZr|MPfW#Y48c zjd)0B^DF#TsFN`fJe&`9k)9;uKaV=IR0)s8ny8PgUY##iC>Eh4*WQ3^0QMbtygeSm6y1$I~ zeBR@7Is%+=c^wMa{ zf`wb1CsFSjT2Y_ssVz}$DNK9`JHuBcU={7&9c)J+hk4p=+; 
zx!>`q6YS+R7iGCZx4h$jjbk$q`IiwQ!r^~M#tLhc9}}*Pn$5am)6mIa=F71a1v7)i zhgf0hfAnPnI}O!G<`v74`N6<~l4=@Kf<>$Lqa#<-`!0|cnk1%8z%ccbZiM{!OBSU>T9lOJ06Ksq_=9I!BPwvsg<%TrZ2IGJKuP`%K3qd9!Ny!pu*(Dfgh3*?~{~R3u{u)=RRM`(=l2qY^$(VT6iZybg zg^wTEEL7a=kS3G+dahl*12NQDG_QLRGP8h|xmf zN_P2p(eITke>`voQU=E9w&}@ysM9k!|^R>YFg$SIf1;)4#uxf$q zDg-_wMnRr_4ruW9Xn{O1d2F*5sKy2M*jg>{9gE!(t22^0EV0+MK#rx*68obT$hMSQ zV!zP>MV2a@m}-H?vG0q`*8*4uWW{D`fpNGgAG=cv^n;v<-Jk_3t_mB(I00E6*UA=hXyj^I)XBQDHsXNMf(l7){`yC&yi zGX>~|q(BSnxuytE#iyt;=B(IV2p`)3R&(^oZF8$az7fjjj;=a=}Xr97W z%%Itjy~!^)sl13X zkQC?x0tQx42n(9Q2;im(CG+BJR93Ic$9)xXTZQz-(9E3Mez=`2nnSs<=>7;bY6|@v zA-rHgh;>zXJVLm;gd?i>d4|BRBDkDb<+gtAF1ss#-nE))WP8WX&~sF}Z--~vhe|eY zj!O+u0dmnMOu(qNuz}+3j@EQ05_!*Yuz_vP%`_f$p2x`^;%5Ao!+_>I21B@u#r57fWAV3xGvhHK>vSkU7*fcla*+J$xz@( z!r$3Ab|i9l2^C|5u=nElfu1h>McqmC=j-@7I*T@CK(@1S>|hfzuFpx~H?nrYUUN1y zYqt(|sSgyo)OqqZg$i? zY_l7BlfxgmhkZ=Pid-QBg-Vs;Y(lP%9Yvdw(`{XYb?9KZtl{UIQA5|TkD!QYmDP)o z?6_OKwGbu(b6geb^#v8`nJ%{f@HRKw@=mI(=(&@M?FyNuTygyXZe<>%_H>E1C z7_LpYtX#dO<%ibBE^PA4Em+vYAWz_9y_8fFN`;atNv~x2<)y5hbt{qGHpRLD$@)+| zjrtj|>>0oj98UTf=zs)2p*#(4S55$jCC^Fme--?z%KxhTCb)yzw=6hAG$Bo1>hw_O zK$QsQwEM2;uDAOx3Fi-R9?_K6HYl^ZBX<8K))p-$OT_p_V(sPZan|EX9X2~HI57_A zTT*lRR$E&POjRbl*GX%!qJpU`Z28;0Y->v_u#uKn$r6BgU;%3t za*I%*pplVI+nC`t2QFq%QNwMQje(XbiTwSR1{y#7F1-ha!3-J#(I_dn5S8USSK=(a zajY3wbE%2HGtAVx$>wJSCMJW#|MAFrj>p-1ILF8;< zt!T7wfH?s!TzhkI>%G}sV>^#bvdK{TkpC4vNx~A6Z4+-X_Zp2H{Fc~mEq3|_q6w)ksN30!8lB5)1cUfP{ug6E1-t0-Hj_PO^}ZK~aBKyw zQ~8ad583-}w#v=cgK|OAqjK`n;L+FX*#AMPm%3i3?9rv@eXKd@T~SA4@)=*MRM*pJ ziR{GUM+xUU+_0^j37V6#XxBU(6$*YE4dM^SV;@~=`>rrOcb9egqdGjZnI1R|3o0z5 zdFcgM2*)=F`39B;kC5GA{c6ObAGH#f3V5=-nw0Y5zfQ_W-Frh|-)+!_E9@|IR_uMC z#ny#r6V+++E?&H0|3cc!IMQfy6gNl7Sn^qL0{zayFIk-8h#j}q?yvQ#lj{PXl%0+P2@nu7YLJEjDi%@IH10@OlF8+2b#2VuO!ag>Jj= zkG6K8`vR+O)0TXEz6L#UH7e?F=>-z~N1b~%ZKMq`T)A*K*x&%72BEMWc<#4ZEbAco z{Mg6dj__yr9S6YotjX#1Kf)=#pCPGxGW9%OF5oi#5uzIaf z`+==AB1F7JEs!afc#F{d2;E>S!t`K}$Xm1;&^!*USBf@zMhsV+>p(8W+U_l?rx#2$ 
zN@PFi+2bv0R-BCJvFz{Y6+$+YSyf!luBU{544<$wGN$X_l(FwrSp_X4E0z|{YrNNQ}MLKs3IKIB`6SP zpg4N3mjZY2>Xm~~6?h&2p=Nn|i0%@z~ zf^0;jBW;MitRp=D9uP=>#gYDh(Cw$%=d-J0XX@2zfIdc4>loL_`ZXe>?}*vJjlGF{ ziB9A;;x~=L#A#X&H**S`IgNVLThOS@j~hAM4cNwBQ8T9@wBj}HT66AzsbWcOX>GeEDPoHDm7vF3R>*{g8F@O zD#NvMT9`&=L~lDZJVdLn;KoyC#ZYWI%I8(O)##p^d6mrFLqyqKU4pFIq z8rC}WRO;a4QlZ!2`(x}&Xy|!~9~eZ~i>T8SPpA^^Hks;U?w2Mmof`G7X+*-4il*6* z#_rNC<3R6$>Ie3Ms)uQ#x0ihoP2hyKO&#@ZgVk=7e8YwL~E zWxxZhbNK3H1G56aPUpTEmTfb*bIL}E){@A6WFH0!mr`g80>;1;Dk!%I0G zdDKgi*6WK0bSug60i2(R-d`n;&PU;C`gF5>+#I1?)6@ZLNgV(uMM6~q(>Jb(!hOzkr;-yxbc4)8!_&||bDXyiPY(Pu3wP=QJFW44`urMbz~1jNGTy?kuh>zc+KQbm6`=&o zW!7gHPqfUU-7~fXunuz`g=axBtic{mDLP={PvUgiEnwyWuvKyP;Ep&_g2#x!Iw#R~ zf_RL^APK$D7NlwNIJ0Tlh)J_ndi+LgRrNMs@iyKAtLA7b2XbMiX%V+Z<%V7`Xd|F) zjP9xJ1tB`k`t!+Yl#Bhl=YJ-BSQ>p|Td?!g&)*f{zok@K*{4!^ptDlCZI^gv?6OEJ z$J!AYMZ_oKnemB588xLGK7GJ6*l|@#(=T57ZTRGoZtE7$jBYwC68alJ3-G*ODa|@y zt(3N`MF0lRTN?4(ipchC{OJPZSUqA_;g&cM?*9nThz8j59suhQ(WWOSM8{Sm7MAz> zk!(K@&d-963<&;_>c0V~4cM`F|ACsbIt(5~ z`wSihJI)`F(kZXQUi<6U_W`ZG_gv7sFH-7TfO?+hctY~|eN*bXOgq)>UEfpYAFjFQ z{Bz;bZ}K`PC4_nW6McF6>SF zf1$o<E|4Al>H8}&>8dPc&7&~w@^I!c{yVl z`907bw*CgOFxR3An^rr*?%q{wgOOM8KB(`mpi?s`_!GS1CSTBsUk~i^OB>yPw=}vJ zS{vQZTNRu+HM(EJ`!BN^-7jP~S&Gimz zdw0FVw^bf7sn`+U2-{;BW@^GzJ?GaS^A_V;0B^B6S{STf2n|0Oti#EdZm?bsw3-Ps zElRnS4Sd@YovI(Q@w$hyXn_~b*}jn@Ex>Dcqo62H&VAb9oC3#$a;c(D;K#pBK!yr8 zmD5U@e(5|7UOU@;-+q^Srl)u%&!Obau`78q$it#diR{4x`w}=n*%c_uhND89uAsO< z+jw#G`UElu94*6*|F@o=c z3{0>?Fu|5Uvo-cK)cT6y{QWkRww!v=*^P6@=u2mk?_({vExHInZ}^ra6>Jl49geWa zTV-}cnMPjDY~TaRuUI3_Zk)5azF~FI-7~!9EUb~O;p?pmF1AuaYpbg!zW~e%XKhO0 zxa4oebyD0|NjUs}QNnLiiOVhE0Hu02S!_fHQ^9J8u;cvPGp&KJcwyv-2#>WM2kIPc z`0@hW{en3Id@v3t#+cT|y5zo=n&4%G68?{xamvpQ%6-RMYn~dZ$7N}8S*EybEiT&> zXVcz!l?ZEpp!(QhCiAWkeNjf>AFAjyR;yE1)s=K3C@;myJ?rE)hL> zlj|PNv~in%Xbf|5Eab1y`-IQ96Gz+_&IPgGf()Activma zwtNF+fE-$auxYTBpxS8-JDZ}|RwtbN-LeEDY|>C`_&ONUxc{CUXJh-*P^ft#+ln>; z^qL8TQ&gm)+IxWlD?bqL0iPWEd+|y81mQl7>Z6*wM9NMsekB 
zM=B>K6<6L??u)^eiwH;F!Akbj7=0$e>;hdF0mfTXA(&%yNLDHYGmQ?(PK98;(IK`} z2xc7}l9LL-+@nMKq(U$Q>5$x12<9OjVo!x&Hqs$^sSwObI;3wZ1T&Kk>6Z$@{G>zj zQz4k8bjTU05X@CNQ%vexuOCWW*l-F3v-nd3gT=gEfgoW2>DKv(9mev(em*P@g z$c}J=bB&S+5pM)m5XS%#!5S#RIgT73238OoBKko@nj#-ldFMRoYacH>HDL_v}|T8#6_Gp>D~y2(GB=v;Ys-GVj;R{;$mSh zen^}BEzHk>6$HcXgRaiuvW>XhGQOmHv<)-(4T$urSC^_GN=0L|BK#`<3Tp-1k1>o$ z8pGFyZFZH1FaU)n9G#V`jYcG@VEf>!2^V@Jd+hzV3LJ1-F>9b;Bl1X0+K(+Dwb$Z@fYN>KS7G2jzu#mG_lIEpTQ1(v1nxwxe%4l*c3bkYlm`@6Wm zki0L)H-dCyF6U+KREdvI?dxFNz7%(LP4=byK(FBm$h+zH~PkBaY$?I~+ZW ze$~maD|Oci$Ghr`(!jA?=`p^fJJq8DoF_PNp3s$YTEI`EIw2Umquek~ZKlBnHQn@! zZv|VVq^yK1*(~}48}j&YoVT>@^^G*^7ojjbfo_`479!pCRv(=jY--)>$HEzkj`(krMyv4p_hcdnkOwj&W4OrnCWz`M5h&? z#|bp~5aiN)`V0r|?`v)OMSLt8;`h-A_|0x~}(&GM11vfy)2-1?qN$B`J2TsoeO z7jf|e7WEmWq&R1%7M}f_Xz3@vqs2~Vpv#Ic%2?}IJshUV-K(;&)k=nJ2!$^a>j^oq z%PzUP;G?n@n%p{RB^bKb5$qUyuOnEHEN48XZG~G{!ONeeTv`b>Pm`>bZfQRbbEjW1 z5NW{|Sj*8fBVnm@Nz)tgXG2QS62%L;U{)K@ z5Rs>UMzIHw6->8ZJBXvh_sHybs1~b!lSTsVe1#4nk=u;_AYtl4Bru%-KT4*zv))5vwnYZRn0@H#i%+ zBc=I}Mvk{j(b0Rx(of|4c-4Ermo=9h|vr$^s z*uARU*O!7w!-It)i0nM*5J6z(LAx(szbYysY!o4Gi;9RWpPep@ih$LZ<2;Uxna`WG z`^<1>XPdyf;nePD)>gU)9kkLmq_=&8mL4f>Kg*%rt?mggnZeKZpC-|O4IrFcA|5u_0eL+HI8|CUfU7)cX*>7Wq zs&~>mJ}?Hy5Hu6)krM;q=>$f5yCv1V3U^DH8@Ct_(rrZ!!zWvCp!NwxC6nX2D?H0L z>q#2;QLl@x7V23ZCko~d zz5N{w3bsE4|H0|*4r1g!SoBIJ|H1IzID1?M{{y2ywMsk};i3D}^iUjq)Mz)JVmyZ% zuS8{~;C8%|eP=B~x_^PBdxC**Pv8*l2?jv6JL?kb{F`g%U~&>p1zG^NYt92!XwC+( zCz}0tTz&VyY68B1Shx%50+;2~K+>D0L&5)Y9z68VMjbTg2IuL{4e*&g#0m!oQgAZf zH75swBR1cSfg^S)RE|hF&|{NAmn0F`Fnf)yol{6vfK<0I%z?VFTtukv5gI^vxsmT&W^0#iuM!VlTAvIB5G;75;F0 zp&76jQpjG&4qG8S+wH<$=qlI?ZAER-w2iP)5KS9$K9mdd`4Q}E-~e$hGp>Mt%*X>j z-uRZ%VDjl3@*}60oOy@e1cmV#qL*o%B{#Ye%?>!bxwoMB_ZPxyL9X%TSZey${E`-q zu)=Dz`m#q%ba3Mr=s*xD932K88Eo~hM+PX^v6n+`WobKvTni~mjF4M=}$H|uh45^N*6yIf^0{$(%`A^Q;jlcq7k#|-HCI3FX# zDr<=-fw{Iq#8ww332I`U(0dS0Ncv5>^})uG9my2_ItA(~;+{~wNMO|x2=)rS%45%d z{{Us1;@~rB1$=>d)LTKd%2GIYbK>y!Q;ejR~r#T-c|Mcgx+Ig$>EmZRpl zvXRVAYIJ?|W#FWvq2=K7l$3NTv>d`VC6W$=mXmNXivl*1T(>1e+V9R 
zc>N8zLf^*GxV#kn9NUO6(l=sQ26n*C^Xw-E(4cX_5Cl@__n^8ibU+0XI^ewu_A+ha zI4MF8gD92wVk0@doFxeTXHIykMgo(VCf(j3b3}gX47?>&i||U?`z|L+>1KB?W7Q%z zJ_g^M7EA0|6t4O6OLZ_L1xXllm25L*rr8zV?%{yP_akz}bG6wGkP;qTW?t|1@;8LF zJa(-GB$&t_P^sA;P&>Y6K^S{_VKnX|aD2GQgndh@tD)h+^9X0EgP}GcOqGvN58~4( zj|E{RlJsVTm^T)P^$G2-)0?D`9%>K|3Katrobx z8Ib(y>Oas%a$3W)9m<%#CM~^5f6kpT!Q#|h{gzmFSl?x2s{dlW{`8{PpSw~TN1Zr^ zNPanaETU=|+u?Z%s8E`L>M4#2&b5F? zcdkYJoUu2gaBc|b=4GuyUku!l?aCmel$b#Fn9@RXb4h%Wi^-kFM_`b_%E7?3$dB~7vXMP1MUt2?_)G&;iJd!U!<#;$#Kyj6;d-z z&rfH-?^dP7_dWw2OKWmGPl0R8@vKJy_==u|Jk(KqvEI6VXdQM4@V3(+1s+4JU=xqp z-=jW^tCll`yB&Bx!A${v8%K zgOsEX1|$@|YcMOF#V=H%VpNp3Y}4%jifzV3!E10Y=qcFYdQdW@WZ{SbB^zu&Ev0Lu znqwmP%o(BkjiLLcbcfg|7d~@Fq?h|b>?{<*%Uyu}6XkA2RZ_~O9^gH6!WL`JV5k$c zGFNBrgg?c|hKv`u5$qreyq_0{`v7o+^dnFhj*$9+e4YzNgS&M{NJMDQ1zFsMLqEM8 zykl3zzAte4+ydVa9UuHep~nO!&j@qvJQob$_uW%jWOZx@))N8=S=e|~e7gx3_fe^2 zZ&M}c;{MgB94_vsVptn{!NBlqj-fxlz>aDFgJ@B!iAtI4(GvTyfn_|$;;g`)(W4Uo z5f|XHvWW}${q{A3sVQC#mn*ZKPoahRlPVqiYX zN|Vp0P_)_gP`Y~)gT*rqtv_(t(-(y%$3Oa2&{LB?%^Bz^Tt21x13*Ez;GM}I03h&+ zWcCM;O6+i~hSrtjLWGoey2Pi&CK?ZMk6ASS9bMl1i}HR5VI_J0l167*za;fJ)%$1) zOSzKIJ@k!% zM*O(F@3uHws#Z_|v|dlW!H=%eCXv(|{JUsnpK^mADLG50-{7aa{;lMDo$mUl2wCG2 zKeS2Y6@YJaP64M9;6`#(*;7G`&OHp&8C^c*E4y4l3Zu(OSIPs_>{x;NKSuuhO#7u&c@o$M{~mD6@g{m4 z1A3#(*18zCScQc&9c++=^k?81E?kb`1)nd|%%WHG(9BZIYP7uzzk@wFbg%3feIxpP zx?6zHG|H@Ah9=U(XbVgFZWmm`=QU$j;J|nOaFt*zHttZh&lIS!vbpd*f|JHvFD$bD`jx&z+~*!T!2zr}w6e8@_{ z8$lCP)@QA;AJAZNXinF;W=4u$xs1HFKfvgMAMQ&`y$-6IRDfw;Eb9ssyWUlb? 
z6t!0_B!|4HQCs!)F6vL~5o}Q3IF|^I6URm}cE_$gm(+fvhX?ypJj&w-%GRTZIgetH zX!P(m2GDC>7c>}!Vn^qOyW|(rYL41S3w{9lov23t{`%CMtUjJ;8@Q4=wVJWteW34m z())9c!u&M~n_NX6aroCO+-$8EhZz;>&x=7w>Q<~3s?z)QgHH5o;vwFz=fk*33jUC$ z>Qp9#{cw7JR^ThMFr&Kj1)hf#pe}V0W@#+M+vj0FU5C$0!*ey8{6>@ecj!U-iZ%&( z3+|CH&aZ~Rt>V$#u+LqCZnv^le(sHfU^%jn?vSpc{#LTxFTlKf|@*D4$NV>6Bku`Wa&M@G5ok64n2d2r@VB#2(_hWDy}Po&1c; zxx-lx!vvQzp10Wr*GZycRW$>x3HR?v1h9%)HB3z{tN0ymx8Nm>Tri0jl)zU{UW2UT z)L6`^@v8Q9Tu`HqH=C$&H=$4~>d>68Za*c~+b{EzdbwzWI{8(6iA`^$*63IDFdDts z1a3@iG|uW%+KgJDCKYI~J>g!XB$8=?!NtDrfFUJYT1 zPe>vCUH~Y?7nV--gC4He~@)Ag4 zEj`-41bjwKZYJC!307XfuO%_^q7*tkNoKB4A1Gj(`Bej+_8b;55+_quuy{sO2>RNy zNq*A!Hr-!_ZQB=5aAOfiO`eim#Fp8M}9`r(c$Ps{F?49 z;Fx$1a`RFCEFW^Z0d5}{(ZWpmt9Qg>5}Pd@wQD za5!!vvwUIho6>ODbncHVw)$WYynj<$j~^dko_{Wx6DwsX<}tDzCu7n0(?U{_ZS=p| zHB#Wugq8cOIlb;NdksO?yh~lP9aN6p288MSoz#!c5L?F-Kf};( z8usoO0XpYj2L5)VqTFa5Y)pQwk|QAHzD3Fiz_`HBhv7OdKx8&}H{704zBIRI$VV42 zG8(+Yk~+hPGs?ru*D8;i@^#8Xb-q@4Eg%kO>Ha!y3bo{pz=!^Np_9rS~)a2$_*Bbl_IA)sZ!~w*m z@m{w5n5l%te+*KLrefe+n2D|B><_8T#W^K&~^Q*_!d7od+?yh`RxKJZRH{eR&WIP66%5 zgCMX7=JQ~o9z25w%k|)y&IsKuz&!)W*%oRM*9;`2iJ*h1Vz_32G(1=+f`oyf)F40! zKLza~NTpNICW2Hq3TBBQ)trJ>5u|n@SVdI@Drz-?rD~?b75@D;)%JH@4@HH$!h8!X z2}V<5ms#I%%JoS>BBUCZAKW5Eua6zY^sW!Lw+0VM>{=_z#%D(vU6M5nuEEFUWEa<5 z2^CPeqB#}?|bRpwZ-%|GhPxBm5~2jkO=y&4*5qYT z{z6wf=sT-0PpomTr&|=_`o|94@{ZpRL5S41Fkz%C6!8pfI;-+Aw6)>R`U2&EK0ae|LJ> z{ZTLTV=kXdb9uQa7T@q?pR=w=)K0t`+=2$tMU&U@+xwykZpYy23%?%%*)s#cdrR_U6s7fAuVkM0ij0G6n1?5;lwzhdnG&cXu3l3Ioqri z0sR~iU{zi#8kd={l%J;-7fRaW>kR>2g%+VnXg;q$8pnH2uQK|VZAJm{=gjLdjeeQd zKhiRM%XK$8Kl1`8DgV6iA6)@oi>wWnf#rn|Z!#PIP4I@b?$0!n_-yhB*@4LNVz%Dd zgqJzrMgws+eYjqSPaybsbk%Cv*HbOe?yDeO{RePA!@rNNRA7Tr=i7rfR438w-xP<-YMPZ1?SCH=a#L-U z9%V_5;r#v4JQr0+Yk(dYmo6gmwr6DRn_)mh2j}J_3kGe>lzHyw=vI2-{5+vF| zL`?`AXt+09)(0NYCUJCI#GznP>bxf+^xJ!MRZk$*lduM9>6?Uyiz`hO;)sHuS75T? 
zWw@Nty!y%!i>MF`#dDMZuMs|8*O=ukI#FxrepGBX zj+4{nb&|gV`|jq=*3bGi);r(@?4fbI zNpt+wnB#GTRp90GJDqN#J6+jV4X0?jCTqVsoPQod0C=tK<-;#JkLI_$u1aSv+_B)s ze^ZvH?8xj_(wbl@R0`0tExDinHjf2GppSACg3L|0 zHGqFOewFc23!Hqgg9>Ym{uBM;nYIdxpfMn4t5Vi8vyU>$-8oZ|meDCCsZ~hmW9`+W z4BMo%^~l4pJnJM=w!n@~#d;KW>~M37bA3<|KI$A|&7kK5%LJ+16Q57%-)E%?dvUFW ztd~w&Y5qNt3zPN6M@E-;%c3*$popqd%35a@Q`IxB22J$B;?!slKJ(!n7R3iflt}z4 z#oC~>9rLJT!MfXNMTB)|Q(}CfIP|4>Wm?D8;fub@!2*VvkyvUOKc&lSjaO^m_s+Cl zuHLr<?rI6Prj4!V+mt_2IcMiwGjf%8ch?njfALqBOClwcX87_@)z~yh5 zpXgDFHl%_H9okK7W&$yZ~c4^#b(I# zp-zJBlL=|2)xW8~`Fxn+JdazxV2AQPS-jxVp-_pEA^le!=)HnsNfr7ie))l(*b);=@(u({7nr~C>38mAuAE)c< z+@5f;`J8SeP0bn3>E`3iIVB#|wfW}aZ;lkY27N}A{s@&OJKI^aQfxm{Eu5*Mr(ofX zWX4xPGgvt9!fZ)Jjt5<+$eBMOLLZr!R!d6qrIuvYKRoJJm?NXm6PY8&pHFF1s`WJG z<5qVjd+{{uX(Z`wNy{yr{}hus{d7a;5eV^`W;S#tM41hYSI2tzf`V=xe2TYThR%BP zPGaaxH;09(^s5}h2~CuN3m?tJC$@T-C~pnhNS=M^EISI|F}+(HDfdR7zi;j|3n0 zw(tEQR})BE@FHf4Q?~EzJAwK^*!ON7PqT#n$t~FTuECkF_~h10c!OAPsI1;;sUq9n z>Rpy{s!O8`r}TyN)&zeabfyEl&gvb|a^i*0CL(O(VLp{{ILS7{^cU&TFobDws~Hi~ zIv-gBJIWES=z&564C7HO-8AD-&8)aoPex|NC3=8tObZc!@d_rj0=&g-84Zgo`RBP( zjf(Z&O#dv{u3kpPdY^u^Q87|-x%SmY#hG!!N|+|r-bSUF5u0p~Q_Y}Hh@G}zr-7Y% zjgp@V_No=sv8REYy8cv=qou?gr-6Os>0u`ZI}Pl_f~Qe;Vp4(Ku;oS<^|Iv-tRNny zgsESWD}i*=hw>pW-m6!UyfAI1xY72|ZgCiH9j6zAukkMfw*Z zX&lE!d_ksD{K7@SfEGVZp*9B*zNcFwKJu%k-bQML&ulaKMse&kvSw#K8RWptZ4 zJQ^S47;iQ9^J9>1j5_eKu&qBVj#YS95{H_Vqn);KFzT-;YxQrT^2VUNl)XiAL$W_Q zc6=Jpzz+)1&DQ_R-rK-ORh;|dXOj&E1b3^U(w6F4!xdW8tYAw}+brzju5L66SoBs& z2noq;NMf=p;8h527IW@#F>Tc!w^!TTw%&SMZF768VCzc)NCH|7FDi(wQBku7CB8vG zOa9;Q%EQHeE{rrFT&;asG0<<9r;ZF~&-!K7f?C`>f%Nevc$X z(GMZXa@*E_*<)Lu^?rw_gB<2-!I~A?mn*UTBG4}2FL&2 z^08~Y<#S*z&}uQOS6`C_urMV9`#$dbQlQP(5$qk>=v{pT+D;4Z^{(E+1ewvLnD+AR zmI5Xhez$I7oBqNqA#<@H#ZL?|xnk5ejpLixN^$b{riA_4n2m`2+V`U7hi}h`9K%r! 
za*U@agm~=}919ALF(s2@+l5?-{o2o>CfT@tj)tB_ zU*kZf0YVs8Cn*oPpAUPqMTxK-yA0Rq;woKLqIdyRiQ+|+`?4E^8W?_G_6$=$r0&n= zmQHS|>p^*UBo!K)HjlMGI~&=F{n>Y)TvgaYkE72%-k$6v%G8$m;r3(~L;Rz>btO?B z#`>Gp`v)LG!)(d!8l?pE-{IhNeh{tBHth4UjiVLu(^ zJ4z>Zw!?`X5h7X8y_s}xPTFt21?sErEZ|8a-NL^56a;17an3Ur*tt7Qo9;rJ?gm60 zR0Ec3NrE@jcDyJ-T+uD8th3RXo6cXLIzZHj;O8*Au}6?AMx9_9_03`Y|4q5e>f*Hn zhq*^MZ8!P|sF0O>Qd9`h5SJhPZB$56Iw=yQ0G<>hgxH=$b)XbD?eMpgfcQN5@HdMC z;-mggDu2rR$>fh#aWeU<^9i_nfIHboo2Q@d%)wNYqoCVoqHkQinf;bpbxJs+(8O^^ z3J{|UniQCXWw=yO+IVgC{$8YOPQhZQl%D69jrRhJG23nqwjwu*F;wr1)Mda+bqv=Z^4D;B?rWwyw>#-pBbBkEB~QjLh?vb9b{97#+aa zI1C21MG+fx@(nq^Io!s=*9IO{1+@OY!gdJJMg&UVWfLdZLn6<02{M)n3oYyZ>^NLTY4)jff zW=(~wq3Z67(?P?JuSy{-$&)5E1$Z+iPPB#Tk38CnA=+@C+cCQovw&y5CFTDCykR)a zA1QA0M#NWmpApEPtnZ2C?>FYa66apnRzrPbJ5`U%$ZK(B=QZIfVANW3355%HFJi$I zwU>K&%fvCRmb{rjgs7R3A#o19w0v)T`>d9G#RIlk2g8*t+CK`V&(2xEsS9OFE~{s>2aaPo51 zqg?0v2&xhLC2gyoEFc7fn9J=%3=a^9Xg&wM5!AVWeph(!WrslM^#z1JKmlglCBpll z9bW7Q;1U~lO>s#M$JV*-hf%@#yMCr7XS`Dkw~F9GX-G97U&H^k9Kvv>fg4(Q%f}3j zN9^kUOczXtQH$Ta!wNXAcB*@gp|4=H!8=)aYm@Y0sKN*s_aPL+lNs7h;PC3Ex*ahe zdXt-XF`c&<4EPLfR}2Xb=>yUM5Vi{c2fXAFjQ55*9GJ^EaC?a@jCC`idb6a|Q>4wt z8E-J}^krQpT166EHJD#slnw$<0uP}Kr{n($@&h9@aRdw3&&<$xomSmjbYWU|v$$jh z{KCi(AsBTX;24w_>PR1$qS%R0fD>>~Iuj?xQy_tRGc=Z5B!nJM`jF773HUEfW&w6q z42jR)az9Oc{*6|^#r|rueIwe{3GExuYZBTwpbHb)H=t!B zXx|_I1g#lS&p-92s-YLf8ah{XAdnR78z|Upw_*|jL(^g+vIJ9(n|J22k>h6x+O{eD zFHsVCcUaxqbd5y#dk~$vM5u=h!VjFAcTy4jnMn)b=Ts*y{PZy)w1g@&i_$mXMhDnR82}_3SE|kio9&dBW5DVzgD6I#;^vIK~hw$}u zPyejS)>(#iO}2Gyd}04e-P(yQl$pkhSLjyU@C#c>gBs*L@HMx-kz3R^T-jyqgxurm zgk+51UpSn^`bY=1%3|}$7F#pdiLw4FmM(eb7Y6=9UD$4WwioNa7|nY1S1>f0fsTHL z*SPB$LGrY_UL?%|^ahN}zm6}uM8^H)z?!D87%K_W5yzY?Fo|K@Bh#98k4)RS4Lfh` zw#QzuJ+{efG^-4o|0FVW_IRB9I%D4^Gm4dgXc@xMRl2d;s3u2p(ht2`c0HX zHN?^1NH0uc8=j&%{=~wX>!->4hhWdkH0Je^M}GzzusL~pAEF|pH;d8>3tm3sh5{GN zPIS$7Kv(Kl;&4o7!XcAc_DX#=Y-@G8aL&Z7Uzv6~1_LO4?qm<{wQ=qQ!asf;L)%U_ z%)q+jSW=Pmf zTzI8GwlLcR(z?`zB;ES#Z~K$>j_2blqOBes=doftQ^k7tP+vc|k3If 
z&Lq>0-2mD+3{*_m=KXUVk}yNF-N_?^r1*ocgW_ZZQm$#U>`{KUhiHNE9MSxGavBzd zOBmIW7R9NsZoy?Hs4&!DR}mF8-)KSNJ%MlBWrAuB&E(l7hz`vdkLO1)Z@Kjj>|`J6ZOw;o zHwKc=ven?t|G(m&;WUr@Q$&NvbcB}EDF4^6HLuw4qHXyKc+&~?+Pkpe4WaB>%}F2b zyX?aC7ddcv_2;`>y!L=WS%76pq)~7PGg;!I6KK6GaXAUTsRasfPlkT!>Pr1S;^U=h zqj0%~I z8Fo&=@cEOQmNWJg8L6d5M4DifIB>2xY-K1;d(zmH$B8`_M}6h5d10T>n|)(9ugt*r z#shxcv%xhqUu((sN6IaK1Q}42o(-)$QHcYNCfwCx&4+P9-0|&Q{V7!rv4VSW!MAVq z5dFcQf(`dQLkduUoT*)cA>`8QA`rZb^Crw4&cGV=l!h;8-XMOy>~UlKA&=^J_-d&gFY+idb61Kh9(3FSpQZ#^Msu}8~63*tzAGS0IK zg20m3h*UBm`fVWnuW!n;gt&;Nd?+q{4+|$5bBEwY(S&FL)_(ujb7Q(Fx8O!pv^AmF zej=RY#;(0mn5lFO??iD{_1_?~&@rJS&{ucE%`>W_Lq~8w*k7>`!YHvG+-SEoze~xE z#22hda71V}dV=zauBHPOt0 z3gADHQSH%R3HE}=cy+wSLSKnq3UE$k&<~Ccd5sVuP(=l%^PW*hhj6cGtWU%h zU-RN%V=&kayN?}u@e{n? zq_((xz}NXcwwIwdEXLX9voQQi>{z8h%3tjI$22dCosB<|Z+96J^YNj*+sLyRL0qhS zfPJQOlh{8Mx!mF}90>j?B{h|s$GOkBm$}*dTNA#&`1DsXgzBDU(AKH z7XAhMi89ldIvz~6cf@{1?w_E&qqE@)A6)(g*U~hQ7x=Y=N4Q`y0h2i+&uKGzGvqf zuJEql?xANzCh+&V!kYsFh<6J{;YYaba_G!y9^58+`*SdsfuwYp!1%}&`pgnI)uZos z=`Z4tVE-N9L9z+pX3(e3Q~DDiY@_^@F(c6}v`!j}t=-Wlr_lu>j#5YSj-5 zI=nl3y(l^ENa%snhq3Tbmp3%z2)_0Qaw&d(HFAX-PXG&FB~JCjAlddk!ZTa46IE*t;~i z(fKY;n7@)BuWo&N5!O>d33vXTJ3 z2yAjWKMHkbi@W(ONC;Q-S@w9QO>B{$Q>=keuFhy~|Bvty#$4V=QN2F1r+V zGJtE@7*N$zOTRPvjqf*?)^gHzdw~>D*jBXoK@6>zT#>6|rF!pe7XLv`E z?cOb(=B>#%&Q4g?HbAcddJWKP1P5Vz$+UL{{7K39{SPO$FRy`~b*wf! 
zVm%iH%X7K$3H8ogm}1Db{vX@=v92h#3Cf*+G8Pc0C1oGQez%14*g`&sJC6(dEG{&h zofI}dIw>Ixm@&ApN3`>RFuo>FvWiQMZR_`9hwec&JBVLry#74~#Syz;xmHXdB+8~g zdS)XWN1{QaotGxKL`)%t@3oq+oCnmwfjJ_pw&;l+c+m~6NWM9UPw3#+Mq~IyK zgb9>W!D@3Ep|aHvFaI0ykF93nA7g?c{?j%}KK_+Kl80%K@>IgjS04i*ngr9^L;ejy-!-426@B$USLHMZTMWme~*w*|bOmG3hz*>h(+I|@b z^%jH%VMyf`1`SN=0Pbb{u{AG7$e}H#fJ9q(0ofnx7jBGJq8*Lkg)AVlTEbL1vm!&Zy&OS-~TIxK@b|Cd0nTm56&GsOS@K>CzZpD_<+?u}Xa7ppIJ zxo-2RSa}(@5@Tzh?$4iK%yLlYtQ^I${P;UD@b~F?`QFI5+c0M!28PHv1(=2SbkIyP z)oM}rKS2QSKK?k4H{qbdi?#t+Z1_PzWc=x$FciiKfISM|CVVhJgGBp9FcNhcu8G2g z&@#+4AdZj}tbGUp+XAvg$oEAED&uxIT%i>Xhb=q-dHAqW(ba_2IPtiQYvfceA6Up@ z`9JW6a*#0OgkMbM!0+g5U|FkAP{)N1;~IPDjiF;Mbs+Voryx;7VyS}{_b%sB2fmuT zjsJHX@uiLy!Ikkp5u6|+b2fbyWsqwf-FYN~YaJj1+~`xhvqyPVX)$ZeWi(858MivP z@b8Vxc6jyKxh~_1iSjaKd_>y$hPQm1KQgh!uYUzY2mSEQ-XUJ>LCguRCmkS_7T5H? z&Ld|ZI*MgA&xfulpSU9PhtN?XXvp^4E`7G$#c3Tr$QoBTpj})$dpWX#BKtl^pBP(g z4Y-{L#YoikBxuw04_rIn@`U>X6MY8n594SQ3;1s5fyW&HPx-*R^Ihl5LtEU1&j8c)YngQv*Fb#LSvt0;AsXr;J5A+%94 zJNRAOl*(PH>g;%8w>Bo&MuRPB5{4i9AuQ~na>)oLN zT=a?aFr$$F7{h%EZmj6j@32DS@fO;zR+9^iXO+Pcm|FC-3xJy@_RVNhaBZWfeW+TQ<;(cr+$espNYC|mPSWW$>cHj1twI)KF&jFBY0mJZt9i+&`;VEj$! zHYixK1wOIFc<$E#M%N+=wM6&Z)`ybgL@+7^SXqiahsUiTsfyZjpL)y3%yINRY|K+TwJ!;W{K zV8 z%xax92MVioTWsqEwjSmbco-{p0R}(HxXOl|oP8RP5?==4@*O$y4-n*@_8?y| zDs%^I+mU8$6Y0QqW66a=Va3mO=TrcqKY%1DQfuH>OsHofF(%XkMc6Znl^^4SH%0jw zBR*%_g2?;a`h&M2G|i^nGfX+v^c}4CF$v>QMoB8Ec^8k{`KBnQ$N9c%G&aOY_UZ-4 zD%f147To}qlB|D(JZ7-3Rs>1PU|%f^ft2>uuymbdU(J|-VU1}ftyS4q>m1C&ta%Er zh~?707V7Q51=_rN%YyruZ!PG22S(OrIYxI$hg~_cMt3Eo7F!SW4!7QsY;O&>l8wmT z+LL3DZZw7qr@+<&XRwe`of zw+8!%c2~^Q8kVX5FWXwNx{cZVGC-RX?!y1zAl%k`3euPz$;;P! 
zoP+r4F=V@{hv#3}DOXzc>vD0H6&t7KjKw9)VCUhXjkc>gdHi#`^KfVo*xS42h|!8Z zCr`g_0#PR(1J%Kl^*Qy{Jsw#=w&nuv-=U&9uv-`9d)#94C}-p7d6 z3NRmi%{~0RZykOa4XWc<(b@p!qp$e|fA8avjM2CbdW_uK0p_Ex;Y3BhZv%cA?Fqb( z5v>DYKKh!?{JpOmzl`=A-p7d6127+b%`X1lw;R8V_A1`Th}H`*AAQX}{@&MzUq;)H zH!f1ZSE)ftYd7x!=n5T%taf#+832%vzV>7Ot{KFyvs+*D8QyMPv3*ZSVa-`SfM|%edQe(Dcs?nHg-*CG|M&A0rl~gTeo3P ztkqZeY2aMwDr^5AMZ<^g>e7}B)qd$44M1BUHa8P>fnxo+S*D0&c%rwWhW_C7pnO3W+TaqBz1SizA$( zIGQNVgGiOd(X=Rs*Gc^azjsDsS+5NH@P$0|DHbqc-DRj)A1E&DY@Fo>_3U@{vEM0_ z;_?;fd(v-vY`fPOHSN+?d|>3ZY6uEGF&DCXVO;bx5R{E%%nw=Tt2eVk6XsB=SDAnG z=urRb!*aSamD4&BVuEFSo1FHvMY-;7_C>SNhxgyzn$rK6oAHltpVa|E#ZTe{ zv&Rjn(lYJwZNQQO-?>>h3+vNQ6{aFl39M?G=j;?qpg0BTA`ctyK8BrEl9|a)>pWD3 ztOS07PP5Y*BjFf!T2D7qK;u9PUw|cZ-?>*{>B4%t@7!6r7;Io=aJ08GIxHQr@@x^t zg`Ytsp-zhNp~Yt)4SbhBXTWF7L<$e;Lw=L5 zFW~eVYbVQ5kL;Lola!V0m1q;`#!n&F%Jh^IZfu*tlwxT+uJaD8)c3lcM!Y7NmD*qn zad1*>jLr8MSLGHPH`$AgFLDPRjxS`tg-|2eqc1{RE;2R8`B94GuK zR_zDC3=AAxScnU5^Erc1OPj%ziVe5i0;^cL{r4p1-nb}3&RDS(jgg zMiX`UWjNS-pf8tJ(~a-tPvrG|>Wh~PU)d&#?-BZTeD%`b?Gnp!kO1s`U%42Sf)Sh{ z!tgl^=O)#9xN)x^BHUL|16TmadKPBjY!U8*|A-$ z&zYG?yjAxr5i3<9#<7pOJt7Zhd7`3Q#!3R_Wx{m+hl4`Yh~BqFxQiib73~oD4iu8B zcTYKxhW;TgE5a8(+(Y_)NlunxlOyFwp3K zUd`_*ZwdCw1VeGZ^jn9ILZzUUCXaV;KkqR19oNMa${XN!Y`TrnVd`)POT&lyz5;6b z{^%fwJGAfUEuguw=8cGey2Z?5eg|wKSzsV(0Gr}!T{r_<-=`Tq95t=Q`PKSfVUo-i z8cX$si-P3c%D}ICjbln2W3Br#?pB^I=-=L#e7`bJ!r~CCwDof~4EC5t%W%#CEK8_}hPgG?0G+s6ulH&1>IrC5u?W`2jw>3B6VVM@*y~ zVC!%r=O8ZSkcH zZ4o9<7NJp##thSMFvXD5dLu`jb?2{>@5|cc`{IZ2_7`^9nzNyf{4nS%?d{Nn+*WU7 zEti4K@eu7Jyv~&_b~ESU|H~Lr3e6^$K@hSNI5g5=t3&>{j@x$g2T<;`SVb)wvppiA zFux`MMd;j2$jv+088EU5cBF#$!V<&*ntv%OAPA}q)*5ULJJ}}`(Vk4W0W5e{Mi}gIHSOL~%&U+?E+FV_+T{Yjb4YGK^Ox@|#>BSsb!%m-yRb_vuw7)?>cL)* z(4cMA#ZG+O>AD@8&pMpgS%VKU7H|2>_#pEIthD_|th9xOy855t(%4}YY2~xN&NoKE zI*`cGvcoxKjB4?;cV3Wd%(1q3{;RXr3RiB6=Xag8xp0kb@%(pZ?O3?%EuPfI)$LsKUA@WzEmf1{ro+zJnQsMfJO(?qGP_b^w!IXgt_{Lej9SSf`{x zs?k0M*af>}zyJdr2?2u)C@=%uwgVjq5jJ=@wu=-g5J%fl29zWOe87Oxgn+{gs5b+U 
z8WL%MkuDI2uBa~Dq+rz4g1FX%06|<^LVzHy-3(B~!4SCQVx&WXOyYVH0t9ir2?2t* z0W&}m2ZKLIj`qaDQjTV5oLxxH3Biy7l5+_Gl5=K&B5t!uoRq2=nn|1xD^wc;J^)%B zIz{O_OdonV`V?6(NCvWACjchDW@wnLfhPFikUq+ZG*UzuC4?h!!UQQU+bYNvekn?L zQF=VKRS*|Us*5nG7P=^W1qu-BB73f`{Z^$Omm;7tnNs^F~(-lX7d z3SO_^?FwG1;2jEHqToFWUZmi?3SOY#0}Ae-9}44Pd&}Xg6!-n%nlk%WhCF-hr6%6||Qt^aK%@E2oawtg%5N)t1A%LwkuB!?ZB-V)P zA`Uaec0e?LdqRLv5L!Y2^jo5?8W8;Cs4_-euNk6<>q!U@#C0SDkhr+6YFCgracyRZ zBCa(dKoHlI5J2J*bXC0qZ8C`~HA7;YO9+shO9&uwaa~oQAaUXxW(bLcZo$AkL3Jr* zCHjPh1YIRe51JY%uB?Q5f?iEfT}oL|twAqYg|5Ozc&W13Z?39I(Nz^_*wL#|VQL6| z3(RgrucTiNv^WOFN70x19_R<)A^`ek&*B~)xqkQExJ5#**!L>O<7z$m_to#j}zbRu#aXUT6k4F>7)!K+y~v3Fd*-uHiW90RCN_b-Ee$$6&yyG z6~&SHMlP*g@*ufyYYxFfr4v)B9SDU#5>u&dLR(;@K>dl4WxMo8#P|@FQ9Be|=vTmd z6kLoJ0q<4tA{Bo?!Nu4Sa7~O((4{JsZkPT@eN3e*c$0z`D0r)a7b$p~f|n?GyMmW0 zc!z@5D|nBBHz|0mOKnx~SeM$S;F^lxuHbeRzeB+t3f`mO1q$A);6(~Ppx`A6u8FQY zKBTZqzd186^c&qeBiWIy}!Atl?oj=*G9FKM=$0I#-K(CGB&rMW2b+ux& zeia5tTZ$Nq<7=9muX_{Fy>Bw;PYlB9C~V0h2)1ISh02~>Sg72MV4CV*(hJ9D0BuL; z%`X!4CkAr>Ecyc`xu5a-nIvy7K$39npo?jWh%YfB^sF#{dGpKky^Kh87^)&GINpE* z#$V;E0%O3sN2R!O3V|+H$||QANIIpXmU1dOYSkTMt-Vf(Yr;%H%4@=0LJBMcVtt|} zs!*b^!xr5!sfpMXq+5)45#UG&5OawFGXSZjO*hbCtL_+9crg%A$ecQe?yNK+ppO{! 
zWn`hAk0KWCl8RCkO&nVt^%JhArZagqkF}KWYld56OwkQXiP}hgC|c&7#$N5 z2>Tm7)&UE4WhB=3GQ6YI819suWj|ieE4N z;Q|%ENx>Zo-m2hs1#eSuO~Kn0d_WAp>LMKq-Yfm#9tH1F@Lo~1=y()-K>EY&3a+X6 zZ3=Ez@mm$#q2NslUZCLh3SOk(r3zl6;3W!Ps`4*V@OqVhfr2+FxI@8P72K}iZ3?a_ zc)Nnj(`YEag7?afINYP)J@iA8|JP`5l;sn)L(x9pr;Ed4v{Z9)B}Fz(XG_UV(AiRS z6LfZ|kgY_WU1Ekv%@fzzMG9hSo`Qq`p?Mr;Kuq)4%@C=1g)~$F^OwW2oS`znE^54TFsC&o!z8Brp~TU2oS`TngKE5O3aW{ zon53LCUFG`0g`iOK#VxM8Iq>6H3gD78;p`ILe8Hg=R}h*H8(pUGei=1;>vqO8oC)8 zI(j|mQ`4{mJ2BguPrn>ho71o_{sn+b0qA>TI>*)V{Z5JdDOT@q72m&k;3V=wZFr1Q0&9m*$cxD+brQtHus0L zjpzuzjfF0`LBVCK#T?XatMAaDSZeWs1)D4eaYlZz=*enP5n?As-T8N8PRR`mw$M|f zS*+SEjMmM`#i~d>NA_sAT)E@*PjGUm*Z87vvV%;&$>{}#ZnxkUW4YK>QKe#vpze*5 z-H!6eK1q3GhowBSH&Y&aVgg#CXFw=%h1Q$s=}Gi-BzoEtJ#C4e)Yhc#_SR!~YAnvQ0Km{QfD)xaKfjo29%7Lx;=V%d4 zP24oauZz5cDs4LphS(jz#nmZd-ISY2%tR1ba9dnpg&_{VGICJUxZ`Y08T=pN8>8hDrvMb590e@qNl^f%odg9K!Aqe$2_jITQkGAS4%q}wjt<#!PL2-Q z;7*PX*-lT64%xg{2l0M|2xpp_IIG8_3uDO)8B!P z`je#tI+VLPV!HF*Qjs99I~+!p?}ifINlh$vCW?8xROR9;SE`iCMA&&IsdFrq$V9|_ zgyMz~sm>)ODPsCLCF%Xk|HC}twNOX!zZdId2cAO@j;o>k2ik5o{?eqtBzgLdhkxL}FXScXkJH%&Bda`=tP zf@17Y$BuOD-@!ukb@Bb^(!dNhqF;fbD^7;(2;{5ua8JN!m_YBwPU;QVd)^9TCiv!1 zxIOqb;=^8(t>qo0<>79e9p;X6cVW+}$DVcopdY~nct_559r`r9!=vwU+dMe9#)u;eCC8)-`c}fm`G4;T_8_qmio#Jd@zUy=8}X>n~4v z%?~p!v{Z^yE-+&1-3g1F*3~k^xy6O!+-?huC0KT^cu5}+wS$`sv4v>bRrV-5PLS3A zBX!?YI=@H6?BW=+&x_l;JU+j<5qcPY|F>%c84goj*awxuat7iY5{@L`v~-{3xN&L7 zGi7&p$4YOEr*&*(86QXR!!ClPQx!O6p}!&p52kZH`jM{irAP-YisveBik+)q7|vBpX*KSpGsAfbf)dYD z%mM-8JOu&zPE)=w5qYfonc%p-QE>aOJp14Qw;rhnGR?f3m5kXCUV%&R1D=qmgRL;SWz?AI|^tOo=%7aTm(Vvk8{>Od8#VeXD-0j(m7Q1wRyG zM+J_2kk2^b;L#7`$Oo>(lU8fgCO>iXODr@oqp#=by@`5(P7rP$ZvQj_zg`zlvpG^ih!J|Cx`p20bhN63;}aEiLi8vP2K<~b3bd^0`2 z#}Z>4vjEF5Mzjj;x$CG=_~vbpqpCd!}dj+F#3m2 zQRdk;wz`JiG|MO9H{xt_1@b!dO0y>PEyyn!bB>ZJ+qY|~))S8>!K(O`H&deUBaHWojGy>Oqh7w(0@a9+}_ zEQWiXn|%7je4jEI4m;9G>8booli}m~*T&}a%3k;tMqY1XddLp%mnh3`!HRq6XuMZaa#naS}<vC9z?^Va0|~KId+q zbGz5Fmp6jJSe`Gu1IGz>OM~J?!RP%5-rnbjjbAfOh^KQa+Csmz6(C=D0QOCs9ZY5W 
z``+#EK7^}Dyv`$BM=J`#hDi56#_hF|-l1)QoY0WvmNPc6)?x;DC7Gi?5EgLVqdNm{ z3vmByN%V{Mf*Y{Bpd!u6V3^tTXVAlXdJ6x|8cUYITGH)6k_}awohVXCpqwfIGdUC zt#}BC+JX??x6H&uegXefdE5Xn&`{eF&U)LN_mkH!} zaoRbI%^3d*Rq8da#ewfza;4F8=Xbqovq*mAoryA6<-I$N8zx`C_r9 zgNKQQz#sPWFtG(S1|7^6upKR6&`ZOBkl-hn#Xejr`V={er>TD|5^ol6#1eHZ8u+Hb zXrFVJ%mNuxJ0LlKcngy2iD&2y9~BwK(uZLzJKB+#hlBYv`SioigRp?W66v4zpO&Ei zaLTth{9ItH&-s~;McL_de&X96MZ-I|5zWr;{EKRUe&;JllP9Gax?3I%E($*FW0%GU zX84Qs&i*f$0^Y<>#3>GMWI4>l`{Ic|ZzjHtX8MbQJwE-cV!bvusgh7JytnBlG_{nv zj9w^a_6hnZMIKOUwu3gQ(Mk&3#)65sqD7zogt*d)qlY(r#_3+;RP z@U?o4z?l#*>zTM%2^+S&p${zRD{%A6Rh_=_t+4rT_2C4*ZnZH#9LUxK795$+@`v-Y z0y%zXO>QpQ?n)Pwu`(DXOoGpj+RjcgyJnZl7=-bPnkohql>TJZK{5if-({hm$FL zg^1c7D}I7jfpyR3V&fuTWF-{I?cKPe0P`)+E|30ax9ztusTzfyUtTo|C^?b;1-DjvWJuvlB0l z1v_8%O}liI7qdaJeR#UvW7+M##PdY(C*Jb;_xoVez&mYvkH|TmRq$T)X8n2MLJdJf zr#pKoXgD1Brx*=xqhhn`oXsxhF3`}s96ub}VLch-N>h(!NPT@0d zo!~88iK87W(Un9OQnb_bJM0U<0vFBGj4H-GpBMX$mAO>pUzKB&dVBx3kV6O9(&E)| zO$&6g4rU~uif5!4K2ou@OoyaDF2*)PTjS~nd)5X~?f%HkMTz|g$DNNi;Rm;bba`6g zVE4iykA@DU9n>|+j^ti3>W{EL5mN;cJjREf+Sa^)!>cY&n~2oqd5CXc;q$gT??>vG zIKT`V22VbHvlxFs@zE||Xz)9>H9rTYzTG`#dnj5IBh~AKJ>WsAObnPJH{j&$r{B57 z6S=Y@hTsqP2Im8EwErJW^`!J|WbRM`A_i5#sb1XrfE0pnVn3%yfij2j)-%B`U<4)- z?~Cbcj^E~bX+_8+@lG}^{U!GlT9*)03y|e6{4ih_4QvVyg@e{U1j@uY9q~CaijyUH zCR&L2es(P5)G9A`mCeHdT%i=wEMsZ18s&#GI|Y%HJF@-`yq;Q*c%;$ z6Dd3^D<-;#SwFQ3)e-#*Ad0VIJe7JMNoaq8n1@gs!#;pNQqhCKDP|;oJt{`20KCRh zq}>Z5n7;bPhgOI9}e#bzNkE|Lxb>P zF|2dd#+~nM;9!DFB)t*$pw}GNiT(%nK=n_#k|7Ol2S*D_{C5hfVM2(+&sO8g*=k%l zTZ}7dnIPP;{JPMfWtAVpzO%#~yV)?1{W118c=anex*QFo@4^Z@4U6L-{|8Jk^v|?6 zei1kVC+|_D{V*Y4=t{S4f|>ikDb5bzrqMmjW(siOmzYE91Nd6qiGiS9s0wsOk+G1L zG2SkhBu;dIf<8v2xg@tyGz&d9hQ zK1}KK+9w=;c`UFiad0fm?(Gf?!S<2NP+D_*Wuywj4@JhA>ueUlj9fv3_NY zd;dBtoaF4sueZ9RYiQT)IHX#=V;47sS$d4|FB3q=Qvzehr)Mbt@1%bKTgCv_$NdU^ zWBmR2)yMx>xbG6~f59(=7S}2}^zk}_avJfwb(4@^V_XB_)mxdfky9o-Ju>(0#`x>$ zx4=;(04jI*r%S)geYNEuo>aZOy+#m4}AT zvaKzIq=L3EBFce`5({JeElj?82g{C?P0%X~zR}1jCBy>92_U^21xB3S$E4q+_c!nxIc@mWbABS+t-`$)zb3sA28~~a 
z-|}X{rSu{iLWs0OAzoOMgtfjd5onz-^4r z#ft+G+EtzT)g3a0PE#X(;MT{zP{KAceluSB_&?y62GoRB zd7MGxe$L;mzR_5aJH@#8--%FtP$=~At-`ZW7GR5!(?Y)mjz$4c*=+%3Qo}uu_Pzih zw=u35FAmhU;W#@;PuG$hB!LR>LzYMt-b`y7+I1e&(#MTvS{DypP12P@JPMCKzK_2< z{G+kp@Lde5;)_jcy(x6mvFc2@K$fQOLr05NovK#PI37z`NK3&ho2N~;$z9~I~s1AiY*T+{UqqcL?#B5&fp%tu^g8WK{k`5^ehSBwDcq}IlaUP z@$}|mfSW4@xX*G{W)zKu(Y2KqhX#vQVU&d>hi|%uFgDA^EG`#IpGobt*q9)L@ezRf z$v9nLo5-YhB6hf1$g13l{k_P~s~_e(DqfyRERP+98N!9n zW^{Zk!>Enr5(Ur%*o4aQeCP<|%{b+fQDC?2G1Oq<`W2Qi3QvgvB)(6_eGofg3J{~u z7&pI#+*f|hi|Lqt1g1~%RlV4_VS>!G34(+LS@H53r}e*@kiKzRT7)G2lfSNn?qX>icFsoI*LnvK13#=qqqS0OAu&^{9-@9&Ab?KWn-e{?e09B1@z*` z^gCTmgT=Vb8fb92ZOx-0(Qx@}&3$kcPr;1s6JO_hS*&k--v#@IUC|7>iL!QNTih1^46uW5MqyrA5(D#Z7%^eJJqbTAKM{Rj6gxyDI;Y}N z3=`-_>Vg`x0B=Tlhyj&m3%v-3NZ*fU$t5#30a0gY-J4Kvo^DZbz0qF-Qxrhh7s35y zG}mX6FB?>i`~gKN4C;-3Cl&#>GlTze*4SlP!I%E%yD=hc za3r-ilN7ruE4bbt?Zf90LGdibfMM%fUeblZNkge%+w>Zp=oFq$mShzbso*4p~dfd z^^dSJ3m(h-#fpF%miA`jjx#+Qi>GW7p2ClVZ=s<9(`^0{AD18l`HQK@eZyY{QQ3-H z1|I(yDMS3gk6|v1i2fMhczcul=z+dmIUTxe<=I~BC%`hy2h{# zjgH3jycOVnEY7)O(@J1RF8*yCX`eL)kzxho<4oq>IL1a|ToB`j(C=bB2b+xaU8G|^ z={S!z(--1yL2u|1EjU&zJOp!S5AkiWrVxuC-htb5H^=zOAmtgMod4Tk6l>jR)?L7Hz=rwT(!=qlbA1tgEiC1I>URV|^z{{gQ=f#}Z14Kip zdVFd^H;hX(#?Ff5y~R|;;+E?WY;<;E(zbRE7NyWXcq8|6`z8`~@D&MjBc>Pjn}kp7 zq6Ew?VI>lli&ymbNK(lKmdvrfj;#xluGfawN;kEKEp2BBYcaLsDdf zMQM!!xlV{>3f#j4xKV7!2Ti*nDTVc88H{}(f;?PG1SB~)-~@UL=yVM|<0<^Bt@&DZ z6{t?zS}s??Rb*TH9jIe0jXDs?q^XEx2x9pS!-DA zH0EQYT3{kbttZ4ODTZNG>02OUu^1Ra77FI0Ae>^)1eM;%WgniVX?-@7!fkl$FqVqz z7?HlnI&#@K8<*$%-leoeZfwN@H7-BmUKFI)_YicLKk{ISNax8W1Yr>t@V>hNfDqUT z(C?j05+YBMJOUd;tVeoeXsL*W&oKx4t`~qOTP2`G0AQu{U|$iF)Bgfm{|H(^&Od@2 z?m&rLwm}A5fdEj?Qtd}A_w{?Md{#qV##Z@c*Yo%sE|_1ZHhxjEOLz6a(-!0;|Tl{Vlzt4%^=kY6_U}M>mikXfjl?#Kl)d9yc zN1&oH(CBEas95ay$E6j4s=CEDI?i`2uB&Kt2n@ojYnM2xYaN%*nS9Yr%ckJ)6soXd zNm&h|mo*?YCt^&yXsY9z(;a2Ciyh93W(XYO%@fHlEnDGOQ~}@$M|s`SMb))sf$F+i zM_uL2F`7d@u9*&3O^q2(h0dMnn2X$NDjaq76%8U*qhs>YU`?R9zNWfd1YEed`j+ZO zqzB4Wv}}!$xpSE~}|= 
z)CQL>s%U5=R#nCFDX3!&6|Oq7%TBYi9ge^?6OIV9kq1bvHI2u@CU-hHWlNPkOU;YXa(fPEU3V*3pSKjIQ{|bCGoQD z)z?)cZ`6{RP)+p`GI?2bpbEvKltcOgb&iTMaI!8?g_%Eqy%E?B#`tYHPR zo7+&1sw`MOsb={F4u_kfue^fiq$6XN$4C}hrmnhiVRh{-6%B!fOUnWc)yro(ip5*< zQ9!B})l{5w&KS+bLXfMDikP5IYNP-O7Y zoC7vlb3SXpQMPz7m}pkS`Py8sZ_YgJ@@uc2kM%fBn|1xX?_RyIUcDD-^IWt19?gIC z<@{fGgXh|-wYepJ-#l&h)x{-O&-KmoXzr`$6X%-wF1I$E2+q{P!^@y&>|PtIqGVK zC^5LQt_D&Jd7tS}EeCw$bG`%3>Xr&e#nSo!QmAtTp+A*^LTeWMmUsj}jY{>hxU8{i zIP3&`sa;n%is$?1`AYoPJFfLy?z{Rbhue3Z&+QqbIv%0=8maqYc=4V@FnXq=v8uY# zRG%7&!6Vr3SR&dbUJVtCgXQSJSk}uM>Voyq+2vKW)i*=0o=@E`AoXSFM3T#|YHy}P zIKZ{wq8h3NCRE>0hg1Wy3Ce6G+QLO?_>0h))ir>RmFQ5aDYt>DvOulEua|u-YfPmh zq-3UJP7rD~S@$>sWoQFtCkG*#i8$`+#&W373P(-dlEiw6$=J+f4B7b095rPPOHg*E zD7u?lD;nxV^cWpNG{L|!{G(KJ(HVnu;x|$Z*u6jz2?Vw^ASLt96l%x!p=-(=#h!WI ztKBmlv+I`D*ELpy;L7TXn#Is07<6FNQ8VSH?@yWPsJU#}h2Iy9Rs6{GVwzAtkn6Hq zc0{Ng>P<0{NakBIUB^z(snSF0!G^|a$bZHpGbN& z5wMW_l|O;yj9wzL9G4;%wc5DbWSDsQFA5yO{Q`J^tDDapUi>u3O(U&f%Yd~MSRLC<1aH8^|E*s86r(9MW#Q2)SQK28I zYvDwO$VQ-R^UxhbvEm=7E7)J7XOv8t>S(NUfLRc(MHK<`@#J)HJ1QX&w6M zatK_kqryOT*^-J{bhy<4j8;S!8*9)jD&qQ-crzA4ykv7t9q4GsfiEnp2~fqY9hAw^*l0wIIm%RRAU4Ppv1C6_dgD;e$B9cXdPc5)7DXz)>-TT87vraKY8rO7a3S!z3W+ zXD8rD3iJigE7b*Spc_>lifVio#a!IuYPb5)=%aRO?n;g)~a!UM&Ab2RY!pqeU+{QbnT$4m#%KQw$t?# zT^s58Jzc+}>sNGfJu})y*L`%|M;G@`Mem}Eq7c1B2w}2p6X87t|1%hlhi|O2XxkU?c5lt{1ue_6 zJ=bahr!Ctx8nKrT$8Y>UEN%ys?riwSYPH&Ci8ZRtre&34v9WccX6gB=ruCeyS?mt@ z^O40QTxUN?)7sC|ETxk*ZJp$CIGrDa~4sjqUAEBm8NI$|k_JXj!dYfNev5&ud!8OImglzDaewqUH4L zMLv7A?A{N-gQH0ADB?kKdcj+JmL=P6wP^NHmaLLd7E61sMJvg}Wqji;TEUkr*`-(! 
zYCqeOU6OCnnkHK;ZIdn8J-9({z=!udOHL_1p>^C~vGm?x$#P)r$#J8_+H<4D(sPqV zD_vsAZeL=tlvV-uPZmpSgT>n3Xt9*w?#T8P7E8gcmaO($En3@3%P4S0%i|9o%dOXH zmgRPfee5Z@JjaV_!Tpq2@mYn{Bs|x}qycnm+G+MRSyqa`xKZL2vff1>9>$x8|9URJ z;XxJ2CoSGsgx{4N!m2bM#=8su>r}i8VYy#CY4Pj`4xMt>s7TIQ>)qLT3N4lXsR$?i z%MD3KuENt&(^KKZ_v3$o!e?CZ90UJ0Ab1u2`wIP7=|2pd+`A+_UpWpw`SB|L2UYqX zW#m6r+Y$pGW_B>MmgNTZ&&zVJPtnT;x(jg3cr>LgimVvVsGT#*=wzl zoV!MaPGP)<5pVb1lCG?b@sjJ8@%E9vAINwW%!^N2eq#|BmnZUJBJ$`{@t$BDKB@6! zsE|KD@R$Ek=5qjfiYFC6rCiLX8Sx74k?~@>HB&ys|2^=F6n>4&<(PEs1AddjpA5Hn zjzd?TLQ8EIS}S&G=uS;vE!`@RJrVz5P&D9Bq=mP{w!^kWe=HvLF9V z3je-L_$r+E6A^Vl;ghH0Nu^5#6WtHNE|OhInm{$ar5;*`&#z3THl3F$QQ=_`l0Umo>Rxp!_XHya~UR`P`H# zAFI?y#J>mlUWI?Y{8x~Y1X`Lj(u)Gp~Asoq2$ive@Ug2R-T0Pr$RNa z+bYxl?FjI{2mA@!B>t~QfPWX$SNJQ1HVt<=dn8Zq|%epFEU;;;`Qv2`K%ODdi;1S&l89@;mi@VOci$!Q>D7<5>E~lW*d)KaSBq(*AgZqGu%I z752vuDg1L3oyV##_Qx^&k)+T5c%4drB=oaCZaD$^*&i=g_|!Y%$-sZY*&oN!?^5V# z%VLZ-59Ph|>ka-=F5)fanU_8!4ynQlWM*6H{ff-*nV7*UAKUSjR&4G-GCquc2 z&;GbRD)VtG^t5uRaN=_w(yH)(dz|z|`GIfmm+4;x9VnhudWM^aA>QNHH5MUVW~l z?*p{Kk;Z!%@n(#b@z!K4S8{%ND)~(P`|@!zy^;3sRSN$^`}cYk??n6e8K+75M%uqm zQ25(3@kjB4^)32$n@oQs>q+e2cPo5+ijz-jeXC%m&;Gqn;g6*M75%%ypQO@B(?<#E zvwx4#GZOmQzn=&_?B8SQzcm7W)&PIq7bHL3$Dv=eZ{TlG`0F#_r}TH^E7uu|PM7(V zW{gLIL)pxS>x}gZKl6HTvOW;)8}XK(A@iYMJgN0PO#c`_ye1W|Y6N`y1@JraWIjJU z4nFDF1^j|9O8j}?#qe~c>+iA9^$RA*c;QU>45NqLh}Zrl8IP8FhtChnNlX6Dc^l$Q z__Bqqi4AN^sIL&kH&K7^*l zOXny1_o~S<-VGV~nV#Q0h}Swr#^Y(vk<$4b;%)f4j5jS~ymY=;(I0kvL&n3W^bGll zby{=XhVAT9#9KF2#v4h0Si}BW;jhamm+AcY1>y}{AoDquV#Ftn|ED1^d7bA~#OuCT z#`9*(FFjr!^634RjQ1C`CGm_PKR@ENTq5Jm&M05$`86Y6+jnHV^(R2*6Noo?mW=m$ z#(3%DpO0BS8q&qnkx`zLChcjH_CW-WXfll^~W0a_X}mbhh=WV#3IU)}26p z9|Lzrt&I1_6DS|o0Xphryy_9sNx58%c!M{~cq8dgIZo(P_*Z7kC!LQRCujkg&&zc4 zN#lQdKgDsvLn~yw2$SWL7B77q#BqWamhqY?4tz!sFMxOpekkMZ$@u*|oz91u-vcsU zZN_?I!9(VFhV6VG;?4MpjORFkcoWeS9Y2-vu18xE&vD9^r_eI>rv*Qg>7A+Y()gvq zNw**W4uyYlCj69n2;(*5f3k}AR>pYg?U(&&uZoAwuJTDMZ*n}!_s96(@N-$Nhg7t* za*1^0dRjixp79HbpHaW7beKN-Q?J6$*#9W}O#CNbxX++Nr8Cm?2*#U-|CR?Oz1bPt 
zWAc6n#=8surN5N%ot;spHTjfY6rJ;2|s((iy@jjcI5qyEGO-eiYGNa z6-;{l_+O{+J5_j^d?w)E2KS8L%JdIsqBjBmVYqYuOX5GL!jF~ytH60k;n$2H{jtF5 z`<+ZbbR7KS@vVRx@A0@sKqvWDqVPxbUF!t#$+uYg?h(*KzSS#wMuJbiwJQA5Oz9`= zgSr&>9SXl}1o)!v9+mve+}=cgB=$qzkMtc1eTPc**1YsX*eoU&$Y2yjxVXOma{9?gxIG!q2ETxM-K42N;j* zetjz5zzF-{7&&~iNPSQ|d=IGipn7YaGMsoLdGgju{4wgSsrSK%4BxNbZ>aZf^?qKx zH>>v_)Vp21|3kg+SMMLHcZ+(jRPTU#|C4%`s`p4}{~YIY4t&mm&pGfp2R`S(=N$N) z1D|u?a}IpYfzLVcIR`%H!2ebbe4%LJ!i9~2vhwdQEU)_h!X?47hQ-=74U=1C_+pp` ztJh+lI+y{3XYq=f#fTGJv=9~wfuXf(T0==nO0?$(K*IurS1hj%&|M05Wo1pUv5LMb z`sy2CUagYedf^QQ%IR57PeVlo-AxvHmca&$@V3yq6!!D#1ZJ!7RxB;AUqR$`%G+2W za%`hFp2tJN8w@lOwVj^EIBEMP--%)1mU&>DJ_cX3d7cPnpA zMJ=Jd^wK^eJnG3>sEq@k8v@$DShO(;iv6v{4{+HmORR;Yw^ik*i>v>7^p+d zvevQN7?^;MS*-mgt6JNV6t5JI_8`Mh6 zKhh{&V}+OP96#CjZlNqbAVDOH7&Ksl-A=bFMA^njAb@RZ5P5lb*)FuD zrAwDb%OhDGi^l7siP6IdNk^fobM|y8ajW-J<~i4b(_@0-y3N;!}RE#^>+m^AGX4wMpXq0>i7RFG3!5FZQF-76e_}ra`Z3YjKr{X6erk zT&&DCTr!_Oz-MZ03w%n~;zDiK=PC1fUVJvU!e>$vzFWuK&FyE4HeE6#?{6Pb87cR0 zyJ(*-8nNkQN>n56@pg>-ZS6Q;P{2!SZEjB=-gn#4%ze^Kwao5yTRZv+ z?oS;jyT5cm(aXJn85%eMNoZl9NJ`32dQUPcRTSo0!xL+29rVyBmYa#vvb zUfl)#t7zmrC}p`jx}XQ8v)sMkaQ93n4xp*E$pQw!uSn!o-OjF+ z+JE>#JJrHL0Md!`;$_xqX*7 zpb$s+b=j8Q5C=|KH`SHXn;mE(-o@9th8K?vpW}Wo0})Uc=j%<`7I$j2UAjeXKO(;; zk%!&&GcI&D!u!q3+fiE8?QY$UPe^(R{$!<`i7_vlM0j$Nmmm!NA?1EDw1|{(=Pb30 zE|K_;kfBecd>Oe&mV8l(=jNJyBU!YJr1&W^bd8kXA(zSWJ%rS+kPjw1EQC`3NOBK( zA$gQ6`L|ME7EU}l$$!!PL&CK1I`SZSBe_U^gbckU^%qrVjd!eZl??qQwaZJ7*rCOw zd?;CTnNaKso{u+{yhZV^kR?}2xCdXliNEN_2oI94B9D`QMJ|#bAeYH6kt<}$b&`1Q zM2mkQmL_eFJcm3$UO*ltFD5&D(IsKV$qUFoCs*d1{T?#(sMLQuS+uF7{s&~}R4E^X zWl;JfnpI+#JWFBdS1FH>C(kloLmoZbcs+T+V&jL&Zi(@~$R)DeJV4@4E;ak%0NBDx$}GjqwcRqzD&(WPB*OOrA$}KQ{Y( z&sP`^le@{Qz5PnFUrYWfc|CcMyqR1iKTfWY@#?1ib3d_o?~{ATdt)7u{tu83CJ&Oo zL7pU^LN1XnAy1KSCRfQD$)Zmt$M;dPBvlB%N*?^F@kit$`7<5Xzv|Uy{|b3vo$+y= zZ!|uO>~1n%Lxv8Twpa9gv+;vu(MOa1yhZL_Z#=Wp+8@2$cn*2sPUG*As~e0jAwz3T z<6Y_f$s5QH3#qjCgy-KISIN+5Q~O?B);{#wl)p?C?KY`D*YhUh6Ugq5#>3>w-NviP 
zJ@*)2MII#IMjj{MO)in2Bv;9Ak-Ilryyh9!U$@2hK(c7eNq^^&p*N>|CRw!S#C|Cm zI&{i6kfBMZTq27;o%sKaJoupT9`FB<@qT+*e?-Sl{O6FzA2B|S42?UrUrrXiJMmvn zhW4HEBjlcEjb9~;E}r;z>}~BqBTxBoGW7D4k0e7oPx<@g$(M{L$i>%;@AUp}7{5rK z+F{%|)7tB)8qXq&E}!)0J7j3|DX$u>>rT34>kV6zSbUe4b?u242?tO zlgL#FX-NIc$)bNK{5$d>l30W*-v2n`mQPuH=p?HBbL0X1pi}(6P8JPC;nT>_Q&hf) z3~fc_>&c?CDE^zs-KQHrM)7a@v~vgHHE{v+7rMbSv4*;`+4mR59O=H_ z_#p5;s5o%0aUZ#~!uS|6v>i46N#r7V5qb0+v#<2L-1t&qtN}hxyqC#?wC^GhFkahd zt-sat%>Mu~bT_sB5uR!PCK)=MYCn}+ru|%Uah2JBMjoL34l=Ynwf@ov#NXqS+rusew62L z8_y?qbN-%AhQ6x$pGOuAS*d>+dEgr3YssQrEB4#T(709JN*0Y7(JIz`ge%3x+?btJTKrAg8B;sJ~iOsfR_e5 z8t|%s#{<41;K_j31$=A3>jSYu)aa7;M1H;q(2aosh*p0^r zczlS*9y~t6<6}JVmI5l)_`}(eGlZ9XRb9qQyAws`oiP0yk6q$}mjrqIKjTZ#9w%3) z^X}cE25O_%ULccNYRtPJ7aMumE@r()MRJ8MF306)xhP%NlDZZc*RgAaV|G1kELxdU z*{*E&H?Z(>UmVnG%<8LK1*WhLZZ#3*wPEFX-nsF`y-svoM2@eL#a$JD7{d5=tv)$< z!KGBKUhtRl?K)~YM1cXbS6EpqeL4&Hh9Y$_n;z1sY*!1Blx3UnR|4h!88z-1W44~& zc4Q>{!h%g_jM;j)V!DX6mJMCJI_OPk4f~(6*9{xiu>llm4xPIK-^aw~BG+j&`UFmA%J({~9?eCKRV!B7U`%#CJ>GgM4=pk31VINqKt4X=$EkW~-y(NPN7+66|bZ$ac|>ov>qmX$Z@+9m*^f z{f*YDr$|<2>lxl=aBS4R0f^s%k@yW(+s-z`2*(gT)=Q!IFC>lUAlN2c1a9H#*{vYlFEPdo`TA@RH$@ z83*BBrw3VR3p=SfIrg#`9}HXjGYfT17`+)|T*L2P-RYRxxkf&A8Vd3mpkec&pTxx7 zrEpuXc$9=a&Z`m}jCgmor4KZOle z6a?|-v$0p*%LtiCgmCHC={S0W7i##}@k#c(?(~m+3-K|UZd%u=()f@eC8*w~JjBbp zR1g=}z0eYS9Y&N8G)&bapc(v;QeA4RxFmK8u^QvK}9?5{kjhmO@rl(^r z8oh8xo+LT*d-jx*-=$`zd`V_knvwUFMr44c@nj=S9g8&ebJ*GQg$^0KagJvc)#iXA zO&5uyx|QU=C@;G-@@b^7o@;oeWD_>!w+%J<0ME)Lc;c$7g~j zX=~}y)N(G9+uSSnb?J+}FQq3c1as{e$WN_R^KNH46u7H+jHIdd7eF@1-TDa&n9 z()7a0QpNq&hO;L?W^H!CZW-I^8J|L^t_m~*R z&g6}XzwU~rGe(*+D`k4|s($LQxLhRjt?slIV|vaC=Pf%@u34sW$hxbDxYl%Ddy%^u z$VVUKc3NC~R!QfGY9ZFM!~;<4AxoMMfiUDvnB-q literal 0 HcmV?d00001 diff --git a/gf_unit.c b/gf_unit.c new file mode 100644 index 0000000..8e95bf3 --- /dev/null +++ b/gf_unit.c @@ -0,0 +1,222 @@ +/* + * gf_unit.c + * + * Performs unit testing for gf arithmetic + */ + +#include +#include +#include +#include +#include +#include + +#include "gf.h" 
+#include "gf_int.h" +#include "gf_method.h" +#include "gf_rand.h" +#include "gf_general.h" + +#define REGION_SIZE (16384) + +void problem(char *s) +{ + fprintf(stderr, "Unit test failed.\n"); + fprintf(stderr, "%s\n", s); + exit(1); +} + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_unit w tests seed [method] - does unit testing in GF(2^w)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Legal w are: 1 - 32, 64 and 128\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Tests may be any combination of:\n"); + fprintf(stderr, " A: All\n"); + fprintf(stderr, " S: Single operations (multiplication/division)\n"); + fprintf(stderr, " R: Region operations\n"); + fprintf(stderr, " V: Verbose Output\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Use -1 for time(0) as a seed.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "For method specification, type gf_methods\n"); + fprintf(stderr, "\n"); + if (s != NULL) fprintf(stderr, "%s\n", s); + exit(1); +} + +int main(int argc, char **argv) +{ + int w, i, verbose, single, region, tested, top; + int start, end, xor; + gf_t gf, gf_def; + time_t t0; + gf_internal_t *h; + gf_general_t *a, *b, *c, *d, *ai, *bi; + char as[50], bs[50], cs[50], ds[50], ais[50], bis[50]; + uint32_t mask; + char *ra, *rb, *rc, *rd, *target; + int align; + + if (argc < 4) usage(NULL); + if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n"); + if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n"); + if (t0 == -1) t0 = time(0); + MOA_Seed(t0); + + if (w > 32 && w != 64 && w != 128) usage("Bad w"); + + if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("Bad Method"); + + for (i = 0; i < strlen(argv[2]); i++) { + if (strchr("ASRV", argv[2][i]) == NULL) usage("Bad test\n"); + } + + h = (gf_internal_t *) gf.scratch; + a = (gf_general_t *) malloc(sizeof(gf_general_t)); + b = (gf_general_t *) malloc(sizeof(gf_general_t)); + c = (gf_general_t *) malloc(sizeof(gf_general_t)); + d = (gf_general_t *) malloc(sizeof(gf_general_t)); + ai = 
(gf_general_t *) malloc(sizeof(gf_general_t)); + bi = (gf_general_t *) malloc(sizeof(gf_general_t)); + + ra = (char *) malloc(sizeof(char)*REGION_SIZE); + rb = (char *) malloc(sizeof(char)*REGION_SIZE); + rc = (char *) malloc(sizeof(char)*REGION_SIZE); + rd = (char *) malloc(sizeof(char)*REGION_SIZE); + + if (w <= 32) { + mask = 0; + for (i = 0; i < w; i++) mask |= (1 << i); + } + + verbose = (strchr(argv[2], 'V') != NULL); + single = (strchr(argv[2], 'S') != NULL || strchr(argv[2], 'A') != NULL); + region = (strchr(argv[2], 'R') != NULL || strchr(argv[2], 'A') != NULL); + + if (!gf_init_easy(&gf_def, w, GF_MULT_DEFAULT)) problem("No default for this value of w"); + + if (verbose) printf("Seed: %ld\n", t0); + + if (single) { + + if (gf.multiply.w32 == NULL) problem("No multiplication operation defined."); + if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); } + if (w <= 10) { + top = (1 << w)*(1 << w); + } else { + top = 1024*1024; + } + for (i = 0; i < top; i++) { + if (w <= 10) { + a->w32 = i % (1 << w); + b->w32 = (i >> w); + } else if (i < 10) { + gf_general_set_zero(a, w); + gf_general_set_random(b, w, 1); + } else if (i < 20) { + gf_general_set_random(a, w, 1); + gf_general_set_zero(b, w); + } else if (i < 30) { + gf_general_set_one(a, w); + gf_general_set_random(b, w, 1); + } else if (i < 40) { + gf_general_set_random(a, w, 1); + gf_general_set_one(b, w); + } else { + gf_general_set_random(a, w, 1); + gf_general_set_random(b, w, 1); + } + + tested = 0; + gf_general_multiply(&gf, a, b, c); + + /* If this is not composite, then first test against the default: */ + + if (h->mult_type != GF_MULT_COMPOSITE) { + tested = 1; + gf_general_multiply(&gf_def, a, b, d); + + if (!gf_general_are_equal(c, d, w)) { + gf_general_val_to_s(a, w, as); + gf_general_val_to_s(b, w, bs); + gf_general_val_to_s(c, w, cs); + gf_general_val_to_s(d, w, ds); + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply(gf, 
%s, %s) = %s\n", as, bs, cs); + printf(" The default gf multiplier returned %s\n", ds); + exit(1); + } + } + + /* Now, we also need to double-check by other means, in case the default is wanky, + and when we're performing composite operations. Start with 0 and 1, where we know + what the result should be. */ + + if (gf_general_is_zero(a, w) || gf_general_is_zero(b, w) || + gf_general_is_one(a, w) || gf_general_is_one(b, w)) { + tested = 1; + if (((gf_general_is_zero(a, w) || gf_general_is_zero(b, w)) && !gf_general_is_zero(c, w)) || + (gf_general_is_one(a, w) && !gf_general_are_equal(b, c, w)) || + (gf_general_is_one(b, w) && !gf_general_are_equal(a, c, w))) { + gf_general_val_to_s(a, w, as); + gf_general_val_to_s(b, w, bs); + gf_general_val_to_s(c, w, cs); + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply(gf, %s, %s) = %s, which is clearly wrong.\n", as, bs, cs); +; + exit(1); + } + } + + /* Dumb check to make sure that it's not returning numbers that are too big: */ + + if (w < 32 && (c->w32 & mask) != c->w32) { + gf_general_val_to_s(a, w, as); + gf_general_val_to_s(b, w, bs); + gf_general_val_to_s(c, w, cs); + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32(gf, %s, %s) = %s, which is too big.\n", as, bs, cs); + exit(1); + } + } + } + + if (region) { + if (verbose) { printf("Testing region multiplications\n"); fflush(stdout); } + for (i = 0; i < 1000; i++) { + if (i < 20) { + gf_general_set_zero(a, w); + } else if (i < 40) { + gf_general_set_one(a, w); + } else { + gf_general_set_random(a, w, 1); + } + MOA_Fill_Random_Region(ra, REGION_SIZE); + MOA_Fill_Random_Region(rb, REGION_SIZE); + xor = i%2; + align = w/8; + if (align == 0) align = 1; + if (align > 16) align = 16; + if ((h->region_type & GF_REGION_CAUCHY) || (w < 32 && w != 4 && w != 8 && w != 16)) { + start = MOA_Random_W(5, 1); + end = REGION_SIZE - MOA_Random_W(5, 1); + target = rb; + while ((end-start)%w != 0) 
end--; + } else { + start = MOA_Random_W(5, 1) * align; + end = REGION_SIZE - (MOA_Random_W(5, 1) * align); + if (h->mult_type == GF_MULT_COMPOSITE && (h->region_type & GF_REGION_ALTMAP)) { + target = rb ; + } else { + target = ((i%4)/2) ? rb : ra; + } + } + memcpy(rc, ra, REGION_SIZE); + memcpy(rd, target, REGION_SIZE); + gf_general_do_region_multiply(&gf, a, ra+start, target+start, end-start, xor); + gf_general_do_region_check(&gf, a, rc+start, rd+start, target+start, end-start, xor); + } + } +} diff --git a/gf_w128.c b/gf_w128.c new file mode 100644 index 0000000..5dcf8a1 --- /dev/null +++ b/gf_w128.c @@ -0,0 +1,496 @@ +/* + * gf_w128.c + * + * Routines for 128-bit Galois fields + */ + +#include "gf_int.h" +#include +#include + +#define GF_FIELD_WIDTH (128) + +#define two_x(a) {\ + a[0] <<= 1; \ + if (a[1] & (uint64_t) 1 << 63) a[0] ^= 1; \ + a[1] <<= 1; } + +#define a_get_b(a, i, b, j) {\ + a[i] = b[j]; \ + a[i + 1] = b[j + 1];} + +#define set_zero(a, i) {\ + a[i] = 0; \ + a[i + 1] = 0;} + +typedef struct gf_group_tables_s { + gf_val_128_t m_table; + gf_val_128_t r_table; +} gf_group_tables_t; + +static +void +gf_w128_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_128_t val, int bytes, +int xor) +{ + int i; + gf_val_128_t s128; + gf_val_128_t d128; + uint64_t c128[2]; + + set_zero(c128, 0); + + s128 = (gf_val_128_t) src; + d128 = (gf_val_128_t) dest; + + if (xor) { + for (i = 0; i < bytes/sizeof(gf_val_64_t); i += 2) { + gf->multiply.w128(gf, &s128[i], val, c128); + d128[i] ^= c128[0]; + d128[i+1] ^= c128[1]; + } + } else { + for (i = 0; i < bytes/sizeof(gf_val_64_t); i += 2) { + gf->multiply.w128(gf, &s128[i], val, &d128[i]); + } + } +} + +/* + * Some w128 notes: + * --Big Endian + * --return values allocated beforehand + */ +void +gf_w128_shift_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) +{ + /* ordered highest bit to lowest l[0] l[1] r[0] r[1] */ + uint64_t pl[2], pr[2], ppl[2], ppr[2], i, a[2], bl[2], 
br[2], one, lbit; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + + if (GF_W128_IS_ZERO(a128) || GF_W128_IS_ZERO(b128)) { + set_zero(c128, 0); + return; + } + + a_get_b(a, 0, a128, 0); + a_get_b(br, 0, b128, 0); + set_zero(bl, 0); + + one = 1; + lbit = (one << 63); + + set_zero(pl, 0); + set_zero(pr, 0); + + for (i = 0; i < GF_FIELD_WIDTH/2; i++) { + if (a[1] & (one << i)) { + pl[1] ^= bl[1]; + pr[0] ^= br[0]; + pr[1] ^= br[1]; + } + bl[1] <<= 1; + if (br[0] & lbit) bl[1] ^= 1; + br[0] <<= 1; + if (br[1] & lbit) br[0] ^= 1; + br[1] <<= 1; + } + + for (i = 0; i < GF_FIELD_WIDTH/2; i++) { + if (a[0] & (one << i)) { + pl[0] ^= bl[0]; + pl[1] ^= bl[1]; + pr[0] ^= br[0]; + } + bl[0] <<= 1; + if (bl[1] & lbit) bl[0] ^= 1; + bl[1] <<= 1; + if (br[0] & lbit) bl[1] ^= 1; + br[0] <<= 1; + } + + one = lbit; + ppl[0] = lbit; + ppl[1] = h->prim_poly >> 1; + ppr[0] = lbit; + ppr[1] = 0; + while (one != 0) { + if (pl[0] & one) { + pl[0] ^= ppl[0]; + pl[1] ^= ppl[1]; + pr[0] ^= ppr[0]; + pr[1] ^= ppr[1]; + } + one >>= 1; + ppr[1] >>= 1; + if (ppr[0] & 1) ppr[1] ^= lbit; + ppr[0] >>= 1; + if (ppl[1] & 1) ppr[0] ^= lbit; + ppl[1] >>= 1; + if (ppl[0] & 1) ppl[1] ^= lbit; + ppl[0] >>= 1; + } + + one = lbit; + while (one != 0) { + if (pl[1] & one) { + pl[1] ^= ppl[1]; + pr[0] ^= ppr[0]; + pr[1] ^= ppr[1]; + } + one >>= 1; + ppr[1] >>= 1; + if (ppr[0] & 1) ppr[1] ^= lbit; + ppr[0] >>= 1; + if (ppl[1] & 1) ppr[0] ^= lbit; + ppl[1] >>= 1; + } + + c128[0] = pr[0]; + c128[1] = pr[1]; + + return; +} + +static +void gf_w128_group_m_init(gf_t *gf, gf_val_128_t b128) +{ + int i, j; + int g_m; + uint64_t prim_poly, lbit; + gf_internal_t *scratch; + gf_group_tables_t *gt; + uint64_t a128[2]; + scratch = (gf_internal_t *) gf->scratch; + gt = scratch->private; + g_m = scratch->arg1; + prim_poly = scratch->prim_poly; + + set_zero(gt->m_table, 0); + a_get_b(gt->m_table, 2, b128, 0); + lbit = 1; + lbit <<= 63; + + for (i = 2; i < (1 << g_m); i <<= 1) { + a_get_b(a128, 0, gt->m_table, 2 * 
(i >> 1)); + two_x(a128); + a_get_b(gt->m_table, 2 * i, a128, 0); + if (gt->m_table[2 * (i >> 1)] & lbit) gt->m_table[(2 * i) + 1] ^= prim_poly; + for (j = 0; j < i; j++) { + gt->m_table[(2 * i) + (2 * j)] = gt->m_table[(2 * i)] ^ gt->m_table[(2 * j)]; + gt->m_table[(2 * i) + (2 * j) + 1] = gt->m_table[(2 * i) + 1] ^ gt->m_table[(2 * j) + 1]; + } + } + return; +} + +void +gf_w128_group_multiply(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) +{ + int i; + /* index_r, index_m, total_m (if g_r > g_m) */ + int i_r, i_m, t_m; + int mask_m, mask_r; + int g_m, g_r; + uint64_t p_i[2], a[2]; + gf_internal_t *scratch; + gf_group_tables_t *gt; + + scratch = (gf_internal_t *) gf->scratch; + gt = scratch->private; + g_m = scratch->arg1; + g_r = scratch->arg2; + + mask_m = (1 << g_m) - 1; + mask_r = (1 << g_r) - 1; + + if (b128[0] != gt->m_table[2] || b128[1] != gt->m_table[3]) { + gf_w128_group_m_init(gf, b128); + } + + p_i[0] = 0; + p_i[1] = 0; + a[0] = a128[0]; + a[1] = a128[1]; + + t_m = 0; + i_r = 0; + + /* Top 64 bits */ + for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) { + i_m = (a[0] >> (i * g_m)) & mask_m; + i_r ^= (p_i[0] >> (64 - g_m)) & mask_r; + p_i[0] <<= g_m; + p_i[0] ^= (p_i[1] >> (64-g_m)); + p_i[1] <<= g_m; + p_i[0] ^= gt->m_table[2 * i_m]; + p_i[1] ^= gt->m_table[(2 * i_m) + 1]; + t_m += g_m; + if (t_m == g_r) { + p_i[1] ^= gt->r_table[i_r]; + t_m = 0; + i_r = 0; + } else { + i_r <<= g_m; + } + } + + for (i = ((GF_FIELD_WIDTH / 2) / g_m) - 1; i >= 0; i--) { + i_m = (a[1] >> (i * g_m)) & mask_m; + i_r ^= (p_i[0] >> (64 - g_m)) & mask_r; + p_i[0] <<= g_m; + p_i[0] ^= (p_i[1] >> (64-g_m)); + p_i[1] <<= g_m; + p_i[0] ^= gt->m_table[2 * i_m]; + p_i[1] ^= gt->m_table[(2 * i_m) + 1]; + t_m += g_m; + if (t_m == g_r) { + p_i[1] ^= gt->r_table[i_r]; + t_m = 0; + i_r = 0; + } else { + i_r <<= g_m; + } + } + + c128[0] = p_i[0]; + c128[1] = p_i[1]; +} + +/* a^-1 -> b */ +void +gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128) +{ + 
uint64_t e_i[2], e_im1[2], e_ip1[2]; + uint64_t d_i, d_im1, d_ip1; + uint64_t y_i[2], y_im1[2], y_ip1[2]; + uint64_t c_i[2]; + uint64_t *b; + uint64_t one = 1; + uint64_t buf, buf1; + + /* This needs to return some sort of error (in b128?) */ + if (a128[0] == 0 && a128[1] == 0) return; + + e_im1[0] = 0; + e_im1[1] = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i[0] = a128[0]; + e_i[1] = a128[1]; + d_im1 = 128; + for (d_i = (d_im1-1) % 64; ((one << d_i) & e_i[0]) == 0 && d_i > 0; d_i--) ; + if (!((one << d_i) & e_i[0])) { + for (d_i = (d_im1-1) % 64; ((one << d_i) & e_i[1] == 0); d_i--) ; + } else { + d_i += 64; + } + y_i[0] = 0; + y_i[1] = 1; + y_im1[0] = 0; + y_im1[1] = 0; + + while (!(e_i[0] == 0 && e_i[1] == 1)) { + + e_ip1[0] = e_im1[0]; + e_ip1[1] = e_im1[1]; + d_ip1 = d_im1; + c_i[0] = 0; + c_i[1] = 0; + + while (d_ip1 >= d_i) { + if ((d_ip1 - d_i) >= 64) { + c_i[0] ^= (one << ((d_ip1 - d_i) - 64)); + e_ip1[0] ^= (e_i[1] << ((d_ip1 - d_i) - 64)); + } else { + c_i[1] ^= (one << (d_ip1 - d_i)); + e_ip1[0] ^= (e_i[0] << (d_ip1 - d_i)); + if (d_ip1 - d_i > 0) e_ip1[0] ^= (e_i[1] >> (64 - (d_ip1 - d_i))); + e_ip1[1] ^= (e_i[1] << (d_ip1 - d_i)); + } + d_ip1--; + while (d_ip1 >= 64 && (e_ip1[0] & (one << (d_ip1 - 64))) == 0) d_ip1--; + while (d_ip1 < 64 && (e_ip1[1] & (one << d_ip1)) == 0) d_ip1--; + } + + gf->multiply.w128(gf, c_i, y_i, y_ip1); + y_ip1[0] ^= y_im1[0]; + y_ip1[1] ^= y_im1[1]; + + y_im1[0] = y_i[0]; + y_im1[1] = y_i[1]; + + y_i[0] = y_ip1[0]; + y_i[1] = y_ip1[1]; + + e_im1[0] = e_i[0]; + e_im1[1] = e_i[1]; + d_im1 = d_i; + e_i[0] = e_ip1[0]; + e_i[1] = e_ip1[1]; + d_i = d_ip1; + } + + b = (uint64_t *) b128; + b[0] = y_i[0]; + b[1] = y_i[1]; + + return; +} + +void +gf_w128_divide_from_inverse(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128) +{ + uint64_t d[2]; + gf->inverse.w128(gf, b128, d); + gf->multiply.w128(gf, a128, d, c128); + return; +} + +void +gf_w128_inverse_from_divide(GFP gf, gf_val_128_t a128, gf_val_128_t b128) +{ 
+ uint64_t one128[2]; + one128[0] = 0; + one128[1] = 1; + gf->divide.w128(gf, one128, a128, b128); + return; +} + +static +int gf_w128_shift_init(gf_t *gf) +{ + gf->multiply.w128 = gf_w128_shift_multiply; + gf->inverse.w128 = gf_w128_euclid; + gf->multiply_region.w128 = gf_w128_multiply_region_from_single; + return 1; +} + +/* + * Because the prim poly is only 8 bits and we are limiting g_r to 16, I do not need the high 64 + * bits in all of these numbers. + */ +static +void gf_w128_group_r_init(gf_t *gf) +{ + int i, j; + int g_r; + uint64_t pp; + gf_internal_t *scratch; + gf_group_tables_t *gt; + scratch = (gf_internal_t *) gf->scratch; + gt = scratch->private; + g_r = scratch->arg2; + pp = scratch->prim_poly; + + gt->r_table[0] = 0; + for (i = 1; i < (1 << g_r); i++) { + gt->r_table[i] = 0; + for (j = 0; j < g_r; j++) { + if (i & (1 << j)) { + gt->r_table[i] ^= (pp << j); + } + } + } + return; +} + +static +int gf_w128_group_init(gf_t *gf) +{ + gf_internal_t *scratch; + gf_group_tables_t *gt; + int g_m, g_r, size_r; + + scratch = (gf_internal_t *) gf->scratch; + gt = scratch->private; + g_m = scratch->arg1; + g_r = scratch->arg2; + size_r = (1 << g_r); + + gt->r_table = scratch->private + (2 * sizeof(uint64_t *)); + gt->m_table = gt->r_table + size_r; + gt->m_table[2] = 0; + gt->m_table[3] = 0; + + gf_w128_group_r_init(gf); + + gf->multiply.w128 = gf_w128_group_multiply; + gf->inverse.w128 = gf_w128_euclid; + gf->multiply_region.w128 = gf_w128_multiply_region_from_single; /* This needs to change */ + return 1; +} + +int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + int size_m, size_r; + int w = 128; + switch(mult_type) + { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1; + return sizeof(gf_internal_t); + break; + case GF_MULT_GROUP: + + /* arg1 == mult size, arg2 == reduce size */ + /* Should prevent anything over arg1 > 16 || arg2 > 16 */ + if (region_type 
!= 0) return -1; + if (arg1 <= 0 || arg2 <= 0 || arg1 > 16 || arg2 > 16) return -1; + if (GF_FIELD_WIDTH % arg1 != 0 || GF_FIELD_WIDTH % arg2 != 0) return -1; + /* + * Currently implementing code where g_m and g_r are the same or where g_r is larger, as + * these it is more efficient to have g_r as large as possible (but still not > 16) + */ + if (arg1 > arg2) return -1; + + /* size of each group, 128 bits */ + size_m = (1 << arg1) * 2 * sizeof(uint64_t); + /* The PP is only 8 bits and we are limiting g_r to 16, so only uint64_t */ + size_r = (1 << arg2) * sizeof(uint64_t); + + /* + * two pointers prepend the table data for structure + * because the tables are of dynamic size + */ + return sizeof(gf_internal_t) + size_m + size_r + 2 * sizeof(uint64_t *); + default: + return -1; + } +} + +int gf_w128_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x87; /* Omitting the leftmost 1 as in w=32 */ + + gf->multiply.w128 = NULL; + gf->divide.w128 = NULL; + gf->inverse.w128 = NULL; + gf->multiply_region.w128 = NULL; + + switch(h->mult_type) { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: if (gf_w128_shift_init(gf) == 0) return 0; break; + case GF_MULT_GROUP: if (gf_w128_group_init(gf) == 0) return 0; break; + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w128 = gf_w128_divide_from_inverse; + gf->inverse.w128 = gf_w128_euclid; + } /* } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w128 = gf_w128_divide_from_inverse; + gf->inverse.w128 = gf_w128_matrix; + } */ + + if (gf->inverse.w128 != NULL && gf->divide.w128 == NULL) { + gf->divide.w128 = gf_w128_divide_from_inverse; + } + if (gf->inverse.w128 == NULL && gf->divide.w128 != NULL) { + gf->inverse.w128 = gf_w128_inverse_from_divide; + } + return 1; +} diff --git a/gf_w16.c b/gf_w16.c new file mode 100644 index 0000000..d33c695 --- /dev/null +++ b/gf_w16.c @@ -0,0 +1,1941 @@ +/* + * gf_w16.c + * + * Routines 
for 16-bit Galois fields + */ + +#include "gf_int.h" +#include +#include + +#define GF_FIELD_WIDTH (16) +#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) +#define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 + +#define GF_BASE_FIELD_WIDTH (8) +#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) +#define GF_S_GF_8_2 (63) + +struct gf_logtable_data { + int log_tbl[GF_FIELD_SIZE]; + uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; + uint16_t inv_tbl[GF_FIELD_SIZE]; +}; + +struct gf_zero_logtable_data { + int log_tbl[GF_FIELD_SIZE]; + uint16_t _antilog_tbl[GF_FIELD_SIZE * 4]; + uint16_t *antilog_tbl; + uint16_t inv_tbl[GF_FIELD_SIZE]; +}; + +struct gf_lazytable_data { + int log_tbl[GF_FIELD_SIZE]; + uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; + uint16_t inv_tbl[GF_FIELD_SIZE]; + uint16_t lazytable[GF_FIELD_SIZE]; +}; + +struct gf_w8_logtable_data { + uint8_t log_tbl[GF_BASE_FIELD_SIZE]; + uint8_t antilog_tbl[GF_BASE_FIELD_SIZE * 2]; + uint8_t *antilog_tbl_div; +}; + +struct gf_w8_single_table_data { + uint8_t mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; +}; + +struct gf_w16_bytwo_data { + uint64_t prim_poly; + uint64_t mask1; + uint64_t mask2; +}; + +struct gf_w16_group_4_4_data { + uint16_t reduce[16]; + uint16_t shift[16]; +}; + +#define AB2(ip, am1 ,am2, b, t1, t2) {\ + t1 = (b << 1) & am1;\ + t2 = b & am2; \ + t2 = ((t2 << 1) - (t2 >> (GF_FIELD_WIDTH-1))); \ + b = (t1 ^ (t2 & ip));} + +#define SSE_AB2(pp, m1 ,m2, va, t1, t2) {\ + t1 = _mm_and_si128(_mm_slli_epi64(va, 1), m1); \ + t2 = _mm_and_si128(va, m2); \ + t2 = _mm_sub_epi64 (_mm_slli_epi64(t2, 1), _mm_srli_epi64(t2, (GF_FIELD_WIDTH-1))); \ + va = _mm_xor_si128(t1, _mm_and_si128(t2, pp)); } + +#define MM_PRINT(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 2) printf(" %02x %02x", blah[15-ii], blah[14-ii]); printf("\n"); } + +static +inline +gf_val_32_t gf_w16_inverse_from_divide (gf_t *gf, gf_val_32_t a) +{ + return gf->divide.w32(gf, 1, a); +} + +static +inline 
+gf_val_32_t gf_w16_divide_from_inverse (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + b = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, b); +} + +static +void +gf_w16_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + gf_region_data rd; + uint16_t *s16; + uint16_t *d16; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2); + gf_do_initial_region_alignment(&rd); + + s16 = (uint16_t *) rd.s_start; + d16 = (uint16_t *) rd.d_start; + + if (xor) { + while (d16 < ((uint16_t *) rd.d_top)) { + *d16 ^= gf->multiply.w32(gf, val, *s16); + d16++; + s16++; + } + } else { + while (d16 < ((uint16_t *) rd.d_top)) { + *d16 = gf->multiply.w32(gf, val, *s16); + d16++; + s16++; + } + } + gf_do_final_region_alignment(&rd); +} + +static +inline +gf_val_32_t gf_w16_euclid (gf_t *gf, gf_val_32_t b) +{ + gf_val_32_t e_i, e_im1, e_ip1; + gf_val_32_t d_i, d_im1, d_ip1; + gf_val_32_t y_i, y_im1, y_ip1; + gf_val_32_t c_i; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = 16; + for (d_i = d_im1; ((1 << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 >= d_i) { + c_i ^= (1 << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i = e_ip1; + d_i = d_ip1; + } + + return y_i; +} + +static +gf_val_32_t gf_w16_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + uint16_t *r16, rv; + + r16 = (uint16_t *) start; + rv = r16[index]; + return rv; +} + +static +gf_val_32_t gf_w16_composite_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + int sub_size; + gf_internal_t *h; + 
uint8_t *r8, *top; + uint16_t a, b, *r16; + gf_region_data rd; + + h = (gf_internal_t *) gf->scratch; + gf_set_region_data(&rd, gf, start, start, bytes, 0, 0, 32); + r16 = (uint16_t *) start; + if (r16 + index < (uint16_t *) rd.d_start) return r16[index]; + if (r16 + index >= (uint16_t *) rd.d_top) return r16[index]; + index -= (((uint16_t *) rd.d_start) - r16); + r8 = (uint8_t *) rd.d_start; + top = (uint8_t *) rd.d_top; + sub_size = (top-r8)/2; + + a = h->base_gf->extract_word.w32(h->base_gf, r8, sub_size, index); + b = h->base_gf->extract_word.w32(h->base_gf, r8+sub_size, sub_size, index); + return (a | (b << 8)); +} + +static +gf_val_32_t gf_w16_split_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + uint16_t *r16, rv; + uint8_t *r8; + gf_region_data rd; + + gf_set_region_data(&rd, gf, start, start, bytes, 0, 0, 32); + r16 = (uint16_t *) start; + if (r16 + index < (uint16_t *) rd.d_start) return r16[index]; + if (r16 + index >= (uint16_t *) rd.d_top) return r16[index]; + index -= (((uint16_t *) rd.d_start) - r16); + r8 = (uint8_t *) rd.d_start; + r8 += ((index & 0xfffffff0)*2); + r8 += (index & 0xf); + rv = (*r8 << 8); + r8 += 16; + rv |= *r8; + return rv; +} + +static +inline +gf_val_32_t gf_w16_matrix (gf_t *gf, gf_val_32_t b) +{ + return gf_bitmatrix_inverse(b, 16, ((gf_internal_t *) (gf->scratch))->prim_poly); +} + +/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only + include it for completeness. It does have the feature that it requires no + extra memory. 
+*/ + +static +inline +gf_val_32_t +gf_w16_shift_multiply (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) +{ + gf_val_32_t product, i, pp, a, b; + gf_internal_t *h; + + a = a16; + b = b16; + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + product = 0; + + for (i = 0; i < GF_FIELD_WIDTH; i++) { + if (a & (1 << i)) product ^= (b << i); + } + for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { + if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); + } + return product; +} + +static +int gf_w16_shift_init(gf_t *gf) +{ + gf->multiply.w32 = gf_w16_shift_multiply; + gf->inverse.w32 = gf_w16_euclid; + gf->multiply_region.w32 = gf_w16_multiply_region_from_single; + return 1; +} + +/* KMG: GF_MULT_LOGTABLE: */ + +static +void +gf_w16_log_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint16_t *s16, *d16; + int lv; + struct gf_logtable_data *ltd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2); + gf_do_initial_region_alignment(&rd); + + ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + s16 = (uint16_t *) rd.s_start; + d16 = (uint16_t *) rd.d_start; + + lv = ltd->log_tbl[val]; + + if (xor) { + while (d16 < (uint16_t *) rd.d_top) { + *d16 ^= (*s16 == 0 ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[*s16]]); + d16++; + s16++; + } + } else { + while (d16 < (uint16_t *) rd.d_top) { + *d16 = (*s16 == 0 ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[*s16]]); + d16++; + s16++; + } + } + gf_do_final_region_alignment(&rd); +} + +static +inline +gf_val_32_t +gf_w16_log_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_logtable_data *ltd; + + ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return (a == 0 || b == 0) ? 
0 : ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]]; +} + +static +inline +gf_val_32_t +gf_w16_log_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + int log_sum = 0; + struct gf_logtable_data *ltd; + + if (a == 0 || b == 0) return 0; + ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + + log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE); + return (ltd->antilog_tbl[log_sum]); +} + +static +gf_val_32_t +gf_w16_log_inverse(gf_t *gf, gf_val_32_t a) +{ + struct gf_logtable_data *ltd; + + ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return (ltd->inv_tbl[a]); +} + +static +int gf_w16_log_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_logtable_data *ltd; + int i, b; + + h = (gf_internal_t *) gf->scratch; + ltd = h->private; + + ltd->log_tbl[0] = 0; + + b = 1; + for (i = 0; i < GF_MULT_GROUP_SIZE; i++) { + ltd->log_tbl[b] = i; + ltd->antilog_tbl[i] = b; + ltd->antilog_tbl[i+GF_MULT_GROUP_SIZE] = b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ + ltd->inv_tbl[1] = 1; + for (i = 2; i < GF_FIELD_SIZE; i++) { + ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; + } + + gf->inverse.w32 = gf_w16_log_inverse; + gf->divide.w32 = gf_w16_log_divide; + gf->multiply.w32 = gf_w16_log_multiply; + gf->multiply_region.w32 = gf_w16_log_multiply_region; + + return 1; +} + +/* JSP: GF_MULT_SPLIT_TABLE: Using 8 multiplication tables to leverage SSE instructions. 
+*/ + +static +void +gf_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint64_t i, j, a, c, prod; + uint16_t *s16, *d16, *top; + gf_internal_t *h; + uint16_t table[4][16]; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2); + gf_do_initial_region_alignment(&rd); + + h = (gf_internal_t *) gf->scratch; + + for (j = 0; j < 16; j++) { + for (i = 0; i < 4; i++) { + c = (j << (i*4)); + table[i][j] = gf_w16_log_multiply(gf, c, val); + } + } + + s16 = (uint16_t *) rd.s_start; + d16 = (uint16_t *) rd.d_start; + top = (uint16_t *) rd.d_top; + + while (d16 < top) { + a = *s16; + prod = (xor) ? *d16 : 0; + for (i = 0; i < 4; i++) { + prod ^= table[i][a&0xf]; + a >>= 4; + } + *d16 = prod; + s16++; + d16++; + } +} + +static +void +gf_w16_split_8_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint64_t j, a, c, prod, *s64, *d64, *top64; + gf_internal_t *h; + uint64_t htable[256], ltable[256]; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + gf_do_initial_region_alignment(&rd); + + h = (gf_internal_t *) gf->scratch; + + for (j = 0; j < 256; j++) { + ltable[j] = gf_w16_log_multiply(gf, j, val); + htable[j] = gf_w16_log_multiply(gf, (j<<8), val); + } + + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + top64 = (uint64_t *) rd.d_top; + +/* Does Unrolling Matter? -- Doesn't seem to. 
+ while (d64 != top64) { + a = *s64; + + prod = htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + a <<= 8; + prod <<= 16; + + prod ^= htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + a <<= 8; + prod <<= 16; + + prod ^= htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + a <<= 8; + prod <<= 16; + + prod ^= htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + prod ^= ((xor) ? *d64 : 0); + *d64 = prod; + *s64++; + *d64++; + } +*/ + + while (d64 != top64) { + a = *s64; + + prod = 0; + for (j = 0; j < 4; j++) { + prod <<= 16; + prod ^= htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + a <<= 8; + } + + prod ^= ((xor) ? *d64 : 0); + *d64 = prod; + *s64++; + *d64++; + } + gf_do_final_region_alignment(&rd); +} + +static void +gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint64_t j, a, c, pp; + gf_internal_t *h; + struct gf_lazytable_data *ltd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + gf_do_initial_region_alignment(&rd); + + h = (gf_internal_t *) gf->scratch; + ltd = (struct gf_lazytable_data *) h->private; + + ltd->lazytable[0] = 0; + + /* + a = val; + c = 1; + pp = h->prim_poly; + + do { + ltd->lazytable[c] = a; + c <<= 1; + if (c & (1 << GF_FIELD_WIDTH)) c ^= pp; + a <<= 1; + if (a & (1 << GF_FIELD_WIDTH)) a ^= pp; + } while (c != 1); + */ + + a = ltd->log_tbl[val]; + for (c = 1; c < GF_FIELD_SIZE; c++) { + ltd->lazytable[c] = ltd->antilog_tbl[ltd->log_tbl[c]+a]; + } + + gf_two_byte_region_table_multiply(&rd, ltd->lazytable); + gf_do_final_region_alignment(&rd); +} + +static +void +gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + uint64_t i, j, *s64, *d64, *top64;; + uint64_t a, c, prod; + uint8_t low[4][16]; + 
uint8_t high[4][16]; + gf_region_data rd; + + __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], tta, ttb, shuffler, unshuffler, lmask; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32); + gf_do_initial_region_alignment(&rd); + + for (j = 0; j < 16; j++) { + for (i = 0; i < 4; i++) { + c = (j << (i*4)); + prod = gf_w16_log_multiply(gf, c, val); + low[i][j] = (prod & 0xff); + high[i][j] = (prod >> 8); + } + } + + for (i = 0; i < 4; i++) { + tlow[i] = _mm_loadu_si128((__m128i *)low[i]); + thigh[i] = _mm_loadu_si128((__m128i *)high[i]); + } + + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + top64 = (uint64_t *) rd.d_top; + + mask = _mm_set1_epi8 (0x0f); + lmask = _mm_set1_epi16 (0xff); + + if (xor) { + while (d64 != top64) { + + ta = _mm_load_si128((__m128i *) s64); + tb = _mm_load_si128((__m128i *) (s64+2)); + + tta = _mm_srli_epi16(ta, 8); + ttb = _mm_srli_epi16(tb, 8); + tpl = _mm_and_si128(tb, lmask); + tph = _mm_and_si128(ta, lmask); + + tb = _mm_packus_epi16(tpl, tph); + ta = _mm_packus_epi16(ttb, tta); + + ti = _mm_and_si128 (mask, tb); + tph = _mm_shuffle_epi8 (thigh[0], ti); + tpl = _mm_shuffle_epi8 (tlow[0], ti); + + tb = _mm_srli_epi16(tb, 4); + ti = _mm_and_si128 (mask, tb); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); + + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); + + ta = _mm_unpackhi_epi8(tpl, tph); + tb = _mm_unpacklo_epi8(tpl, tph); + + tta = _mm_load_si128((__m128i *) d64); + ta = _mm_xor_si128(ta, tta); + ttb = 
_mm_load_si128((__m128i *) (d64+2)); + tb = _mm_xor_si128(tb, ttb); + _mm_store_si128 ((__m128i *)d64, ta); + _mm_store_si128 ((__m128i *)(d64+2), tb); + + d64 += 4; + s64 += 4; + + } + } else { + while (d64 != top64) { + + ta = _mm_load_si128((__m128i *) s64); + tb = _mm_load_si128((__m128i *) (s64+2)); + + tta = _mm_srli_epi16(ta, 8); + ttb = _mm_srli_epi16(tb, 8); + tpl = _mm_and_si128(tb, lmask); + tph = _mm_and_si128(ta, lmask); + + tb = _mm_packus_epi16(tpl, tph); + ta = _mm_packus_epi16(ttb, tta); + + ti = _mm_and_si128 (mask, tb); + tph = _mm_shuffle_epi8 (thigh[0], ti); + tpl = _mm_shuffle_epi8 (tlow[0], ti); + + tb = _mm_srli_epi16(tb, 4); + ti = _mm_and_si128 (mask, tb); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); + + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); + + ta = _mm_unpackhi_epi8(tpl, tph); + tb = _mm_unpacklo_epi8(tpl, tph); + + _mm_store_si128 ((__m128i *)d64, ta); + _mm_store_si128 ((__m128i *)(d64+2), tb); + + d64 += 4; + s64 += 4; + } + } + + gf_do_final_region_alignment(&rd); +#endif +} + +static +void +gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + uint64_t i, j, *s64, *d64, *top64;; + uint64_t c, prod; + uint8_t low[4][16]; + uint8_t high[4][16]; + gf_region_data rd; + struct gf_single_table_data *std; + __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4]; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32); + 
gf_do_initial_region_alignment(&rd); + + for (j = 0; j < 16; j++) { + for (i = 0; i < 4; i++) { + c = (j << (i*4)); + prod = gf_w16_log_multiply(gf, c, val); + low[i][j] = (prod & 0xff); + high[i][j] = (prod >> 8); + } + } + + for (i = 0; i < 4; i++) { + tlow[i] = _mm_loadu_si128((__m128i *)low[i]); + thigh[i] = _mm_loadu_si128((__m128i *)high[i]); + } + + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + top64 = (uint64_t *) rd.d_top; + + mask = _mm_set1_epi8 (0x0f); + + if (xor) { + while (d64 != top64) { + + ta = _mm_load_si128((__m128i *) s64); + tb = _mm_load_si128((__m128i *) (s64+2)); + + ti = _mm_and_si128 (mask, tb); + tph = _mm_shuffle_epi8 (thigh[0], ti); + tpl = _mm_shuffle_epi8 (tlow[0], ti); + + tb = _mm_srli_epi16(tb, 4); + ti = _mm_and_si128 (mask, tb); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); + + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); + + ta = _mm_load_si128((__m128i *) d64); + tph = _mm_xor_si128(tph, ta); + _mm_store_si128 ((__m128i *)d64, tph); + tb = _mm_load_si128((__m128i *) (d64+2)); + tpl = _mm_xor_si128(tpl, tb); + _mm_store_si128 ((__m128i *)(d64+2), tpl); + + d64 += 4; + s64 += 4; + } + } else { + while (d64 != top64) { + + ta = _mm_load_si128((__m128i *) s64); + tb = _mm_load_si128((__m128i *) (s64+2)); + + ti = _mm_and_si128 (mask, tb); + tph = _mm_shuffle_epi8 (thigh[0], ti); + tpl = _mm_shuffle_epi8 (tlow[0], ti); + + tb = _mm_srli_epi16(tb, 4); + ti = _mm_and_si128 (mask, tb); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); + + ti = _mm_and_si128 (mask, ta); + tpl = 
_mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); + + _mm_store_si128 ((__m128i *)d64, tph); + _mm_store_si128 ((__m128i *)(d64+2), tpl); + + d64 += 4; + s64 += 4; + + } + } + gf_do_final_region_alignment(&rd); + +#endif +} + +static +int gf_w16_split_init(gf_t *gf) +{ + gf_internal_t *h; + gf_w16_log_init(gf); + + h = (gf_internal_t *) gf->scratch; + if (h->arg1 == 8 || h->arg2 == 8) { + gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; + } else if (h->arg1 == 4 || h->arg2 == 4) { + if (h->region_type & GF_REGION_SSE) { + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region; + } else { + gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_multiply_region; + } + } else { + gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region; + } + } + return 1; +} + +static +int gf_w16_table_init(gf_t *gf) +{ + gf_internal_t *h; + gf_w16_log_init(gf); + + h = (gf_internal_t *) gf->scratch; + gf->multiply_region.w32 = NULL; + gf->multiply_region.w32 = gf_w16_table_lazy_multiply_region; + return 1; +} + +static +void +gf_w16_log_zero_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint16_t lv; + int i; + uint16_t *s16, *d16, *top16; + struct gf_zero_logtable_data *ltd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2); + gf_do_initial_region_alignment(&rd); + + ltd = (struct gf_zero_logtable_data*) ((gf_internal_t *) gf->scratch)->private; + s16 = (uint16_t *) rd.s_start; + d16 = (uint16_t *) rd.d_start; + top16 = (uint16_t *) rd.d_top; + 
bytes = top16 - d16; + + lv = ltd->log_tbl[val]; + + if (xor) { + for (i = 0; i < bytes; i++) { + d16[i] ^= (ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]); + } + } else { + for (i = 0; i < bytes; i++) { + d16[i] = (ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]); + } + } + + /* This isn't necessary. */ + gf_do_final_region_alignment(&rd); +} + +/* Here -- double-check Kevin */ +static +inline +gf_val_32_t +gf_w16_log_zero_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_zero_logtable_data *ltd; + + ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]]; +} + +static +inline +gf_val_32_t +gf_w16_log_zero_divide (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + int log_sum = 0; + struct gf_zero_logtable_data *ltd; + + if (a == 0 || b == 0) return 0; + ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + + log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE); + return (ltd->antilog_tbl[log_sum]); +} + +static +gf_val_32_t +gf_w16_log_zero_inverse (gf_t *gf, gf_val_32_t a) +{ + struct gf_zero_logtable_data *ltd; + + ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return (ltd->inv_tbl[a]); +} + +static +inline +gf_val_32_t +gf_w16_bytwo_p_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint32_t prod, pp, pmask, amask; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + + prod = 0; + pmask = 0x8000; + amask = 0x8000; + + while (amask != 0) { + if (prod & pmask) { + prod = ((prod << 1) ^ pp); + } else { + prod <<= 1; + } + if (a & amask) prod ^= b; + amask >>= 1; + } + return prod; +} + +static +inline +gf_val_32_t +gf_w16_bytwo_b_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint32_t prod, pp, bmask; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + prod = 0; + bmask = 0x8000; + + while (1) { + if (a & 1) prod ^= b; + a 
>>= 1; + if (a == 0) return prod; + if (b & bmask) { + b = ((b << 1) ^ pp); + } else { + b <<= 1; + } + } +} + +static +void +gf_w16_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint64_t *s64, *d64, t1, t2, ta, prod, amask; + gf_region_data rd; + struct gf_w16_bytwo_data *btd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + btd = (struct gf_w16_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + gf_do_initial_region_alignment(&rd); + + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + + if (xor) { + while (s64 < (uint64_t *) rd.s_top) { + prod = 0; + amask = 0x8000; + ta = *s64; + while (amask != 0) { + AB2(btd->prim_poly, btd->mask1, btd->mask2, prod, t1, t2); + if (val & amask) prod ^= ta; + amask >>= 1; + } + *d64 ^= prod; + d64++; + s64++; + } + } else { + while (s64 < (uint64_t *) rd.s_top) { + prod = 0; + amask = 0x8000; + ta = *s64; + while (amask != 0) { + AB2(btd->prim_poly, btd->mask1, btd->mask2, prod, t1, t2); + if (val & amask) prod ^= ta; + amask >>= 1; + } + *d64 = prod; + d64++; + s64++; + } + } + gf_do_final_region_alignment(&rd); +} + +#define BYTWO_P_ONESTEP {\ + SSE_AB2(pp, m1 ,m2, prod, t1, t2); \ + t1 = _mm_and_si128(v, one); \ + t1 = _mm_sub_epi16(t1, one); \ + t1 = _mm_and_si128(t1, ta); \ + prod = _mm_xor_si128(prod, t1); \ + v = _mm_srli_epi64(v, 1); } + +static +void +gf_w16_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *s8, *d8; + uint32_t vrev; + uint64_t amask; + __m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v; + struct gf_w16_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + btd = 
(struct gf_w16_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + vrev = 0; + for (i = 0; i < 16; i++) { + vrev <<= 1; + if (!(val & (1 << i))) vrev |= 1; + } + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + pp = _mm_set1_epi16(btd->prim_poly&0xffff); + m1 = _mm_set1_epi16((btd->mask1)&0xffff); + m2 = _mm_set1_epi16((btd->mask2)&0xffff); + one = _mm_set1_epi16(1); + + while (d8 < (uint8_t *) rd.d_top) { + prod = _mm_setzero_si128(); + v = _mm_set1_epi16(vrev); + ta = _mm_load_si128((__m128i *) s8); + tp = (!xor) ? _mm_setzero_si128() : _mm_load_si128((__m128i *) d8); + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + _mm_store_si128((__m128i *) d8, _mm_xor_si128(prod, tp)); + d8 += 16; + s8 += 16; + } + gf_do_final_region_alignment(&rd); +#endif +} + +static +void +gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi16(btd->prim_poly&0xffff); + m1 = _mm_set1_epi16((btd->mask1)&0xffff); + m2 = _mm_set1_epi16((btd->mask2)&0xffff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} + +static +void +gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + 
+ pp = _mm_set1_epi16(btd->prim_poly&0xffff); + m1 = _mm_set1_epi16((btd->mask1)&0xffff); + m2 = _mm_set1_epi16((btd->mask2)&0xffff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_load_si128 ((__m128i *)(d8)); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} + + +static +void +gf_w16_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + int itb; + uint8_t *d8, *s8; + __m128i pp, m1, m2, t1, t2, va, vb; + struct gf_w16_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + btd = (struct gf_w16_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + if (val == 2) { + if (xor) { + gf_w16_bytwo_b_sse_region_2_xor(&rd, btd); + } else { + gf_w16_bytwo_b_sse_region_2_noxor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + } + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + pp = _mm_set1_epi16(btd->prim_poly&0xffff); + m1 = _mm_set1_epi16((btd->mask1)&0xffff); + m2 = _mm_set1_epi16((btd->mask2)&0xffff); + + while (d8 < (uint8_t *) rd.d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = (!xor) ? 
_mm_setzero_si128() : _mm_load_si128 ((__m128i *)(d8)); + itb = val; + while (1) { + if (itb & 1) vb = _mm_xor_si128(vb, va); + itb >>= 1; + if (itb == 0) break; + SSE_AB2(pp, m1, m2, va, t1, t2); + } + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } + + gf_do_final_region_alignment(&rd); +#endif +} + +static +void +gf_w16_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + int i; + uint64_t *s64, *d64, t1, t2, ta, tb, prod; + struct gf_w16_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + btd = (struct gf_w16_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + + switch (val) { + case 2: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 3: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 4: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + 
AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 5: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta ^ prod; + d64++; + s64++; + } + } + default: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + prod = *d64 ; + ta = *s64; + tb = val; + while (1) { + if (tb & 1) prod ^= ta; + tb >>= 1; + if (tb == 0) break; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + } + *d64 = prod; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + prod = 0 ; + ta = *s64; + tb = val; + while (1) { + if (tb & 1) prod ^= ta; + tb >>= 1; + if (tb == 0) break; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + } + *d64 = prod; + d64++; + s64++; + } + } + break; + } + gf_do_final_region_alignment(&rd); +} + +static +int gf_w16_bytwo_init(gf_t *gf) +{ + gf_internal_t *h; + uint64_t ip, m1, m2; + struct gf_w16_bytwo_data *btd; + + h = (gf_internal_t *) gf->scratch; + btd = (struct gf_w16_bytwo_data *) (h->private); + ip = h->prim_poly & 0xffff; + m1 = 0xfffe; + m2 = 0x8000; + btd->prim_poly = 0; + btd->mask1 = 0; + btd->mask2 = 0; + + while (ip != 0) { + btd->prim_poly |= ip; + btd->mask1 |= m1; + btd->mask2 |= m2; + ip <<= GF_FIELD_WIDTH; + m1 <<= GF_FIELD_WIDTH; + m2 <<= GF_FIELD_WIDTH; + } + + if (h->mult_type == GF_MULT_BYTWO_p) { + gf->multiply.w32 = gf_w16_bytwo_p_multiply; + if (h->region_type == GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w16_bytwo_p_sse_multiply_region; + } else { + gf->multiply_region.w32 = 
gf_w16_bytwo_p_nosse_multiply_region; + } + } else { + gf->multiply.w32 = gf_w16_bytwo_b_multiply; + if (h->region_type == GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w16_bytwo_b_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region; + } + } + gf->inverse.w32 = gf_w16_euclid; + return 1; +} + +static +int gf_w16_log_zero_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_zero_logtable_data *ltd; + int i, b; + + h = (gf_internal_t *) gf->scratch; + ltd = h->private; + + ltd->log_tbl[0] = (-GF_MULT_GROUP_SIZE) + 1; + + bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl)); + + ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_FIELD_SIZE * 2]); + + b = 1; + for (i = 0; i < GF_MULT_GROUP_SIZE; i++) { + ltd->log_tbl[b] = (uint16_t)i; + ltd->antilog_tbl[i] = (uint16_t)b; + ltd->antilog_tbl[i+GF_MULT_GROUP_SIZE] = (uint16_t)b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ + ltd->inv_tbl[1] = 1; + for (i = 2; i < GF_FIELD_SIZE; i++) { + ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; + } + + gf->inverse.w32 = gf_w16_log_zero_inverse; + gf->divide.w32 = gf_w16_log_zero_divide; + gf->multiply.w32 = gf_w16_log_zero_multiply; + gf->multiply_region.w32 = gf_w16_log_zero_multiply_region; + return 1; +} + +static +gf_val_32_t +gf_w16_composite_multiply_recursive(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t b0 = b & 0x00ff; + uint8_t b1 = (b & 0xff00) >> 8; + uint8_t a0 = a & 0x00ff; + uint8_t a1 = (a & 0xff00) >> 8; + uint8_t a1b1; + uint16_t rv; + + a1b1 = base_gf->multiply.w32(base_gf, a1, b1); + + rv = ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_8_2)) << 8)); + return rv; +} + +static 
+gf_val_32_t +gf_w16_composite_multiply_table(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + struct gf_w8_single_table_data * std; + + uint8_t b0 = b & 0x00ff; + uint8_t b1 = (b & 0xff00) >> 8; + uint8_t a0 = a & 0x00ff; + uint8_t a1 = (a & 0xff00) >> 8; + uint8_t a1b1; + uint16_t rv; + + std = (struct gf_w8_single_table_data *) h->private; + + a1b1 = std->mult[a1][b1]; + + rv = ((std->mult[a0][b0] ^ a1b1) | + ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8)); + return rv; +} + +/* + * Composite field division trick (explained in 2007 tech report) + * + * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1 + * + * let c = b^-1 + * + * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0) + * + * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1 + * + * let d = b1c1 and d+1 = b0c0 + * + * solve s*b1c1+b1c0+b0c1 = 0 + * + * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1 + * + * c0 = (d+1)b0^-1 + * c1 = d*b1^-1 + * + * a / b = a * c + */ +static +gf_val_32_t +gf_w16_composite_inverse(gf_t *gf, gf_val_32_t a) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t a0 = a & 0x00ff; + uint8_t a1 = (a & 0xff00) >> 8; + uint8_t c0, c1, d, tmp; + uint16_t c; + uint8_t a0inv, a1inv; + + if (a0 == 0) { + a1inv = base_gf->inverse.w32(base_gf, a1); + c0 = base_gf->multiply.w32(base_gf, a1inv, GF_S_GF_8_2); + c1 = a1inv; + } else if (a1 == 0) { + c0 = base_gf->inverse.w32(base_gf, a0); + c1 = 0; + } else { + a1inv = base_gf->inverse.w32(base_gf, a1); + a0inv = base_gf->inverse.w32(base_gf, a0); + + d = base_gf->multiply.w32(base_gf, a1, a0inv); + + tmp = (base_gf->multiply.w32(base_gf, a1, a0inv) ^ base_gf->multiply.w32(base_gf, a0, a1inv) ^ GF_S_GF_8_2); + tmp = base_gf->inverse.w32(base_gf, tmp); + + d = base_gf->multiply.w32(base_gf, d, tmp); + + c0 = base_gf->multiply.w32(base_gf, (d^1), a0inv); + c1 = base_gf->multiply.w32(base_gf, d, a1inv); + } + + c = c0 | (c1 << 8); + + 
return c; +} + +static +gf_val_32_t +gf_w16_composite_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint16_t binv; + + binv = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, binv); +} + +static +void +gf_w16_composite_multiply_region_inline(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + struct gf_w8_single_table_data * std; + uint8_t b0 = val & 0x00ff; + uint8_t b1 = (val & 0xff00) >> 8; + uint16_t *s16, *d16, *top; + uint8_t a0, a1, a1b1; + struct gf_logtable_data *ltd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + + std = (struct gf_w8_single_table_data *) h->private; + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2); + + s16 = rd.s_start; + d16 = rd.d_start; + top = rd.d_top; + + if (xor) { + while (d16 < top) { + a0 = (*s16) & 0x00ff; + a1 = ((*s16) & 0xff00) >> 8; + a1b1 = std->mult[a1][b1]; + + *d16 ^= ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8)); + s16++; + d16++; + } + } else { + while (d16 < top) { + a0 = (*s16) & 0x00ff; + a1 = ((*s16) & 0xff00) >> 8; + a1b1 = std->mult[a1][b1]; + + *d16 = ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8)); + s16++; + d16++; + } + } +} + +static +void +gf_w16_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t b0 = val & 0x00ff; + uint8_t b1 = (val & 0xff00) >> 8; + uint16_t *s16, *d16, *top; + uint8_t a0, a1, a1b1; + gf_region_data rd; + struct gf_logtable_data *ltd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2); + + s16 = rd.s_start; + d16 = rd.d_start; + top = rd.d_top; + + if (xor) { + 
while (d16 < top) { + a0 = (*s16) & 0x00ff; + a1 = ((*s16) & 0xff00) >> 8; + a1b1 = base_gf->multiply.w32(base_gf, a1, b1); + + (*d16) ^= ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_8_2)) << 8)); + s16++; + d16++; + } + } else { + while (d16 < top) { + a0 = (*s16) & 0x00ff; + a1 = ((*s16) & 0xff00) >> 8; + a1b1 = base_gf->multiply.w32(base_gf, a1, b1); + + (*d16) = ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_8_2)) << 8)); + s16++; + d16++; + } + } +} + +static +void +gf_w16_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t val0 = val & 0x00ff; + uint8_t val1 = (val & 0xff00) >> 8; + gf_region_data rd; + int sub_reg_size; + uint8_t *slow, *shigh; + uint8_t *dlow, *dhigh, *top;; + + /* JSP: I want the two pointers aligned wrt each other on 16 byte + boundaries. So I'm going to make sure that the area on + which the two operate is a multiple of 32. Of course, that + junks up the mapping, but so be it -- that's why we have extract_word.... 
*/ + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32); + gf_do_initial_region_alignment(&rd); + + slow = (uint8_t *) rd.s_start; + dlow = (uint8_t *) rd.d_start; + top = (uint8_t *) rd.d_top; + sub_reg_size = (top - dlow)/2; + shigh = slow + sub_reg_size; + dhigh = dlow + sub_reg_size; + + base_gf->multiply_region.w32(base_gf, slow, dlow, val0, sub_reg_size, xor); + base_gf->multiply_region.w32(base_gf, shigh, dlow, val1, sub_reg_size, 1); + base_gf->multiply_region.w32(base_gf, slow, dhigh, val1, sub_reg_size, xor); + base_gf->multiply_region.w32(base_gf, shigh, dhigh, val0, sub_reg_size, 1); + base_gf->multiply_region.w32(base_gf, shigh, dhigh, base_gf->multiply.w32(base_gf, GF_S_GF_8_2, val1), sub_reg_size, 1); + + gf_do_final_region_alignment(&rd); +} + +static +int gf_w16_composite_init(gf_t *gf) +{ + struct gf_w8_single_table_data * std; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + gf_internal_t *base_h = (gf_internal_t *) base_gf->scratch; + uint16_t a, b; + + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w32 = gf_w16_composite_multiply_region_alt; + } else if (h->arg2 == 0 && base_h->mult_type == GF_MULT_TABLE && + base_h->region_type == GF_REGION_DEFAULT) { + gf->multiply_region.w32 = gf_w16_composite_multiply_region_inline; + } else { + gf->multiply_region.w32 = gf_w16_composite_multiply_region; + } + + if (h->arg2 == 0) { + std = (struct gf_w8_single_table_data *) h->private; + for (a = 0; a < 256; a++) { + for (b = 0; b < 256; b++) { + std->mult[a][b] = base_gf->multiply.w32(base_gf, a, b); + } + } + gf->multiply.w32 = gf_w16_composite_multiply_table; + } else { + gf->multiply.w32 = gf_w16_composite_multiply_recursive; + } + + gf->divide.w32 = gf_w16_composite_divide; + gf->inverse.w32 = gf_w16_composite_inverse; + + return 1; +} + +static +void +gf_w16_group_4_set_shift_tables(uint16_t *shift, uint16_t val, gf_internal_t *h) +{ + int i, j; + + shift[0] = 0; + for (i = 0; i < 16; i += 
2) { + j = (shift[i>>1] << 1); + if (j & (1 << 16)) j ^= h->prim_poly; + shift[i] = j; + shift[i^1] = j^val; + } +} + +static +inline +gf_val_32_t +gf_w16_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + int i; + uint16_t p, l, ind, r, a16; + + struct gf_w16_group_4_4_data *d44; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + + d44 = (struct gf_w16_group_4_4_data *) h->private; + gf_w16_group_4_set_shift_tables(d44->shift, b, h); + + a16 = a; + ind = a16 >> 12; + a16 <<= 4; + p = d44->shift[ind]; + r = p & 0xfff; + l = p >> 12; + ind = a16 >> 12; + a16 <<= 4; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (r << 4)); + r = p & 0xfff; + l = p >> 12; + ind = a16 >> 12; + a16 <<= 4; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (r << 4)); + r = p & 0xfff; + l = p >> 12; + ind = a16 >> 12; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (r << 4)); + return p; +} + +static +void gf_w16_group_4_4_region_multiply(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + int i; + uint16_t p, l, ind, r, a16, p16; + struct gf_w16_group_4_4_data *d44; + gf_region_data rd; + uint16_t *s16, *d16, *top; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_internal_t *h = (gf_internal_t *) gf->scratch; + d44 = (struct gf_w16_group_4_4_data *) h->private; + gf_w16_group_4_set_shift_tables(d44->shift, val, h); + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 2); + gf_do_initial_region_alignment(&rd); + + s16 = (uint16_t *) rd.s_start; + d16 = (uint16_t *) rd.d_start; + top = (uint16_t *) rd.d_top; + + while (d16 < top) { + p = 0; + a16 = *s16; + p16 = (xor) ? 
*d16 : 0; + ind = a16 >> 12; + a16 <<= 4; + p = d44->shift[ind]; + r = p & 0xfff; + l = p >> 12; + ind = a16 >> 12; + a16 <<= 4; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (r << 4)); + r = p & 0xfff; + l = p >> 12; + ind = a16 >> 12; + a16 <<= 4; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (r << 4)); + r = p & 0xfff; + l = p >> 12; + ind = a16 >> 12; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (r << 4)); + p ^= p16; + *d16 = p; + d16++; + s16++; + } + gf_do_final_region_alignment(&rd); +} + +static +int gf_w16_group_init(gf_t *gf) +{ + int i, j, p; + struct gf_w16_group_4_4_data *d44; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + + d44 = (struct gf_w16_group_4_4_data *) h->private; + d44->reduce[0] = 0; + for (i = 0; i < 16; i++) { + p = 0; + for (j = 0; j < 4; j++) { + if (i & (1 << j)) p ^= (h->prim_poly << j); + } + d44->reduce[p>>16] = (p&0xffff); + } + + gf->multiply.w32 = gf_w16_group_4_4_multiply; + gf->divide.w32 = NULL; + gf->inverse.w32 = NULL; + gf->multiply_region.w32 = gf_w16_group_4_4_region_multiply; + + return 1; +} + +int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + int ss; + int sa; + + ss = (GF_REGION_SSE | GF_REGION_NOSSE); + sa = (GF_REGION_STDMAP | GF_REGION_ALTMAP); + + switch(mult_type) + { + case GF_MULT_TABLE: + region_type |= GF_REGION_LAZY; + if (arg1 != 0 || arg2 != 0 || region_type != GF_REGION_LAZY) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_lazytable_data) + 64; + break; + case GF_MULT_BYTWO_p: + case GF_MULT_BYTWO_b: + if (arg1 != 0 || arg2 != 0 || (region_type | ss) != ss || + (region_type & ss) == ss) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_w16_bytwo_data); + break; + case GF_MULT_DEFAULT: + case GF_MULT_LOG_TABLE: + if (arg2 != 0) return -1; + if (region_type != GF_REGION_DEFAULT) return -1; + if (arg1 == 1) { + return sizeof(gf_internal_t) + sizeof(struct gf_zero_logtable_data) + 64; + } else if (arg1 == 0) { + return sizeof(gf_internal_t) 
+ sizeof(struct gf_logtable_data) + 64; + } else { + return -1; + } + break; + case GF_MULT_SPLIT_TABLE: + if ((arg1 == 8 && arg2 == 16) || (arg2 == 8 && arg1 == 16)) { + region_type |= GF_REGION_LAZY; + if (region_type != GF_REGION_LAZY) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; + } else if ((arg1 == 4 && arg2 == 16) || (arg2 == 4 && arg1 == 16)) { + region_type &= (~GF_REGION_LAZY); /* Ignore GF_REGION_LAZY */ + if ((region_type & ss) == ss) return -1; + if ((region_type & sa) == sa) return -1; + if ((region_type & ss) == 0) region_type |= GF_REGION_SSE; + if (region_type & GF_REGION_NOSSE) { + if (region_type != GF_REGION_NOSSE) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; + } else { + if ((region_type | ss | sa) != (ss|sa)) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; + } + } + return -1; + break; + case GF_MULT_GROUP: + if (arg1 == 4 && arg2 == 4) { + return sizeof(gf_internal_t) + sizeof(struct gf_w16_group_4_4_data) + 64; + } + return -1; + case GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1; + return sizeof(gf_internal_t); + break; + case GF_MULT_COMPOSITE: + if (region_type & ~(GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1; + if (arg1 == 2 && arg2 == 0) { + return sizeof(gf_internal_t) + sizeof(struct gf_w8_single_table_data) + 64; + } else if (arg1 == 2 && arg2 == 1) { + return sizeof(gf_internal_t) + 64; + } else { + return -1; + } + + default: + return -1; + } +} + +int gf_w16_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x1100b; + + gf->multiply.w32 = NULL; + gf->divide.w32 = NULL; + gf->inverse.w32 = NULL; + gf->multiply_region.w32 = NULL; + + switch(h->mult_type) { + case GF_MULT_DEFAULT: + case GF_MULT_LOG_TABLE: + if (h->arg1 == 1) { + if (gf_w16_log_zero_init(gf) == 0) return 0; + } else { + if (gf_w16_log_init(gf) == 0) return 0; + } 
+ break; + case GF_MULT_SPLIT_TABLE: if (gf_w16_split_init(gf) == 0) return 0; break; + case GF_MULT_TABLE: if (gf_w16_table_init(gf) == 0) return 0; break; + case GF_MULT_SHIFT: if (gf_w16_shift_init(gf) == 0) return 0; break; + case GF_MULT_COMPOSITE: if (gf_w16_composite_init(gf) == 0) return 0; break; + case GF_MULT_BYTWO_p: + case GF_MULT_BYTWO_b: if (gf_w16_bytwo_init(gf) == 0) return 0; break; + case GF_MULT_GROUP: if (gf_w16_group_init(gf) == 0) return 0; break; + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w32 = gf_w16_divide_from_inverse; + gf->inverse.w32 = gf_w16_euclid; + } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w32 = gf_w16_divide_from_inverse; + gf->inverse.w32 = gf_w16_matrix; + } + + if (gf->inverse.w32== NULL && gf->divide.w32 == NULL) gf->inverse.w32 = gf_w16_euclid; + + if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { + gf->divide.w32 = gf_w16_divide_from_inverse; + } + if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { + gf->inverse.w32 = gf_w16_inverse_from_divide; + } + if (h->region_type & GF_REGION_ALTMAP) { + if (h->mult_type == GF_MULT_COMPOSITE) { + gf->extract_word.w32 = gf_w16_composite_extract_word; + } else { + gf->extract_word.w32 = gf_w16_split_extract_word; + } + } else { + gf->extract_word.w32 = gf_w16_extract_word; + } + return 1; +} diff --git a/gf_w32.c b/gf_w32.c new file mode 100644 index 0000000..b49ceb1 --- /dev/null +++ b/gf_w32.c @@ -0,0 +1,2350 @@ +/* + * gf_w32.c + * + * Routines for 32-bit Galois fields + */ + + +#include "gf_int.h" +#include +#include + +#define GF_FIELD_WIDTH (32) +#define GF_FIRST_BIT (1 << 31) + +#define GF_BASE_FIELD_WIDTH (16) +#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) +#define GF_BASE_FIELD_GROUP_SIZE GF_BASE_FIELD_SIZE-1 +#define GF_S_GF_16_2 (40188) +#define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? 
(((p) << 1) ^ h->prim_poly) : (p) << 1); + + +struct gf_w16_logtable_data { + int log_tbl[GF_BASE_FIELD_SIZE]; + uint16_t _antilog_tbl[GF_BASE_FIELD_SIZE * 4]; + uint16_t *antilog_tbl; + uint16_t inv_tbl[GF_BASE_FIELD_SIZE]; + uint32_t log_s; +}; + +struct gf_split_2_32_lazy_data { + uint32_t tables[16][4]; + uint32_t last_value; +}; + +struct gf_split_8_8_data { + uint32_t tables[7][256][256]; + uint32_t region_tables[4][256]; + uint32_t last_value; +}; + +struct gf_w32_group_data { + uint32_t *reduce; + uint32_t *shift; + int tshift; + uint64_t rmask; + uint32_t *memory; +}; + +struct gf_split_32_8_lazy_data { + uint32_t tables[4][256]; + uint32_t last_value; +}; + +struct gf_split_4_32_lazy_data { + uint32_t tables[8][16]; + uint32_t last_value; +}; + +struct gf_w32_bytwo_data { + uint64_t prim_poly; + uint64_t mask1; + uint64_t mask2; +}; + +#define MM_PRINT32(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); } + +#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? 
" " : " ", blah[15-ii]); printf("\n"); } + +#define AB2(ip, am1 ,am2, b, t1, t2) {\ + t1 = (b << 1) & am1;\ + t2 = b & am2; \ + t2 = ((t2 << 1) - (t2 >> (GF_FIELD_WIDTH-1))); \ + b = (t1 ^ (t2 & ip));} + +#define SSE_AB2(pp, m1 ,m2, va, t1, t2) {\ + t1 = _mm_and_si128(_mm_slli_epi64(va, 1), m1); \ + t2 = _mm_and_si128(va, m2); \ + t2 = _mm_sub_epi64 (_mm_slli_epi64(t2, 1), _mm_srli_epi64(t2, (GF_FIELD_WIDTH-1))); \ + va = _mm_xor_si128(t1, _mm_and_si128(t2, pp)); } + +static +inline +uint32_t gf_w32_inverse_from_divide (gf_t *gf, uint32_t a) +{ + return gf->divide.w32(gf, 1, a); +} + +static +inline +uint32_t gf_w32_divide_from_inverse (gf_t *gf, uint32_t a, uint32_t b) +{ + b = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, b); +} + +static +void +gf_w32_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int +xor) +{ + int i; + uint32_t *s32; + uint32_t *d32; + + s32 = (uint32_t *) src; + d32 = (uint32_t *) dest; + + if (xor) { + for (i = 0; i < bytes/sizeof(uint32_t); i++) { + d32[i] ^= gf->multiply.w32(gf, val, s32[i]); + } + } else { + for (i = 0; i < bytes/sizeof(uint32_t); i++) { + d32[i] = gf->multiply.w32(gf, val, s32[i]); + } + } +} + +static +inline +uint32_t gf_w32_euclid (gf_t *gf, uint32_t b) +{ + uint32_t e_i, e_im1, e_ip1; + uint32_t d_i, d_im1, d_ip1; + uint32_t y_i, y_im1, y_ip1; + uint32_t c_i; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = 32; + for (d_i = d_im1-1; ((1 << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 >= d_i) { + c_i ^= (1 << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + d_ip1--; + while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i = e_ip1; + d_i = d_ip1; + } + + return y_i; +} + +static +gf_val_32_t 
gf_w32_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + uint32_t *r32, rv; + + r32 = (uint32_t *) start; + rv = r32[index]; + return rv; +} + +static +gf_val_32_t gf_w32_composite_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + int sub_size; + gf_internal_t *h; + uint8_t *r8, *top; + uint32_t a, b, *r32; + gf_region_data rd; + + h = (gf_internal_t *) gf->scratch; + gf_set_region_data(&rd, gf, start, start, bytes, 0, 0, 32); + r32 = (uint32_t *) start; + if (r32 + index < (uint32_t *) rd.d_start) return r32[index]; + if (r32 + index >= (uint32_t *) rd.d_top) return r32[index]; + index -= (((uint32_t *) rd.d_start) - r32); + r8 = (uint8_t *) rd.d_start; + top = (uint8_t *) rd.d_top; + sub_size = (top-r8)/2; + + a = h->base_gf->extract_word.w32(h->base_gf, r8, sub_size, index); + b = h->base_gf->extract_word.w32(h->base_gf, r8+sub_size, sub_size, index); + return (a | (b << 16)); +} + +static +gf_val_32_t gf_w32_split_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + int i; + uint32_t *r32, rv; + uint8_t *r8; + gf_region_data rd; + + gf_set_region_data(&rd, gf, start, start, bytes, 0, 0, 64); + r32 = (uint32_t *) start; + if (r32 + index < (uint32_t *) rd.d_start) return r32[index]; + if (r32 + index >= (uint32_t *) rd.d_top) return r32[index]; + index -= (((uint32_t *) rd.d_start) - r32); + r8 = (uint8_t *) rd.d_start; + r8 += ((index & 0xfffffff0)*4); + r8 += (index & 0xf); + r8 += 48; + rv =0; + for (i = 0; i < 4; i++) { + rv <<= 8; + rv |= *r8; + r8 -= 16; + } + return rv; +} + + +static +inline +uint32_t gf_w32_matrix (gf_t *gf, uint32_t b) +{ + return gf_bitmatrix_inverse(b, 32, ((gf_internal_t *) (gf->scratch))->prim_poly); +} + +/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only + include it for completeness. It does have the feature that it requires no + extra memory. 
+*/ + +static +inline +uint32_t +gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32) +{ + uint64_t product, i, pp, a, b, one; + gf_internal_t *h; + + a = a32; + b = b32; + h = (gf_internal_t *) gf->scratch; + one = 1; + pp = h->prim_poly | (one << 32); + + product = 0; + + for (i = 0; i < GF_FIELD_WIDTH; i++) { + if (a & (one << i)) product ^= (b << i); + } + for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { + if (product & (one << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); + } + return product; +} + +static +int gf_w32_shift_init(gf_t *gf) +{ + gf->multiply.w32 = gf_w32_shift_multiply; + gf->inverse.w32 = gf_w32_euclid; + gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + return 1; +} + +static +void +gf_w32_group_set_shift_tables(uint32_t *shift, uint32_t val, gf_internal_t *h) +{ + int i; + uint32_t j; + + shift[0] = 0; + + for (i = 1; i < (1 << h->arg1); i <<= 1) { + for (j = 0; j < i; j++) shift[i|j] = shift[j]^val; + if (val & GF_FIRST_BIT) { + val <<= 1; + val ^= h->prim_poly; + } else { + val <<= 1; + } + } +} + +static +void gf_w32_group_s_equals_r_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + int i; + int leftover, rs; + uint32_t p, l, ind, r, a32; + int bits_left; + int g_s; + gf_region_data rd; + uint32_t *s32, *d32, *top; + struct gf_w32_group_data *gd; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gd = (struct gf_w32_group_data *) h->private; + g_s = h->arg1; + gf_w32_group_set_shift_tables(gd->shift, val, h); + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 4); + gf_do_initial_region_alignment(&rd); + + s32 = (uint32_t *) rd.s_start; + d32 = (uint32_t *) rd.d_start; + top = (uint32_t *) rd.d_top; + + leftover = 32 % g_s; + if (leftover == 0) leftover = g_s; + + while (d32 < top) { + rs = 32 - leftover; + a32 = *s32; + ind = 
a32 >> rs;
+    a32 <<= leftover;
+    p = gd->shift[ind];
+
+    bits_left = rs;
+    rs = 32 - g_s;
+
+    while (bits_left > 0) {
+      bits_left -= g_s;
+      ind = a32 >> rs;
+      a32 <<= g_s;
+      l = p >> rs;
+      p = (gd->shift[ind] ^ gd->reduce[l] ^ (p << g_s));
+    }
+    if (xor) p ^= *d32;
+    *d32 = p;
+    d32++;
+    s32++;
+  }
+  gf_do_final_region_alignment(&rd);
+}
+
+/*
+ * gf_w32_group_multiply_region
+ *
+ * Region multiply by val using the group tables, for the case where the
+ * shift group size (h->arg1, g_s) and reduce group size (h->arg2, g_r) are
+ * handled separately.
+ *
+ *   gf     - field handle; the gf_w32_group_data is read from its scratch.
+ *   src    - source region.
+ *   dest   - destination region.
+ *   val    - multiplier.  0 and 1 are handed to gf_multby_zero/gf_multby_one.
+ *   bytes  - region length in bytes.
+ *   xor    - non-zero: XOR the product into dest; zero: overwrite dest.
+ *
+ * Per word: the source is consumed g_s bits at a time from the top (the
+ * first chunk is `leftover` bits wide when g_s does not divide 32), each
+ * chunk selecting an entry of gd->shift that is XORed into a 64-bit
+ * accumulator.  The bits above bit 31 are then cleared g_r bits at a time
+ * with gd->reduce, starting at offset gd->tshift.
+ *
+ * The shift table depends only on val, so it is built once per call.  The
+ * earlier version repeated the gd / g_s setup and rebuilt the table a second
+ * time with identical inputs.
+ */
+static
+void gf_w32_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
+{
+  uint32_t *s32, *d32, *top;
+  int i;
+  int leftover;
+  uint64_t p, l, r;
+  uint32_t a32, ind;
+  int g_s, g_r;
+  struct gf_w32_group_data *gd;
+  gf_region_data rd;
+  gf_internal_t *h = (gf_internal_t *) gf->scratch;
+
+  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
+  if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; }
+
+  g_s = h->arg1;
+  g_r = h->arg2;
+  gd = (struct gf_w32_group_data *) h->private;
+  gf_w32_group_set_shift_tables(gd->shift, val, h);
+
+  /* Width of the first (topmost) chunk of each source word. */
+  leftover = GF_FIELD_WIDTH % g_s;
+  if (leftover == 0) leftover = g_s;
+
+  gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 4);
+  gf_do_initial_region_alignment(&rd);
+
+  s32 = (uint32_t *) rd.s_start;
+  d32 = (uint32_t *) rd.d_start;
+  top = (uint32_t *) rd.d_top;
+
+  while (d32 < top) {
+    a32 = *s32;
+    ind = a32 >> (GF_FIELD_WIDTH - leftover);
+    p = gd->shift[ind];
+    p <<= g_s;
+    a32 <<= leftover;
+
+    /* Middle chunks: accumulate, then make room for the next one. */
+    i = (GF_FIELD_WIDTH - leftover);
+    while (i > g_s) {
+      ind = a32 >> (GF_FIELD_WIDTH-g_s);
+      p ^= gd->shift[ind];
+      a32 <<= g_s;
+      p <<= g_s;
+      i -= g_s;
+    }
+
+    /* Last chunk: no further shift of the accumulator. */
+    ind = a32 >> (GF_FIELD_WIDTH-g_s);
+    p ^= gd->shift[ind];
+
+    /* Fold the bits above bit 31 back down, g_r bits per step. */
+    for (i = gd->tshift ; i >= 0; i -= g_r) {
+      l = p & (gd->rmask << i);
+      r = gd->reduce[l >> (i+32)];
+      r <<= (i);
+      p ^= r;
+    }
+
+    if (xor) p ^= *d32;
+    *d32 = p;
+    d32++;
+    s32++;
+  }
+  gf_do_final_region_alignment(&rd);
+}
+
+static
+inline
+gf_val_32_t
+gf_w32_group_s_equals_r_multiply(gf_t
*gf, gf_val_32_t a, gf_val_32_t b) +{ + int i; + int leftover, rs; + uint32_t p, l, ind, r, a32; + int bits_left; + int g_s; + + struct gf_w32_group_data *gd; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + g_s = h->arg1; + + gd = (struct gf_w32_group_data *) h->private; + gf_w32_group_set_shift_tables(gd->shift, b, h); + + leftover = 32 % g_s; + if (leftover == 0) leftover = g_s; + + rs = 32 - leftover; + a32 = a; + ind = a32 >> rs; + a32 <<= leftover; + p = gd->shift[ind]; + + bits_left = rs; + rs = 32 - g_s; + + while (bits_left > 0) { + bits_left -= g_s; + ind = a32 >> rs; + a32 <<= g_s; + l = p >> rs; + p = (gd->shift[ind] ^ gd->reduce[l] ^ (p << g_s)); + } + return p; +} + +static +inline +gf_val_32_t +gf_w32_group_4_4_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + int i; + uint32_t p, l, ind, r, a32; + + struct gf_w32_group_data *d44; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + + d44 = (struct gf_w32_group_data *) h->private; + gf_w32_group_set_shift_tables(d44->shift, b, h); + + p = 0; + a32 = a; + ind = a32 >> 28; + a32 <<= 4; + p = d44->shift[ind]; + ind = a32 >> 28; + a32 <<= 4; + l = p >> 28; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (p << 4)); + ind = a32 >> 28; + a32 <<= 4; + l = p >> 28; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (p << 4)); + ind = a32 >> 28; + a32 <<= 4; + l = p >> 28; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (p << 4)); + ind = a32 >> 28; + a32 <<= 4; + l = p >> 28; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (p << 4)); + ind = a32 >> 28; + a32 <<= 4; + l = p >> 28; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (p << 4)); + ind = a32 >> 28; + a32 <<= 4; + l = p >> 28; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (p << 4)); + ind = a32 >> 28; + l = p >> 28; + p = (d44->shift[ind] ^ d44->reduce[l] ^ (p << 4)); + return p; +} + +static +inline +gf_val_32_t +gf_w32_group_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + int i; + int leftover; + uint64_t p, l, r, mask; + uint32_t a32, ind; + int g_s, g_r; + struct 
gf_w32_group_data *gd; + + gf_internal_t *h = (gf_internal_t *) gf->scratch; + g_s = h->arg1; + g_r = h->arg2; + gd = (struct gf_w32_group_data *) h->private; + gf_w32_group_set_shift_tables(gd->shift, b, h); + + leftover = GF_FIELD_WIDTH % g_s; + if (leftover == 0) leftover = g_s; + + a32 = a; + ind = a32 >> (GF_FIELD_WIDTH - leftover); + p = gd->shift[ind]; + p <<= g_s; + a32 <<= leftover; + + i = (GF_FIELD_WIDTH - leftover); + while (i > g_s) { + ind = a32 >> (GF_FIELD_WIDTH-g_s); + p ^= gd->shift[ind]; + a32 <<= g_s; + p <<= g_s; + i -= g_s; + } + + ind = a32 >> (GF_FIELD_WIDTH-g_s); + p ^= gd->shift[ind]; + + for (i = gd->tshift ; i >= 0; i -= g_r) { + l = p & (gd->rmask << i); + r = gd->reduce[l >> (i+32)]; + r <<= (i); + p ^= r; + } + return p; +} + +static +int gf_w32_group_init(gf_t *gf) +{ + uint32_t i, j, p, index; + struct gf_w32_group_data *gd; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + int g_r, g_s; + + g_s = h->arg1; + g_r = h->arg2; + + gd = (struct gf_w32_group_data *) h->private; + gd->shift = (uint32_t *) (&(gd->memory)); + gd->reduce = gd->shift + (1 << h->arg1); + + gd->rmask = (1 << g_r) - 1; + gd->rmask <<= 32; + + gd->tshift = 32 % g_s; + if (gd->tshift == 0) gd->tshift = g_s; + gd->tshift = (32 - gd->tshift); + gd->tshift = ((gd->tshift-1)/g_r) * g_r; + + gd->reduce[0] = 0; + for (i = 0; i < (1 << h->arg2); i++) { + p = 0; + index = 0; + for (j = 0; j < h->arg2; j++) { + if (i & (1 << j)) { + p ^= (h->prim_poly << j); + index ^= (1 << j); + index ^= (h->prim_poly >> (32-j)); + } + } + gd->reduce[index] = p; + } + + if (h->arg1 == h->arg2) { + gf->multiply.w32 = gf_w32_group_s_equals_r_multiply; + gf->multiply_region.w32 = gf_w32_group_s_equals_r_multiply_region; + } else { + gf->multiply.w32 = gf_w32_group_multiply; + gf->multiply_region.w32 = gf_w32_group_multiply_region; + } + gf->divide.w32 = NULL; + gf->inverse.w32 = gf_w32_euclid; + + return 1; +} + +static +inline +gf_val_32_t +gf_w32_bytwo_b_multiply (gf_t *gf, gf_val_32_t 
a, gf_val_32_t b) +{ + uint32_t prod, pp, bmask; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + prod = 0; + bmask = 0x80000000; + + while (1) { + if (a & 1) prod ^= b; + a >>= 1; + if (a == 0) return prod; + if (b & bmask) { + b = ((b << 1) ^ pp); + } else { + b <<= 1; + } + } +} + +static +inline +gf_val_32_t +gf_w32_bytwo_p_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint32_t prod, pp, pmask, amask; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + + prod = 0; + pmask = 0x80000000; + amask = 0x80000000; + + while (amask != 0) { + if (prod & pmask) { + prod = ((prod << 1) ^ pp); + } else { + prod <<= 1; + } + if (a & amask) prod ^= b; + amask >>= 1; + } + return prod; +} + +static +void +gf_w32_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint64_t *s64, *d64, t1, t2, ta, prod, amask; + gf_region_data rd; + struct gf_w32_bytwo_data *btd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + btd = (struct gf_w32_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + gf_do_initial_region_alignment(&rd); + + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + + if (xor) { + while (s64 < (uint64_t *) rd.s_top) { + prod = 0; + amask = 0x80000000; + ta = *s64; + while (amask != 0) { + AB2(btd->prim_poly, btd->mask1, btd->mask2, prod, t1, t2); + if (val & amask) prod ^= ta; + amask >>= 1; + } + *d64 ^= prod; + d64++; + s64++; + } + } else { + while (s64 < (uint64_t *) rd.s_top) { + prod = 0; + amask = 0x80000000; + ta = *s64; + while (amask != 0) { + AB2(btd->prim_poly, btd->mask1, btd->mask2, prod, t1, t2); + if (val & amask) prod ^= ta; + amask >>= 1; + } + *d64 = prod; + d64++; + s64++; + } + } + gf_do_final_region_alignment(&rd); +} + +#define BYTWO_P_ONESTEP {\ + 
SSE_AB2(pp, m1 ,m2, prod, t1, t2); \ + t1 = _mm_and_si128(v, one); \ + t1 = _mm_sub_epi32(t1, one); \ + t1 = _mm_and_si128(t1, ta); \ + prod = _mm_xor_si128(prod, t1); \ + v = _mm_srli_epi64(v, 1); } + +static +void +gf_w32_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *s8, *d8; + uint32_t vrev; + uint64_t amask; + __m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v; + struct gf_w32_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + btd = (struct gf_w32_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + vrev = 0; + for (i = 0; i < 32; i++) { + vrev <<= 1; + if (!(val & (1 << i))) vrev |= 1; + } + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + pp = _mm_set1_epi32(btd->prim_poly&0xffffffff); + m1 = _mm_set1_epi32((btd->mask1)&0xffffffff); + m2 = _mm_set1_epi32((btd->mask2)&0xffffffff); + one = _mm_set1_epi32(1); + + while (d8 < (uint8_t *) rd.d_top) { + prod = _mm_setzero_si128(); + v = _mm_set1_epi32(vrev); + ta = _mm_load_si128((__m128i *) s8); + tp = (!xor) ? 
_mm_setzero_si128() : _mm_load_si128((__m128i *) d8); + BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; BYTWO_P_ONESTEP; + _mm_store_si128((__m128i *) d8, _mm_xor_si128(prod, tp)); + d8 += 16; + s8 += 16; + } + gf_do_final_region_alignment(&rd); +#endif +} + +static +void +gf_w32_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + int i; + uint64_t *s64, *d64, t1, t2, ta, tb, prod; + struct gf_w32_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32); + gf_do_initial_region_alignment(&rd); + + btd = (struct gf_w32_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + + switch (val) { + case 2: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 3: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = 
ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + case 4: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 5: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta ^ prod; + d64++; + s64++; + } + } + default: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + prod = *d64 ; + ta = *s64; + tb = val; + while (1) { + if (tb & 1) prod ^= ta; + tb >>= 1; + if (tb == 0) break; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + } + *d64 = prod; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + prod = 0 ; + ta = *s64; + tb = val; + while (1) { + if (tb & 1) prod ^= ta; + tb >>= 1; + if (tb == 0) break; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + } + *d64 = prod; + d64++; + s64++; + } + } + break; + } + gf_do_final_region_alignment(&rd); +} + +static +void +gf_w32_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w32_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi32(btd->prim_poly&0xffffffff); + m1 = 
_mm_set1_epi32((btd->mask1)&0xffffffff); + m2 = _mm_set1_epi32((btd->mask2)&0xffffffff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} + +static +void +gf_w32_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w32_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi32(btd->prim_poly&0xffffffff); + m1 = _mm_set1_epi32((btd->mask1)&0xffffffff); + m2 = _mm_set1_epi32((btd->mask2)&0xffffffff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_load_si128 ((__m128i *)(d8)); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} + + +static +void +gf_w32_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + uint32_t itb; + uint8_t *d8, *s8; + __m128i pp, m1, m2, t1, t2, va, vb; + struct gf_w32_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + btd = (struct gf_w32_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + if (val == 2) { + if (xor) { + gf_w32_bytwo_b_sse_region_2_xor(&rd, btd); + } else { + gf_w32_bytwo_b_sse_region_2_noxor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + } + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + pp = _mm_set1_epi32(btd->prim_poly&0xffffffff); + m1 = _mm_set1_epi32((btd->mask1)&0xffffffff); + m2 = _mm_set1_epi32((btd->mask2)&0xffffffff); + + while (d8 < (uint8_t *) rd.d_top) { + va = 
_mm_load_si128 ((__m128i *)(s8)); + vb = (!xor) ? _mm_setzero_si128() : _mm_load_si128 ((__m128i *)(d8)); + itb = val; + while (1) { + if (itb & 1) vb = _mm_xor_si128(vb, va); + itb >>= 1; + if (itb == 0) break; + SSE_AB2(pp, m1, m2, va, t1, t2); + } + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } + + gf_do_final_region_alignment(&rd); +#endif +} + +static +int gf_w32_bytwo_init(gf_t *gf) +{ + gf_internal_t *h; + uint64_t ip, m1, m2; + struct gf_w32_bytwo_data *btd; + + h = (gf_internal_t *) gf->scratch; + btd = (struct gf_w32_bytwo_data *) (h->private); + ip = h->prim_poly & 0xffffffff; + m1 = 0xfffffffe; + m2 = 0x80000000; + btd->prim_poly = 0; + btd->mask1 = 0; + btd->mask2 = 0; + + while (ip != 0) { + btd->prim_poly |= ip; + btd->mask1 |= m1; + btd->mask2 |= m2; + ip <<= GF_FIELD_WIDTH; + m1 <<= GF_FIELD_WIDTH; + m2 <<= GF_FIELD_WIDTH; + } + + if (h->mult_type == GF_MULT_BYTWO_p) { + gf->multiply.w32 = gf_w32_bytwo_p_multiply; + if (h->region_type == GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w32_bytwo_p_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region; + } + } else { + gf->multiply.w32 = gf_w32_bytwo_b_multiply; + if (h->region_type == GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w32_bytwo_b_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region; + } + } + gf->inverse.w32 = gf_w32_euclid; + return 1; +} + +static +inline +uint32_t +gf_w32_split_8_8_multiply (gf_t *gf, uint32_t a32, uint32_t b32) +{ + uint32_t product, i, j, mask, tb; + gf_internal_t *h; + struct gf_split_8_8_data *d8; + + h = (gf_internal_t *) gf->scratch; + d8 = (struct gf_split_8_8_data *) h->private; + product = 0; + mask = 0xff; + + for (i = 0; i < 4; i++) { + tb = b32; + for (j = 0; j < 4; j++) { + product ^= d8->tables[i+j][a32&mask][tb&mask]; + tb >>= 8; + } + a32 >>= 8; + } + return product; +} + +static +inline +void +gf_w32_split_32_8_lazy_multiply_region(gf_t *gf, void 
*src, void *dest, uint32_t val, int bytes, int xor) +{ + gf_internal_t *h; + uint32_t *s32, *d32, *top, p, a, v; + struct gf_split_32_8_lazy_data *d8; + struct gf_split_8_8_data *d88; + uint32_t *t[4]; + int i, j, k, change; + uint32_t pp; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + h = (gf_internal_t *) gf->scratch; + if (h->arg1 == 32 || h->arg2 == 32) { + d8 = (struct gf_split_32_8_lazy_data *) h->private; + for (i = 0; i < 4; i++) t[i] = d8->tables[i]; + change = (val != d8->last_value); + if (change) d8->last_value = val; + } else { + d88 = (struct gf_split_8_8_data *) h->private; + for (i = 0; i < 4; i++) t[i] = d88->region_tables[i]; + change = (val != d88->last_value); + if (change) d88->last_value = val; + } + pp = h->prim_poly; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 4); + gf_do_initial_region_alignment(&rd); + + s32 = (uint32_t *) rd.s_start; + d32 = (uint32_t *) rd.d_start; + top = (uint32_t *) rd.d_top; + + if (change) { + v = val; + for (i = 0; i < 4; i++) { + t[i][0] = 0; + for (j = 1; j < 256; j <<= 1) { + for (k = 0; k < j; k++) { + t[i][k^j] = (v ^ t[i][k]); + } + v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); + } + } + } + + while (d32 < top) { + p = (xor) ? 
*d32 : 0; + a = *s32; + i = 0; + while (a != 0) { + v = (a & 0xff); + p ^= t[i][v]; + a >>= 8; + i++; + } + *d32 = p; + d32++; + s32++; + } + gf_do_final_region_alignment(&rd); +} + +static +void +gf_w32_split_2_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ + gf_internal_t *h; + struct gf_split_2_32_lazy_data *ld; + int i; + uint32_t pp, v, v2, s, *s32, *d32, *top; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 4); + gf_do_initial_region_alignment(&rd); + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + ld = (struct gf_split_2_32_lazy_data *) h->private; + + if (ld->last_value != val) { + v = val; + for (i = 0; i < 16; i++) { + v2 = (v << 1); + if (v & GF_FIRST_BIT) v2 ^= pp; + ld->tables[i][0] = 0; + ld->tables[i][1] = v; + ld->tables[i][2] = v2; + ld->tables[i][3] = (v2 ^ v); + v = (v2 << 1); + if (v2 & GF_FIRST_BIT) v ^= pp; + } + } + ld->last_value = val; + + s32 = (uint32_t *) rd.s_start; + d32 = (uint32_t *) rd.d_start; + top = (uint32_t *) rd.d_top; + + while (d32 != top) { + v = (xor) ? 
*d32 : 0; + s = *s32; + i = 0; + while (s != 0) { + v ^= ld->tables[i][s&3]; + s >>= 2; + i++; + } + *d32 = v; + d32++; + s32++; + } + gf_do_final_region_alignment(&rd); +} + +static +void +gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + gf_internal_t *h; + int i, m, j, tindex; + uint32_t pp, v, v2, s, *s32, *d32, *top; + __m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32); + gf_do_initial_region_alignment(&rd); + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + s32 = (uint32_t *) rd.s_start; + d32 = (uint32_t *) rd.d_start; + top = (uint32_t *) rd.d_top; + + v = val; + for (i = 0; i < 16; i++) { + v2 = (v << 1); + if (v & GF_FIRST_BIT) v2 ^= pp; + tables[i] = _mm_set_epi32(v2 ^ v, v2, v, 0); + v = (v2 << 1); + if (v2 & GF_FIRST_BIT) v ^= pp; + } + + shuffler = _mm_set_epi8(0xc, 0xc, 0xc, 0xc, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0); + adder = _mm_set_epi8(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0); + mask1 = _mm_set1_epi8(0x3); + mask2 = _mm_set1_epi8(0xc); + + while (d32 != top) { + pi = (xor) ? 
_mm_load_si128 ((__m128i *) d32) : _mm_setzero_si128(); + vi = _mm_load_si128((__m128i *) s32); + + tindex = 0; + for (i = 0; i < 4; i++) { + si = _mm_shuffle_epi8(vi, shuffler); + + xi = _mm_and_si128(si, mask1); + xi = _mm_slli_epi16(xi, 2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + vi = _mm_srli_epi32(vi, 8); + } + _mm_store_si128((__m128i *) d32, pi); + d32 += 4; + s32 += 4; + } + + gf_do_final_region_alignment(&rd); + +#endif +} + +static +void +gf_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ + gf_internal_t *h; + struct gf_split_4_32_lazy_data *ld; + int i, j, k; + uint32_t pp, v, s, *s32, *d32, *top; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 4); + gf_do_initial_region_alignment(&rd); + + if (ld->last_value != val) { + v = val; + for (i = 0; i < 8; i++) { + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? 
((v << 1) ^ pp) : (v << 1); + } + } + } + ld->last_value = val; + + s32 = (uint32_t *) rd.s_start; + d32 = (uint32_t *) rd.d_start; + top = (uint32_t *) rd.d_top; + + while (d32 != top) { + v = (xor) ? *d32 : 0; + s = *s32; + i = 0; + while (s != 0) { + v ^= ld->tables[i][s&0xf]; + s >>= 4; + i++; + } + *d32 = v; + d32++; + s32++; + } + gf_do_final_region_alignment(&rd); +} + +static +void +gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + gf_internal_t *h; + int i, m, j, k, tindex; + uint32_t pp, v, s, *s32, *d32, *top, *realtop; + __m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3; + struct gf_split_4_32_lazy_data *ld; + uint8_t btable[16]; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 64); + gf_do_initial_region_alignment(&rd); + + s32 = (uint32_t *) rd.s_start; + d32 = (uint32_t *) rd.d_start; + top = (uint32_t *) rd.d_top; + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + v = val; + for (i = 0; i < 8; i++) { + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? 
((v << 1) ^ pp) : (v << 1); + } + for (j = 0; j < 4; j++) { + for (k = 0; k < 16; k++) { + btable[k] = (uint8_t) ld->tables[i][k]; + ld->tables[i][k] >>= 8; + } + tables[i][j] = _mm_loadu_si128((__m128i *) btable); + } + } + + mask1 = _mm_set1_epi8(0xf); + + if (xor) { + while (d32 != top) { + p0 = _mm_load_si128 ((__m128i *) d32); + p1 = _mm_load_si128 ((__m128i *) (d32+4)); + p2 = _mm_load_si128 ((__m128i *) (d32+8)); + p3 = _mm_load_si128 ((__m128i *) (d32+12)); + + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si)); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, 
_mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } else { + while (d32 != top) { + + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + + si = _mm_and_si128(v0, mask1); + p0 = _mm_shuffle_epi8(tables[0][0], si); + p1 = _mm_shuffle_epi8(tables[0][1], si); + p2 = _mm_shuffle_epi8(tables[0][2], si); + p3 = _mm_shuffle_epi8(tables[0][3], si); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, 
_mm_shuffle_epi8(tables[1][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); 
+ d32 += 16; + } + } + + gf_do_final_region_alignment(&rd); + +#endif +} + + +static +void +gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + gf_internal_t *h; + int i, m, j, k, tindex; + uint32_t pp, v, s, *s32, *d32, *top; + __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8, mask16; + __m128i tv1, tv2, tv3, tv0; + struct gf_split_4_32_lazy_data *ld; + uint8_t btable[16]; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 64); + gf_do_initial_region_alignment(&rd); + + s32 = (uint32_t *) rd.s_start; + d32 = (uint32_t *) rd.d_start; + top = (uint32_t *) rd.d_top; + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + v = val; + for (i = 0; i < 8; i++) { + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? 
((v << 1) ^ pp) : (v << 1); + } + for (j = 0; j < 4; j++) { + for (k = 0; k < 16; k++) { + btable[k] = (uint8_t) ld->tables[i][k]; + ld->tables[i][k] >>= 8; + } + tables[i][j] = _mm_loadu_si128((__m128i *) btable); + } + } + + mask1 = _mm_set1_epi8(0xf); + mask8 = _mm_set1_epi16(0xff); + mask16 = _mm_set1_epi32(0xffff); + + if (xor) { + while (d32 != top) { + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + + p0 = _mm_srli_epi16(v0, 8); + p1 = _mm_srli_epi16(v1, 8); + p2 = _mm_srli_epi16(v2, 8); + p3 = _mm_srli_epi16(v3, 8); + + tv0 = _mm_and_si128(v0, mask8); + tv1 = _mm_and_si128(v1, mask8); + tv2 = _mm_and_si128(v2, mask8); + tv3 = _mm_and_si128(v3, mask8); + + v0 = _mm_packus_epi16(p1, p0); + v1 = _mm_packus_epi16(tv1, tv0); + v2 = _mm_packus_epi16(p3, p2); + v3 = _mm_packus_epi16(tv3, tv2); + + p0 = _mm_srli_epi16(v0, 8); + p1 = _mm_srli_epi16(v1, 8); + p2 = _mm_srli_epi16(v2, 8); + p3 = _mm_srli_epi16(v3, 8); + + tv0 = _mm_and_si128(v0, mask8); + tv1 = _mm_and_si128(v1, mask8); + tv2 = _mm_and_si128(v2, mask8); + tv3 = _mm_and_si128(v3, mask8); + + v0 = _mm_packus_epi16(p2, p0); + v1 = _mm_packus_epi16(p3, p1); + v2 = _mm_packus_epi16(tv2, tv0); + v3 = _mm_packus_epi16(tv3, tv1); + + si = _mm_and_si128(v0, mask1); + p0 = _mm_shuffle_epi8(tables[6][0], si); + p1 = _mm_shuffle_epi8(tables[6][1], si); + p2 = _mm_shuffle_epi8(tables[6][2], si); + p3 = _mm_shuffle_epi8(tables[6][3], si); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, 
_mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + + tv0 = _mm_unpackhi_epi8(p1, p3); + tv1 = _mm_unpackhi_epi8(p0, p2); + tv2 = _mm_unpacklo_epi8(p1, p3); + tv3 = _mm_unpacklo_epi8(p0, p2); + + p0 = _mm_unpackhi_epi8(tv1, tv0); + p1 = _mm_unpacklo_epi8(tv1, tv0); + p2 = _mm_unpackhi_epi8(tv3, tv2); + p3 = _mm_unpacklo_epi8(tv3, tv2); + + v0 = _mm_load_si128 ((__m128i *) d32); + 
v1 = _mm_load_si128 ((__m128i *) (d32+4)); + v2 = _mm_load_si128 ((__m128i *) (d32+8)); + v3 = _mm_load_si128 ((__m128i *) (d32+12)); + + p0 = _mm_xor_si128(p0, v0); + p1 = _mm_xor_si128(p1, v1); + p2 = _mm_xor_si128(p2, v2); + p3 = _mm_xor_si128(p3, v3); + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } else { + while (d32 != top) { + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + + p0 = _mm_srli_epi16(v0, 8); + p1 = _mm_srli_epi16(v1, 8); + p2 = _mm_srli_epi16(v2, 8); + p3 = _mm_srli_epi16(v3, 8); + + tv0 = _mm_and_si128(v0, mask8); + tv1 = _mm_and_si128(v1, mask8); + tv2 = _mm_and_si128(v2, mask8); + tv3 = _mm_and_si128(v3, mask8); + + v0 = _mm_packus_epi16(p1, p0); + v1 = _mm_packus_epi16(tv1, tv0); + v2 = _mm_packus_epi16(p3, p2); + v3 = _mm_packus_epi16(tv3, tv2); + + p0 = _mm_srli_epi16(v0, 8); + p1 = _mm_srli_epi16(v1, 8); + p2 = _mm_srli_epi16(v2, 8); + p3 = _mm_srli_epi16(v3, 8); + + tv0 = _mm_and_si128(v0, mask8); + tv1 = _mm_and_si128(v1, mask8); + tv2 = _mm_and_si128(v2, mask8); + tv3 = _mm_and_si128(v3, mask8); + + v0 = _mm_packus_epi16(p2, p0); + v1 = _mm_packus_epi16(p3, p1); + v2 = _mm_packus_epi16(tv2, tv0); + v3 = _mm_packus_epi16(tv3, tv1); + + si = _mm_and_si128(v0, mask1); + p0 = _mm_shuffle_epi8(tables[6][0], si); + p1 = _mm_shuffle_epi8(tables[6][1], si); + p2 = _mm_shuffle_epi8(tables[6][2], si); + p3 = _mm_shuffle_epi8(tables[6][3], si); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + 
+ si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + + tv0 = _mm_unpackhi_epi8(p1, p3); + tv1 = _mm_unpackhi_epi8(p0, p2); + tv2 = _mm_unpacklo_epi8(p1, p3); + tv3 = _mm_unpacklo_epi8(p0, p2); + + p0 = _mm_unpackhi_epi8(tv1, tv0); + p1 = _mm_unpacklo_epi8(tv1, 
tv0); + p2 = _mm_unpackhi_epi8(tv3, tv2); + p3 = _mm_unpacklo_epi8(tv3, tv2); + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } + gf_do_final_region_alignment(&rd); + +#endif +} + +static +int gf_w32_split_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_split_2_32_lazy_data *ld2; + struct gf_split_4_32_lazy_data *ld4; + struct gf_split_8_8_data *d8; + struct gf_split_32_8_lazy_data *d32; + uint32_t p, basep; + int i, j, exp; + + h = (gf_internal_t *) gf->scratch; + + /* Defaults */ + gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + gf->multiply.w32 = gf_w32_shift_multiply; + gf->inverse.w32 = gf_w32_euclid; + + if ((h->arg1 == 8 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 8)) { + d32 = (struct gf_split_32_8_lazy_data *) h->private; + d32->last_value = 0; + gf->multiply_region.w32 = gf_w32_split_32_8_lazy_multiply_region; + } + + if (h->arg1 == 8 && h->arg2 == 8) { + gf->multiply.w32 = gf_w32_split_8_8_multiply; + gf->multiply_region.w32 = gf_w32_split_32_8_lazy_multiply_region; + d8 = (struct gf_split_8_8_data *) h->private; + d8->last_value = 0; + basep = 1; + for (exp = 0; exp < 7; exp++) { + for (j = 0; j < 256; j++) d8->tables[exp][0][j] = 0; + for (i = 0; i < 256; i++) d8->tables[exp][i][0] = 0; + d8->tables[exp][1][1] = basep; + for (i = 2; i < 256; i++) { + if (i&1) { + p = d8->tables[exp][i^1][1]; + d8->tables[exp][i][1] = p ^ basep; + } else { + p = d8->tables[exp][i>>1][1]; + d8->tables[exp][i][1] = GF_MULTBY_TWO(p); + } + } + for (i = 1; i < 256; i++) { + p = d8->tables[exp][i][1]; + for (j = 1; j < 256; j++) { + if (j&1) { + d8->tables[exp][i][j] = d8->tables[exp][i][j^1] ^ p; + } else { + d8->tables[exp][i][j] = GF_MULTBY_TWO(d8->tables[exp][i][j>>1]); + } + } + } + for (i = 0; i < 8; i++) basep = GF_MULTBY_TWO(basep); + } + } + if ((h->arg1 == 2 && h->arg2 == 32) || (h->arg1 == 32 && 
h->arg2 == 2)) { + ld2 = (struct gf_split_2_32_lazy_data *) h->private; + ld2->last_value = 0; + if (h->region_type & GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; + } + } + if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4)) { + ld4 = (struct gf_split_4_32_lazy_data *) h->private; + ld4->last_value = 0; + if (h->region_type & GF_REGION_SSE) { + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region; + } else { + gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_multiply_region; + } + } else { + gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region; + } + } + return 1; +} + +static +uint32_t +gf_w32_composite_multiply_logtable(gf_t *gf, uint32_t a, uint32_t b) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + struct gf_w16_logtable_data * ltd = (struct gf_w16_logtable_data *) h->private; + + uint32_t b0 = b & 0xffff; + uint32_t b1 = b >> 16; + uint32_t a0 = a & 0xffff; + uint32_t a1 = a >> 16; + uint32_t a1b1; + uint32_t la0, la1, lb0, lb1, l11; + uint32_t p; + + la0 = ltd->log_tbl[a0]; + la1 = ltd->log_tbl[a1]; + lb0 = ltd->log_tbl[b0]; + lb1 = ltd->log_tbl[b1]; + + if (a1 && b1) { + l11 = (la1 + lb1); + a1b1 = ltd->antilog_tbl[l11]; + l11 = ltd->log_tbl[a1b1]; + p = ltd->antilog_tbl[l11+ltd->log_s]; + } else { + a1b1 = 0; + p = 0; + } + + if (a0 && b1) p ^= ltd->antilog_tbl[la0+lb1]; + + if (a1 && b0) p ^= ltd->antilog_tbl[la1+lb0]; + p <<= 16; + p ^= a1b1; + if (a0 && b0) p ^= ltd->antilog_tbl[la0+lb0]; + return p; +} + +static +uint32_t +gf_w32_composite_multiply_recursive(gf_t *gf, uint32_t a, uint32_t b) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint16_t b0 = b & 0x0000ffff; + uint16_t b1 = (b & 0xffff0000) >> 16; + uint16_t a0 = a & 0x0000ffff; + uint16_t a1 = (a & 0xffff0000) >> 16; + 
uint16_t a1b1; + uint32_t rv; + + a1b1 = base_gf->multiply.w32(base_gf, a1, b1); + + rv = ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_16_2)) << 16)); + return rv; +} + +/* + * Composite field division trick (explained in 2007 tech report) + * + * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1 + * + * let c = b^-1 + * + * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0) + * + * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1 + * + * let d = b1c1 and d+1 = b0c0 + * + * solve s*b1c1+b1c0+b0c1 = 0 + * + * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1 + * + * c0 = (d+1)b0^-1 + * c1 = d*b1^-1 + * + * a / b = a * c + */ +static +uint32_t +gf_w32_composite_inverse(gf_t *gf, uint32_t a) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint16_t a0 = a & 0x0000ffff; + uint16_t a1 = (a & 0xffff0000) >> 16; + uint16_t c0, c1, d, tmp; + uint32_t c; + uint16_t a0inv, a1inv; + + if (a0 == 0) { + a1inv = base_gf->inverse.w32(base_gf, a1); + c0 = base_gf->multiply.w32(base_gf, a1inv, GF_S_GF_16_2); + c1 = a1inv; + } else if (a1 == 0) { + c0 = base_gf->inverse.w32(base_gf, a0); + c1 = 0; + } else { + a1inv = base_gf->inverse.w32(base_gf, a1); + a0inv = base_gf->inverse.w32(base_gf, a0); + + d = base_gf->multiply.w32(base_gf, a1, a0inv); + + tmp = (base_gf->multiply.w32(base_gf, a1, a0inv) ^ base_gf->multiply.w32(base_gf, a0, a1inv) ^ GF_S_GF_16_2); + tmp = base_gf->inverse.w32(base_gf, tmp); + + d = base_gf->multiply.w32(base_gf, d, tmp); + + c0 = base_gf->multiply.w32(base_gf, (d^1), a0inv); + c1 = base_gf->multiply.w32(base_gf, d, a1inv); + } + + c = c0 | (c1 << 16); + + return c; +} + +static +uint32_t +gf_w32_composite_divide(gf_t *gf, uint32_t a, uint32_t b) +{ + uint32_t binv; + + binv = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, binv); +} + +/* JSP: I'm not using this because I don't think it has 
value added. */ +static +void +gf_w32_composite_multiply_region_inline(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + int i=0; + struct gf_w16_logtable_data * ltd; + uint16_t b0 = val & 0x0000ffff; + uint16_t b1 = (val & 0xffff0000) >> 16; + uint32_t *s32 = (uint32_t *) src; + uint32_t *d32 = (uint32_t *) dest; + uint16_t a0, a1, a1b1; + int num_syms = bytes >> 2; + int sym_divisible = bytes % 4; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + ltd = (struct gf_w16_logtable_data *) h->private; + + if (xor) { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]]; + + d32[i] ^= ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) | + ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^ + ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16)); + + } + } else { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]]; + + d32[i] = ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) | + ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^ + ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16)); + } + } +} + +static +void +gf_w32_composite_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ + gf_internal_t *h = 
(gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + struct gf_w16_logtable_data * ltd; + uint16_t b0 = val & 0x0000ffff; + uint16_t b1 = (val & 0xffff0000) >> 16; + uint32_t *s32, *d32, *top; + uint16_t a0, a1, a1b1; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 4); + + + s32 = rd.s_start; + d32 = rd.d_start; + top = rd.d_top; + + if (xor) { + while (d32 < top) { + a0 = *s32 & 0x0000ffff; + a1 = (*s32 & 0xffff0000) >> 16; + a1b1 = base_gf->multiply.w32(base_gf, a1, b1); + + *d32 ^= ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_16_2)) << 16)); + s32++; + d32++; + } + } else { + while (d32 < top) { + a0 = *s32 & 0x0000ffff; + a1 = (*s32 & 0xffff0000) >> 16; + a1b1 = base_gf->multiply.w32(base_gf, a1, b1); + + *d32 = ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_16_2)) << 16)); + s32++; + d32++; + } + } +} + +static +void +gf_w32_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint16_t val0 = val & 0x0000ffff; + uint16_t val1 = (val & 0xffff0000) >> 16; + gf_region_data rd; + int sub_reg_size; + uint8_t *slow, *shigh; + uint8_t *dlow, *dhigh, *top;; + + /* JSP: I want the two pointers aligned wrt each other on 16 byte + boundaries. So I'm going to make sure that the area on + which the two operate is a multiple of 32. Of course, that + junks up the mapping, but so be it -- that's why we have extract_word.... 
*/ + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32); + gf_do_initial_region_alignment(&rd); + + slow = (uint8_t *) rd.s_start; + dlow = (uint8_t *) rd.d_start; + top = (uint8_t *) rd.d_top; + sub_reg_size = (top - dlow)/2; + shigh = slow + sub_reg_size; + dhigh = dlow + sub_reg_size; + + base_gf->multiply_region.w32(base_gf, slow, dlow, val0, sub_reg_size, xor); + base_gf->multiply_region.w32(base_gf, shigh, dlow, val1, sub_reg_size, 1); + base_gf->multiply_region.w32(base_gf, slow, dhigh, val1, sub_reg_size, xor); + base_gf->multiply_region.w32(base_gf, shigh, dhigh, val0, sub_reg_size, 1); + base_gf->multiply_region.w32(base_gf, shigh, dhigh, base_gf->multiply.w32(base_gf, GF_S_GF_16_2, val1), sub_reg_size, 1); + + gf_do_final_region_alignment(&rd); +} + +static +int gf_w32_composite_init(gf_t *gf) +{ + struct gf_w16_logtable_data *ltd; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + gf_internal_t *base_h = (gf_internal_t *) base_gf->scratch; + uint32_t a, b; + uint64_t prim_poly = ((gf_internal_t *) base_gf->scratch)->prim_poly; + int i; + + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w32 = gf_w32_composite_multiply_region_alt; + } else if (h->arg2 == 0 && base_h->mult_type == GF_MULT_LOG_TABLE && + base_h->arg1 == 0) { + gf->multiply_region.w32 = gf_w32_composite_multiply_region; +/* It would be this, were that not buggy and I cared: + gf->multiply_region.w32 = gf_w32_composite_multiply_region_inline; */ + } else { + gf->multiply_region.w32 = gf_w32_composite_multiply_region; + } + + if (h->arg2 == 0) { + ltd = (struct gf_w16_logtable_data *) h->private; + + ltd->log_tbl[0] = 0; + + bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl)); + + ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_BASE_FIELD_SIZE * 2]); + + b = 1; + for (i = 0; i < GF_BASE_FIELD_GROUP_SIZE; i++) { + ltd->log_tbl[b] = (uint16_t)i; + ltd->antilog_tbl[i] = (uint16_t)b; + ltd->antilog_tbl[i+GF_BASE_FIELD_GROUP_SIZE] = 
(uint16_t)b; + b <<= 1; + if (b & GF_BASE_FIELD_SIZE) { + b = b ^ prim_poly; + } + } + ltd->log_s = ltd->log_tbl[GF_S_GF_16_2]; + ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ + ltd->inv_tbl[1] = 1; + for (i = 2; i < GF_BASE_FIELD_SIZE; i++) { + ltd->inv_tbl[i] = ltd->antilog_tbl[GF_BASE_FIELD_GROUP_SIZE-ltd->log_tbl[i]]; + } + gf->multiply.w32 = gf_w32_composite_multiply_logtable; + } else { + gf->multiply.w32 = gf_w32_composite_multiply_recursive; + } + + gf->divide.w32 = gf_w32_composite_divide; + gf->inverse.w32 = gf_w32_composite_inverse; + + return 1; +} + +int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + int ss, sa; + + ss = (GF_REGION_SSE | GF_REGION_NOSSE); + sa = (GF_REGION_STDMAP | GF_REGION_ALTMAP); + + switch(mult_type) + { + case GF_MULT_BYTWO_p: + case GF_MULT_BYTWO_b: + if (arg1 != 0 || arg2 != 0) return -1; + if (region_type != GF_REGION_CAUCHY) { + if ((region_type | ss) != ss || (region_type & ss) == ss) return -1; + } + return sizeof(gf_internal_t) + sizeof(struct gf_w32_bytwo_data); + break; + case GF_MULT_GROUP: + if (arg1 <= 0 || arg2 <= 0) return -1; + if (region_type != GF_REGION_DEFAULT && region_type != GF_REGION_CAUCHY) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_w32_group_data) + + sizeof(uint32_t) * (1 << arg1) + + sizeof(uint32_t) * (1 << arg2) + 64; + break; + case GF_MULT_SPLIT_TABLE: + if (arg1 == 8 && arg2 == 8){ + if (region_type != GF_REGION_DEFAULT && region_type != GF_REGION_CAUCHY) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_split_8_8_data) + 64; + } + if ((arg1 == 8 && arg2 == 32) || (arg2 == 8 && arg1 == 32)) { + region_type &= (~GF_REGION_LAZY); + if (region_type != GF_REGION_DEFAULT) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_split_32_8_lazy_data) + 64; + } + if ((arg1 == 2 && arg2 == 32) || (arg2 == 2 && arg1 == 32)) { + region_type &= (~GF_REGION_LAZY); + if ((region_type & ss) == ss) 
return -1; + if ((region_type | ss) != ss) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64; + } + if ((arg1 == 4 && arg2 == 32) || (arg2 == 4 && arg1 == 32)) { + region_type &= (~GF_REGION_LAZY); + if ((region_type & ss) == ss) return -1; + if ((region_type & sa) == sa) return -1; + if (region_type & (~(ss|sa))) return -1; + if (region_type & GF_REGION_SSE) { + return sizeof(gf_internal_t) + sizeof(struct gf_split_4_32_lazy_data) + 64; + } else if (region_type & GF_REGION_ALTMAP) { + return -1; + } else { + return sizeof(gf_internal_t) + sizeof(struct gf_split_4_32_lazy_data) + 64; + } + } + return -1; + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0) return -1; + if (region_type != 0 && region_type != GF_REGION_CAUCHY) return -1; + return sizeof(gf_internal_t); + break; + case GF_MULT_COMPOSITE: + if (region_type & ~(GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1; + if (arg1 == 2 && arg2 == 0) { + return sizeof(gf_internal_t) + sizeof(struct gf_w16_logtable_data) + 64; + } else if (arg1 == 2 && arg2 == 1) { + return sizeof(gf_internal_t) + 64; + } else { + return -1; + } + + default: + return -1; + } +} + +int gf_w32_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x400007; + + gf->multiply.w32 = NULL; + gf->divide.w32 = NULL; + gf->inverse.w32 = NULL; + gf->multiply_region.w32 = NULL; + + switch(h->mult_type) { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break; + case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break; + case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break; + case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break; + case GF_MULT_BYTWO_p: + case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break; + + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w32 = 
gf_w32_divide_from_inverse; + gf->inverse.w32 = gf_w32_euclid; + } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w32 = gf_w32_divide_from_inverse; + gf->inverse.w32 = gf_w32_matrix; + } + + if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { + gf->divide.w32 = gf_w32_divide_from_inverse; + } + if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { + gf->inverse.w32 = gf_w32_inverse_from_divide; + } + if (h->region_type == GF_REGION_CAUCHY) { + gf->extract_word.w32 = gf_wgen_extract_word; + gf->multiply_region.w32 = gf_wgen_cauchy_region; + } else if (h->region_type & GF_REGION_ALTMAP) { + if (h->mult_type == GF_MULT_COMPOSITE) { + gf->extract_word.w32 = gf_w32_composite_extract_word; + } else { + gf->extract_word.w32 = gf_w32_split_extract_word; + } + } else { + gf->extract_word.w32 = gf_w32_extract_word; + } + return 1; +} diff --git a/gf_w4.c b/gf_w4.c new file mode 100644 index 0000000..6ea8fc4 --- /dev/null +++ b/gf_w4.c @@ -0,0 +1,2006 @@ +/* + * gf_w4.c + * + * Routines for 4-bit Galois fields + */ + +#include "gf_int.h" +#include +#include + +#define GF_FIELD_WIDTH 4 +#define GF_DOUBLE_WIDTH (GF_FIELD_WIDTH*2) +#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) +#define GF_MULT_GROUP_SIZE (GF_FIELD_SIZE-1) + +/* ------------------------------------------------------------ + JSP: Each implementation has its own data, which is allocated + at one time as part of the handle. For that reason, it + shouldn't be hierarchical -- i.e. one should be able to + allocate it with one call to malloc. 
*/ + +struct gf_logtable_data { + uint8_t log_tbl[GF_FIELD_SIZE]; + uint8_t antilog_tbl[GF_FIELD_SIZE * 2]; + uint8_t *antilog_tbl_div; +}; + +struct gf_single_table_data { + uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; +}; + +struct gf_double_table_data { + uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE*GF_FIELD_SIZE]; +}; +struct gf_quad_table_data { + uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint16_t mult[GF_FIELD_SIZE][(1<<16)]; +}; + +struct gf_quad_table_lazy_data { + uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint8_t smult[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint16_t mult[(1 << 16)]; +}; + +struct gf_bytwo_data { + uint64_t prim_poly; + uint64_t mask1; + uint64_t mask2; +}; + +#define AB2(ip, am1 ,am2, b, t1, t2) {\ + t1 = (b << 1) & am1;\ + t2 = b & am2; \ + t2 = ((t2 << 1) - (t2 >> (GF_FIELD_WIDTH-1))); \ + b = (t1 ^ (t2 & ip));} + +#define SSE_AB2(pp, m1 ,m2, va, t1, t2) {\ + t1 = _mm_and_si128(_mm_slli_epi64(va, 1), m1); \ + t2 = _mm_and_si128(va, _mm_set1_epi8(0x88)); \ + t2 = _mm_sub_epi64 (_mm_slli_epi64(t2, 1), _mm_srli_epi64(t2, (GF_FIELD_WIDTH-1))); \ + va = _mm_xor_si128(t1, _mm_and_si128(t2, pp)); } + +/* ------------------------------------------------------------ + JSP: These are basic and work from multiple implementations. 
+ */ + +static +inline +gf_val_32_t gf_w4_inverse_from_divide (gf_t *gf, gf_val_32_t a) +{ + return gf->divide.w32(gf, 1, a); +} + +static +inline +gf_val_32_t gf_w4_divide_from_inverse (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + b = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, b); +} + +static +inline +gf_val_32_t gf_w4_euclid (gf_t *gf, gf_val_32_t b) +{ + gf_val_32_t e_i, e_im1, e_ip1; + gf_val_32_t d_i, d_im1, d_ip1; + gf_val_32_t y_i, y_im1, y_ip1; + gf_val_32_t c_i; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = 4; + for (d_i = d_im1; ((1 << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 >= d_i) { + c_i ^= (1 << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i = e_ip1; + d_i = d_ip1; + } + + return y_i; +} + +static +gf_val_32_t gf_w4_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + uint8_t *r8, v; + + r8 = (uint8_t *) start; + v = r8[index/2]; + if (index%2) { + return v >> 4; + } else { + return v&0xf; + } +} + + +static +inline +gf_val_32_t gf_w4_matrix (gf_t *gf, gf_val_32_t b) +{ + return gf_bitmatrix_inverse(b, 4, ((gf_internal_t *) (gf->scratch))->prim_poly); +} + + +/* ------------------------------------------------------------ + IMPLEMENTATION: LOG_TABLE: + + JSP: This is a basic log-antilog implementation. + I'm not going to spend any time optimizing it because the + other techniques are faster for both single and region + operations. + */ + +static +inline +gf_val_32_t +gf_w4_log_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_logtable_data *ltd; + + ltd = (struct gf_logtable_data *) ((gf_internal_t *) (gf->scratch))->private; + return (a == 0 || b == 0) ? 
0 : ltd->antilog_tbl[(unsigned)(ltd->log_tbl[a] + ltd->log_tbl[b])]; +} + +static +inline +gf_val_32_t +gf_w4_log_divide (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + int log_sum = 0; + struct gf_logtable_data *ltd; + + if (a == 0 || b == 0) return 0; + ltd = (struct gf_logtable_data *) ((gf_internal_t *) (gf->scratch))->private; + + log_sum = ltd->log_tbl[a] - ltd->log_tbl[b]; + return (ltd->antilog_tbl_div[log_sum]); +} + +static +void +gf_w4_log_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + int i; + uint8_t lv, b, c; + uint8_t *s8, *d8; + + struct gf_logtable_data *ltd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + ltd = (struct gf_logtable_data *) ((gf_internal_t *) (gf->scratch))->private; + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + + lv = ltd->log_tbl[val]; + + for (i = 0; i < bytes; i++) { + c = (xor) ? d8[i] : 0; + b = (s8[i] >> GF_FIELD_WIDTH); + c ^= (b == 0) ? 0 : (ltd->antilog_tbl[lv + ltd->log_tbl[b]] << GF_FIELD_WIDTH); + b = (s8[i] & 0xf); + c ^= (b == 0) ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[b]]; + d8[i] = c; + } +} + +static +int gf_w4_log_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_logtable_data *ltd; + int i, b; + + h = (gf_internal_t *) gf->scratch; + ltd = h->private; + + ltd->log_tbl[0] = 0; + + ltd->antilog_tbl_div = ltd->antilog_tbl + (GF_FIELD_SIZE-1); + b = 1; + for (i = 0; i < GF_FIELD_SIZE-1; i++) { + ltd->log_tbl[b] = i; + ltd->antilog_tbl[i] = b; + ltd->antilog_tbl[i+GF_FIELD_SIZE-1] = b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + + gf->inverse.w32 = gf_w4_inverse_from_divide; + gf->divide.w32 = gf_w4_log_divide; + gf->multiply.w32 = gf_w4_log_multiply; + gf->multiply_region.w32 = gf_w4_log_multiply_region; + return 1; +} + +/* ------------------------------------------------------------ + IMPLEMENTATION: SINGLE TABLE: JSP. 
+ */ + +static +inline +gf_val_32_t +gf_w4_single_table_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_single_table_data *std; + + std = (struct gf_single_table_data *) ((gf_internal_t *) (gf->scratch))->private; + return std->mult[a][b]; +} + +static +inline +gf_val_32_t +gf_w4_single_table_divide (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_single_table_data *std; + + std = (struct gf_single_table_data *) ((gf_internal_t *) (gf->scratch))->private; + return std->div[a][b]; +} + +static +void +gf_w4_single_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + int i; + uint8_t b, c; + uint8_t *s8, *d8; + + struct gf_single_table_data *std; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + std = (struct gf_single_table_data *) ((gf_internal_t *) (gf->scratch))->private; + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + + for (i = 0; i < bytes; i++) { + c = (xor) ? 
d8[i] : 0; + b = (s8[i] >> GF_FIELD_WIDTH); + c ^= (std->mult[val][b] << GF_FIELD_WIDTH); + b = (s8[i] & 0xf); + c ^= (std->mult[val][b]); + d8[i] = c; + } +} + +#define MM_PRINT(s, r) { uint8_t blah[16]; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (i = 0; i < 16; i++) printf(" %02x", blah[i]); printf("\n"); } + +static +void +gf_w4_single_table_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + gf_region_data rd; + uint8_t *base, *sptr, *dptr, *top; + __m128i tl, loset, h4, r, va, th; + + struct gf_single_table_data *std; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + + std = (struct gf_single_table_data *) ((gf_internal_t *) (gf->scratch))->private; + base = (uint8_t *) std->mult; + base += (val << GF_FIELD_WIDTH); + + gf_do_initial_region_alignment(&rd); + + tl = _mm_loadu_si128((__m128i *)base); + th = _mm_slli_epi64(tl, 4); + loset = _mm_set1_epi8 (0x0f); + + sptr = rd.s_start; + dptr = rd.d_start; + top = rd.s_top; + + while (sptr < (uint8_t *) top) { + va = _mm_load_si128 ((__m128i *)(sptr)); + r = _mm_and_si128 (loset, va); + r = _mm_shuffle_epi8 (tl, r); + va = _mm_srli_epi64 (va, 4); + va = _mm_and_si128 (loset, va); + va = _mm_shuffle_epi8 (th, va); + r = _mm_xor_si128 (r, va); + va = (xor) ? 
_mm_load_si128 ((__m128i *)(dptr)) : _mm_setzero_si128(); + r = _mm_xor_si128 (r, va); + _mm_store_si128 ((__m128i *)(dptr), r); + dptr += 16; + sptr += 16; + } + gf_do_final_region_alignment(&rd); + +#endif +} + +static +int gf_w4_single_table_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_single_table_data *std; + int a, b, prod, loga, logb; + uint8_t log_tbl[GF_FIELD_SIZE]; + uint8_t antilog_tbl[GF_FIELD_SIZE*2]; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_single_table_data *)h->private; + + b = 1; + for (a = 0; a < GF_MULT_GROUP_SIZE; a++) { + log_tbl[b] = a; + antilog_tbl[a] = b; + antilog_tbl[a+GF_MULT_GROUP_SIZE] = b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + + bzero(std->mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + + for (a = 1; a < GF_FIELD_SIZE; a++) { + loga = log_tbl[a]; + for (b = 1; b < GF_FIELD_SIZE; b++) { + logb = log_tbl[b]; + prod = antilog_tbl[loga+logb]; + std->mult[a][b] = prod; + std->div[prod][b] = a; + } + } + + gf->inverse.w32 = NULL; + gf->divide.w32 = gf_w4_single_table_divide; + gf->multiply.w32 = gf_w4_single_table_multiply; + if (h->region_type & GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w4_single_table_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w4_single_table_multiply_region; + } + return 1; +} + +/* ------------------------------------------------------------ + IMPLEMENTATION: DOUBLE TABLE: JSP. 
+ */ + +static +inline +gf_val_32_t +gf_w4_double_table_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_double_table_data *std; + + std = (struct gf_double_table_data *) ((gf_internal_t *) (gf->scratch))->private; + return std->mult[a][b]; +} + +static +inline +gf_val_32_t +gf_w4_double_table_divide (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_double_table_data *std; + + std = (struct gf_double_table_data *) ((gf_internal_t *) (gf->scratch))->private; + return std->div[a][b]; +} + +static +void +gf_w4_double_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + int i; + uint8_t *s8, *d8, *base; + gf_region_data rd; + struct gf_double_table_data *std; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + + std = (struct gf_double_table_data *) ((gf_internal_t *) (gf->scratch))->private; + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + base = (uint8_t *) std->mult; + base += (val << GF_DOUBLE_WIDTH); + + if (xor) { + for (i = 0; i < bytes; i++) d8[i] ^= base[s8[i]]; + } else { + for (i = 0; i < bytes; i++) d8[i] = base[s8[i]]; + } +} + +static +int gf_w4_double_table_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_double_table_data *std; + int a, b, c, prod, loga, logb, ab; + uint8_t log_tbl[GF_FIELD_SIZE]; + uint8_t antilog_tbl[GF_FIELD_SIZE*2]; + uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE]; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_double_table_data *)h->private; + + b = 1; + for (a = 0; a < GF_MULT_GROUP_SIZE; a++) { + log_tbl[b] = a; + antilog_tbl[a] = b; + antilog_tbl[a+GF_MULT_GROUP_SIZE] = b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + + bzero(mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + + for (a 
= 1; a < GF_FIELD_SIZE; a++) { + loga = log_tbl[a]; + for (b = 1; b < GF_FIELD_SIZE; b++) { + logb = log_tbl[b]; + prod = antilog_tbl[loga+logb]; + mult[a][b] = prod; + std->div[prod][b] = a; + } + } + bzero(std->mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE * GF_FIELD_SIZE); + for (a = 0; a < GF_FIELD_SIZE; a++) { + for (b = 0; b < GF_FIELD_SIZE; b++) { + ab = mult[a][b]; + for (c = 0; c < GF_FIELD_SIZE; c++) { + std->mult[a][(b << 4) | c] = ((ab << 4) | mult[a][c]); + } + } + } + + gf->inverse.w32 = NULL; + gf->divide.w32 = gf_w4_double_table_divide; + gf->multiply.w32 = gf_w4_double_table_multiply; + gf->multiply_region.w32 = gf_w4_double_table_multiply_region; + return 1; +} + + +static +inline +gf_val_32_t +gf_w4_quad_table_lazy_divide (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_quad_table_lazy_data *std; + + std = (struct gf_quad_table_lazy_data *) ((gf_internal_t *) (gf->scratch))->private; + return std->div[a][b]; +} + +static +inline +gf_val_32_t +gf_w4_quad_table_lazy_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_quad_table_lazy_data *std; + + std = (struct gf_quad_table_lazy_data *) ((gf_internal_t *) (gf->scratch))->private; + return std->smult[a][b]; +} + +static +inline +gf_val_32_t +gf_w4_quad_table_divide (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_quad_table_data *std; + + std = (struct gf_quad_table_data *) ((gf_internal_t *) (gf->scratch))->private; + return std->div[a][b]; +} + +static +inline +gf_val_32_t +gf_w4_quad_table_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_quad_table_data *std; + uint16_t v; + + std = (struct gf_quad_table_data *) ((gf_internal_t *) (gf->scratch))->private; + v = std->mult[a][b]; + return v; +} + +static +void +gf_w4_quad_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint16_t *base; + gf_region_data rd; + struct gf_quad_table_data *std; + struct gf_quad_table_lazy_data *ltd; + gf_internal_t *h; + 
int a, b, c, d, va, vb, vc, vd; + /* Trivial multipliers are delegated to the generic helpers. */ + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + /* Lazy mode rebuilds the table for this val on every call (16^4 entries, each packing four 4-bit products); non-lazy mode just points at the prebuilt row for val. */ + h = (gf_internal_t *) (gf->scratch); + if (h->region_type & GF_REGION_LAZY) { + ltd = (struct gf_quad_table_lazy_data *) ((gf_internal_t *) (gf->scratch))->private; + base = ltd->mult; + for (a = 0; a < 16; a++) { + va = (ltd->smult[val][a] << 12); + for (b = 0; b < 16; b++) { + vb = (ltd->smult[val][b] << 8); + for (c = 0; c < 16; c++) { + vc = (ltd->smult[val][c] << 4); + for (d = 0; d < 16; d++) { + vd = ltd->smult[val][d]; + base[(a << 12) | (b << 8) | (c << 4) | d ] = (va | vb | vc | vd); + } + } + } + } + } else { + std = (struct gf_quad_table_data *) ((gf_internal_t *) (gf->scratch))->private; + base = &(std->mult[val][0]); + } + /* NOTE(review): the trailing 8 is presumably the alignment in bytes required by the main loop -- confirm against gf_set_region_data. */ + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + gf_do_initial_region_alignment(&rd); + gf_two_byte_region_table_multiply(&rd, base); + gf_do_final_region_alignment(&rd); +} +/* Initialise the non-lazy quad-table method: builds log/antilog tables from h->prim_poly, a local 16x16 product table and std->div, then expands std->mult[val][] for every val, and installs the quad-table function pointers. Returns 1. */ +static +int gf_w4_quad_table_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_quad_table_data *std; + int prod, loga, logb, ab, val, a, b, c, d, va, vb, vc, vd; + uint8_t log_tbl[GF_FIELD_SIZE]; + uint8_t antilog_tbl[GF_FIELD_SIZE*2]; + uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE]; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_quad_table_data *)h->private; + /* Walk the powers of the generator 2; antilog is stored twice so loga+logb can be used as an index without a modular reduction. */ + b = 1; + for (a = 0; a < GF_MULT_GROUP_SIZE; a++) { + log_tbl[b] = a; + antilog_tbl[a] = b; + antilog_tbl[a+GF_MULT_GROUP_SIZE] = b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + /* Rows and columns for zero stay 0 in both tables. */ + bzero(mult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + + for (a = 1; a < GF_FIELD_SIZE; a++) { + loga = log_tbl[a]; + for (b = 1; b < GF_FIELD_SIZE; b++) { + logb = log_tbl[b]; + prod = antilog_tbl[loga+logb]; + mult[a][b] = prod; + std->div[prod][b] = a; + } + } + /* Expand to the wide table: entry [val][abcd] packs val*a, val*b, val*c, val*d into one 16-bit word. */ + for (val = 0; val < 16; val++) { + for (a = 0; a < 16; a++) { + va
= (mult[val][a] << 12); + for (b = 0; b < 16; b++) { + vb = (mult[val][b] << 8); + for (c = 0; c < 16; c++) { + vc = (mult[val][c] << 4); + for (d = 0; d < 16; d++) { + vd = mult[val][d]; + std->mult[val][(a << 12) | (b << 8) | (c << 4) | d ] = (va | vb | vc | vd); + } + } + } + } + } + /* No direct inverse routine is installed here; inverse.w32 is left NULL. */ + gf->inverse.w32 = NULL; + gf->divide.w32 = gf_w4_quad_table_divide; + gf->multiply.w32 = gf_w4_quad_table_multiply; + gf->multiply_region.w32 = gf_w4_quad_table_multiply_region; + return 1; +} +/* Initialise the lazy quad-table method: fills only the small smult and div tables; the wide table is rebuilt per call by gf_w4_quad_table_multiply_region. Returns 1. */ static +int gf_w4_quad_table_lazy_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_quad_table_lazy_data *std; + int a, b, c, prod, loga, logb, ab; + uint8_t log_tbl[GF_FIELD_SIZE]; + uint8_t antilog_tbl[GF_FIELD_SIZE*2]; + uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE]; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_quad_table_lazy_data *)h->private; + /* Same log/antilog construction as the non-lazy init: powers of 2 reduced by h->prim_poly, antilog duplicated to avoid a modulo. */ + b = 1; + for (a = 0; a < GF_MULT_GROUP_SIZE; a++) { + log_tbl[b] = a; + antilog_tbl[a] = b; + antilog_tbl[a+GF_MULT_GROUP_SIZE] = b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + + bzero(std->smult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + bzero(std->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + /* smult[a][b] = a*b and div[a*b][b] = a for all non-zero a, b. */ + for (a = 1; a < GF_FIELD_SIZE; a++) { + loga = log_tbl[a]; + for (b = 1; b < GF_FIELD_SIZE; b++) { + logb = log_tbl[b]; + prod = antilog_tbl[loga+logb]; + std->smult[a][b] = prod; + std->div[prod][b] = a; + } + } + + gf->inverse.w32 = NULL; + gf->divide.w32 = gf_w4_quad_table_lazy_divide; + gf->multiply.w32 = gf_w4_quad_table_lazy_multiply; + gf->multiply_region.w32 = gf_w4_quad_table_multiply_region; + return 1; +} +/* Dispatch GF_MULT_TABLE initialisation on h->region_type: single table (also the default for 0 or CAUCHY), double table, or quad table (lazy or not). */ +static +int gf_w4_table_init(gf_t *gf) +{ + int rt; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + rt = (h->region_type); + if (rt == 0 || rt == GF_REGION_CAUCHY) rt |= GF_REGION_SINGLE_TABLE; + if (rt & GF_REGION_SINGLE_TABLE) { + return gf_w4_single_table_init(gf); + } else if (rt & GF_REGION_DOUBLE_TABLE) { + return gf_w4_double_table_init(gf); + } else if (rt
& GF_REGION_QUAD_TABLE) { + /* Both arms return, so nothing may follow this if/else inside the branch. */ + if (rt & GF_REGION_LAZY) { + return gf_w4_quad_table_lazy_init(gf); + } else { + return gf_w4_quad_table_init(gf); + } + } + /* No recognised table flavour was requested. */ + return 0; +} + +/* ------------------------------------------------------------ + JSP: GF_MULT_BYTWO_p and _b: See the paper. +*/ +/* BYTWO_p single multiply: scans a from bit 3 down to bit 0, doubling the running product (reducing by h->prim_poly when its bit 3 is set) and XORing in b for every set bit of a. */ +static +inline +gf_val_32_t +gf_w4_bytwo_p_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint32_t prod, pp, pmask, amask; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + + prod = 0; + pmask = 0x8; + amask = 0x8; + + while (amask != 0) { + if (prod & pmask) { + prod = ((prod << 1) ^ pp); + } else { + prod <<= 1; + } + if (a & amask) prod ^= b; + amask >>= 1; + } + return prod; +} +/* BYTWO_b single multiply: scans a from bit 0 upward, XORing the current b into the product for every set bit and doubling b (with reduction by h->prim_poly) between bits; returns as soon as a is exhausted. */ +static +inline +gf_val_32_t +gf_w4_bytwo_b_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint32_t prod, pp, bmask; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + prod = 0; + bmask = 0x8; + + while (1) { + if (a & 1) prod ^= b; + a >>= 1; + if (a == 0) return prod; + if (b & bmask) { + b = ((b << 1) ^ pp); + } else { + b <<= 1; + } + } +} +/* BYTWO_p region multiply without SSE: processes the aligned middle of the region one uint64_t at a time, applying the AB2 doubling macro to the running product and XORing in the source word for each set bit of val (bit 3 first). */ +static +void +gf_w4_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint64_t *s64, *d64, t1, t2, ta, prod, amask; + gf_region_data rd; + struct gf_bytwo_data *btd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + btd = (struct gf_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + gf_do_initial_region_alignment(&rd); + + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + + if (xor) { + while (s64 < (uint64_t *) rd.s_top) { + prod = 0; + amask = 0x8; + ta = *s64; + while (amask != 0) { + AB2(btd->prim_poly, btd->mask1, btd->mask2, prod, t1, t2); + if (val & amask) prod ^= ta; + amask >>= 1; + } + *d64 ^= prod; +
d64++; + s64++; + } + } else { + /* Overwrite variant: identical inner loop, but the product replaces *d64 instead of being XORed into it. */ + while (s64 < (uint64_t *) rd.s_top) { + prod = 0; + amask = 0x8; + ta = *s64; + while (amask != 0) { + AB2(btd->prim_poly, btd->mask1, btd->mask2, prod, t1, t2); + if (val & amask) prod ^= ta; + amask >>= 1; + } + *d64 = prod; + d64++; + s64++; + } + } + gf_do_final_region_alignment(&rd); +} +/* One BYTWO_p step on SSE registers: apply SSE_AB2 to prod, then XOR ta into prod when the low bit of every byte of v is 0 ((v & 1) - 1 is an all-ones byte mask in that case), and shift v right by one. Expects pp, m1, m2, prod, t1, t2, ta, one and v in scope. */ +#define BYTWO_P_ONESTEP {\ + SSE_AB2(pp, m1 ,m2, prod, t1, t2); \ + t1 = _mm_and_si128(v, one); \ + t1 = _mm_sub_epi8(t1, one); \ + t1 = _mm_and_si128(t1, ta); \ + prod = _mm_xor_si128(prod, t1); \ + v = _mm_srli_epi64(v, 1); } +/* BYTWO_p region multiply using SSE: 16 bytes per iteration, four BYTWO_P_ONESTEP steps each. The whole body is compiled only when INTEL_SSE4 is defined; otherwise the function does nothing. */ +static +void +gf_w4_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *s8, *d8; + uint8_t vrev; + uint64_t amask; + __m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v; + struct gf_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + btd = (struct gf_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + /* vrev holds the four bits of val in reversed order and inverted, so the macro sees val's bit 3 first and a 0 bit means "add ta". */ + vrev = 0; + for (i = 0; i < 4; i++) { + vrev <<= 1; + if (!(val & (1 << i))) vrev |= 1; + } + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + one = _mm_set1_epi8(1); + + while (d8 < (uint8_t *) rd.d_top) { + prod = _mm_setzero_si128(); + v = _mm_set1_epi8(vrev); + ta = _mm_load_si128((__m128i *) s8); + tp = (!xor) ?
_mm_setzero_si128() : _mm_load_si128((__m128i *) d8); + /* Four steps, one per bit of the 4-bit multiplier. */ BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + /* tp is zero in overwrite mode and the old destination in xor mode. */ _mm_store_si128((__m128i *) d8, _mm_xor_si128(prod, tp)); + d8 += 16; + s8 += 16; + } + gf_do_final_region_alignment(&rd); +#endif +} + +/* +static +void +gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + struct gf_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + btd = (struct gf_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + if (xor) { + while (d8 < (uint8_t *) rd.d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = _mm_load_si128 ((__m128i *)(d8)); + tb = val; + while (1) { + if (tb & 1) vb = _mm_xor_si128(vb, va); + tb >>= 1; + if (tb == 0) break; + SSE_AB2(pp, m1, m2, va, t1, t2); + } + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } + } else { + while (d8 < (uint8_t *) rd.d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = _mm_setzero_si128 (); + tb = val; + while (1) { + if (tb & 1) vb = _mm_xor_si128(vb, va); + tb >>= 1; + if (tb == 0) break; + t1 = _mm_and_si128(_mm_slli_epi64(va, 1), m1); + t2 = _mm_and_si128(va, m2); + t2 = _mm_sub_epi64 ( + _mm_slli_epi64(t2, 1), _mm_srli_epi64(t2, (GF_FIELD_WIDTH-1))); + va = _mm_xor_si128(t1, _mm_and_si128(t2, pp)); + } + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } + } + gf_do_final_region_alignment(&rd); +#endif +} +*/ +/* The block comment above is an older, disabled gf_w4_bytwo_b_sse_multiply_region. What follows are fixed-multiplier SSE kernels that work on the aligned span described by rd. */ +static +void
+gf_w4_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ /* dest = SSE_AB2(src), 16 bytes at a time; SSE_AB2 is defined elsewhere and presumably doubles every 4-bit symbol -- confirm. Empty unless INTEL_SSE4 is defined. */ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} +/* dest ^= SSE_AB2(src): same kernel as the noxor form, accumulated into the existing destination. */ +static +void +gf_w4_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_load_si128 ((__m128i *)(d8)); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} +/* dest = src with SSE_AB2 applied twice (the multiply-by-4 kernel, going by the name). */ +static +void +gf_w4_bytwo_b_sse_region_4_noxor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + SSE_AB2(pp, m1, m2, va, t1, t2); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} +/* dest ^= src with SSE_AB2 applied twice. */ +static +void +gf_w4_bytwo_b_sse_region_4_xor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2,
va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_load_si128 ((__m128i *)(d8)); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} + +/* dest = src ^ SSE_AB2(src): the original word plus one doubling (multiply-by-3 kernel, going by the name). */ +static +void +gf_w4_bytwo_b_sse_region_3_noxor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = va; + SSE_AB2(pp, m1, m2, va, t1, t2); + va = _mm_xor_si128(va, vb); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} +/* dest ^= src ^ SSE_AB2(src): vb starts as dest ^ src and then absorbs the doubled source. */ +static +void +gf_w4_bytwo_b_sse_region_3_xor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} +/* dest = src ^ (src with SSE_AB2 applied twice) (multiply-by-5 kernel, going by the name). */ +static +void +gf_w4_bytwo_b_sse_region_5_noxor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *)
rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = va; + SSE_AB2(pp, m1, m2, va, t1, t2); + SSE_AB2(pp, m1, m2, va, t1, t2); + va = _mm_xor_si128(va, vb); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} +/* dest ^= src ^ (src with SSE_AB2 applied twice). */ +static +void +gf_w4_bytwo_b_sse_region_5_xor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va); + SSE_AB2(pp, m1, m2, va, t1, t2); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} +/* dest = src ^ AB2(src) ^ AB2(AB2(src)): all three partial terms are combined (multiply-by-7 kernel, going by the name). */ +static +void +gf_w4_bytwo_b_sse_region_7_noxor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = va; + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_xor_si128(va, vb); + SSE_AB2(pp, m1, m2, va, t1, t2); + va = _mm_xor_si128(va, vb); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} +/* Accumulating form of the 7 kernel: dest ^= src ^ AB2(src) ^ AB2(AB2(src)). */ +static +void +gf_w4_bytwo_b_sse_region_7_xor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2,
t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_xor_si128(vb, va); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} +/* dest = AB2(src) ^ AB2(AB2(src)): the undoubled source is not included (multiply-by-6 kernel, going by the name). */ +static +void +gf_w4_bytwo_b_sse_region_6_noxor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = va; + SSE_AB2(pp, m1, m2, va, t1, t2); + va = _mm_xor_si128(va, vb); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} +/* Accumulating form of the 6 kernel: dest ^= AB2(src) ^ AB2(AB2(src)). */ +static +void +gf_w4_bytwo_b_sse_region_6_xor(gf_region_data *rd, struct gf_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_xor_si128(_mm_load_si128 ((__m128i *)(d8)), va); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} +/* BYTWO_b region multiply using SSE: val 0 and 1 go to the generic helpers, 2..7 to the fixed-multiplier kernels, anything else to a generic shift-and-add loop. */ +static +void +gf_w4_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest,
gf_val_32_t val, int bytes, int xor) +{ /* The whole body is compiled only when INTEL_SSE4 is defined; otherwise this function does nothing. */ +#ifdef INTEL_SSE4 + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + struct gf_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + btd = (struct gf_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + /* Multipliers 2..7 have hand-unrolled kernels; each case finishes the unaligned tail itself and returns. */ + switch (val) { + case 2: + if (!xor) { + gf_w4_bytwo_b_sse_region_2_noxor(&rd, btd); + } else { + gf_w4_bytwo_b_sse_region_2_xor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + case 3: + if (!xor) { + gf_w4_bytwo_b_sse_region_3_noxor(&rd, btd); + } else { + gf_w4_bytwo_b_sse_region_3_xor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + case 4: + if (!xor) { + gf_w4_bytwo_b_sse_region_4_noxor(&rd, btd); + } else { + gf_w4_bytwo_b_sse_region_4_xor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + case 5: + if (!xor) { + gf_w4_bytwo_b_sse_region_5_noxor(&rd, btd); + } else { + gf_w4_bytwo_b_sse_region_5_xor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + case 6: + if (!xor) { + gf_w4_bytwo_b_sse_region_6_noxor(&rd, btd); + } else { + gf_w4_bytwo_b_sse_region_6_xor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + case 7: + if (!xor) { + gf_w4_bytwo_b_sse_region_7_noxor(&rd, btd); + } else { + gf_w4_bytwo_b_sse_region_7_xor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + } + /* Generic path for the remaining multipliers: add va for each set bit of val (low bit first), doubling va between bits. */ + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + if (xor) { + while (d8 < (uint8_t *) rd.d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = _mm_load_si128 ((__m128i *)(d8)); + tb = val; + while (1) { + if (tb & 1) vb = _mm_xor_si128(vb, va); + tb >>= 1; + if (tb == 0) break; +
SSE_AB2(pp, m1, m2, va, t1, t2); + } + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } + } else { + while (d8 < (uint8_t *) rd.d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = _mm_setzero_si128 (); + tb = val; + while (1) { + if (tb & 1) vb = _mm_xor_si128(vb, va); + tb >>= 1; + if (tb == 0) break; + /* NOTE(review): the doubling is written out by hand here instead of calling SSE_AB2 as the xor branch does; presumably equivalent -- confirm against the macro. */ t1 = _mm_and_si128(_mm_slli_epi64(va, 1), m1); + t2 = _mm_and_si128(va, m2); + t2 = _mm_sub_epi64 ( + _mm_slli_epi64(t2, 1), _mm_srli_epi64(t2, (GF_FIELD_WIDTH-1))); + va = _mm_xor_si128(t1, _mm_and_si128(t2, pp)); + } + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } + } + gf_do_final_region_alignment(&rd); +#endif +} +/* BYTWO_b region multiply without SSE: works on uint64_t words of the aligned span, with one unrolled case per multiplier that chains the AB2 doubling macro and XORs together the partial terms val needs. */ +static +void +gf_w4_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + int i; + uint8_t *s8, *d8, *top; + uint64_t *s64, *d64, t1, t2, ta, tb, prod; + struct gf_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + btd = (struct gf_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + /* Every case runs d64 up to rd.d_top. case 1 cannot be reached because val == 1 already returned above. */ + switch (val) { + case 1: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + *d64 ^= *s64; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + *d64 = *s64; + d64++; + s64++; + } + } + break; + case 2: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 3: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; +
AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 4: /* two doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 5: /* source word plus two doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta ^ prod; + d64++; + s64++; + } + } + /* Explicit break: the old fall-through was harmless only because d64 had already reached rd.d_top. */ + break; + case 6: /* one doubling plus two doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta ^ prod; + d64++; + s64++; + } + } + /* Explicit break, same reason as for case 5. */ + break; + case 7: /* source word plus one and two doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; +
s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta ^ prod; + d64++; + s64++; + } + } + break; + case 8: /* three doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 9: /* source word plus three doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 10: /* one doubling plus three doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); +
AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 11: /* source word, one doubling and three doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 12: /* two doublings plus three doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 13: /* source word, two doublings and three doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^
prod); + d64++; + s64++; + } + } + break; + case 14: /* one, two and three doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 15: /* source word plus one, two and three doublings */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + default: /* generic shift-and-add loop for any val without a dedicated case */ + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + prod = *d64 ; + ta = *s64; + tb = val; + while (1) { + if (tb & 1) prod ^= ta; + tb >>= 1; + if (tb == 0) break; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + } + *d64 = prod; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + prod = 0 ; + ta = *s64; + tb = val; + while (1) { + if (tb & 1) prod ^= ta; + tb >>= 1; + if (tb == 0) break; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + } + *d64 = prod; + d64++; + s64++; + } + } + break; + } + gf_do_final_region_alignment(&rd); +} +/* Initialise either BYTWO method: replicates the polynomial and the two masks across a 64-bit word, then installs the multiply and region routines matching h->mult_type and h->region_type. */ +static +int
gf_w4_bytwo_init(gf_t *gf) +{ + gf_internal_t *h; + uint64_t ip, m1, m2; + struct gf_bytwo_data *btd; + + h = (gf_internal_t *) gf->scratch; + btd = (struct gf_bytwo_data *) (h->private); + ip = h->prim_poly & 0xf; + m1 = 0xe; + m2 = 0x8; + btd->prim_poly = 0; + btd->mask1 = 0; + btd->mask2 = 0; + + while (ip != 0) { + btd->prim_poly |= ip; + btd->mask1 |= m1; + btd->mask2 |= m2; + ip <<= GF_FIELD_WIDTH; + m1 <<= GF_FIELD_WIDTH; + m2 <<= GF_FIELD_WIDTH; + } + + if (h->mult_type == GF_MULT_BYTWO_p) { + gf->multiply.w32 = gf_w4_bytwo_p_multiply; + if (h->region_type == GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w4_bytwo_p_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w4_bytwo_p_nosse_multiply_region; + } + } else { + gf->multiply.w32 = gf_w4_bytwo_b_multiply; + if (h->region_type == GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w4_bytwo_b_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w4_bytwo_b_nosse_multiply_region; + } + } + gf->inverse.w32 = gf_w4_euclid; + return 1; +} + + +/* ------------------------------------------------------------ + JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only + include it for completeness. It does have the feature that it requires no + extra memory. 
+*/ +/* Shift multiply: XORs together the shifted copies of b selected by the low GF_FIELD_WIDTH bits of a, then clears product bits from 2*GF_FIELD_WIDTH-1 down to GF_FIELD_WIDTH by XORing in shifted copies of h->prim_poly. */ +static +inline +gf_val_32_t +gf_w4_shift_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint8_t product, i, pp; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + product = 0; + + for (i = 0; i < GF_FIELD_WIDTH; i++) { + if (a & (1 << i)) product ^= (b << i); + } + for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { + if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); + } + return product; +} +/* Install the shift multiply and the Euclid-based inverse; no region routine is set here. Returns 1. */ +static +int gf_w4_shift_init(gf_t *gf) +{ + gf->multiply.w32 = gf_w4_shift_multiply; + gf->inverse.w32 = gf_w4_euclid; + return 1; +} +/* Returns the number of scratch bytes needed for the given method combination, or -1 when the combination is rejected. divide_type is not examined. */ +int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + int region_tbl_size; + int sss; + int ss; + + sss = (GF_REGION_SINGLE_TABLE | GF_REGION_SSE | GF_REGION_NOSSE); + ss = (GF_REGION_SSE | GF_REGION_NOSSE); + + switch(mult_type) + { + case GF_MULT_BYTWO_p: + case GF_MULT_BYTWO_b: + if (arg1 != 0 || arg2 != 0) return -1; + if (region_type != GF_REGION_CAUCHY) { /* only the SSE / NOSSE bits may be set, and not both together */ + if ((region_type | ss) != ss || (region_type & ss) == ss) return -1; + } + return sizeof(gf_internal_t) + sizeof(struct gf_bytwo_data); + break; + case GF_MULT_TABLE: + if (arg1 != 0 || arg2 != 0) return -1; + if (region_type == GF_REGION_CAUCHY || region_type == (GF_REGION_CAUCHY | GF_REGION_SINGLE_TABLE)) { + return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64; + } + if (region_type == 0) region_type = GF_REGION_SINGLE_TABLE; + if (region_type & GF_REGION_SINGLE_TABLE) { /* SINGLE_TABLE may be combined with SSE or NOSSE, but not with both */ + if ((region_type | sss) != sss) return -1; + if ((region_type & sss) == sss) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_single_table_data) + 64; + } else if (region_type & GF_REGION_DOUBLE_TABLE) { + if (region_type != GF_REGION_DOUBLE_TABLE) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_double_table_data) + 64; + } else if (region_type & GF_REGION_QUAD_TABLE) { /* QUAD_TABLE accepts only the optional LAZY flag alongside it */ + if ((region_type | GF_REGION_LAZY) != (GF_REGION_QUAD_TABLE | GF_REGION_LAZY)) return
-1; + if ((region_type & GF_REGION_LAZY) == 0) { + return sizeof(gf_internal_t) + sizeof(struct gf_quad_table_data) + 64; + } else { + return sizeof(gf_internal_t) + sizeof(struct gf_quad_table_lazy_data) + 64; + } + } + return -1; + break; + case GF_MULT_DEFAULT: + case GF_MULT_LOG_TABLE: + if (arg1 != 0 || arg2 != 0 || (region_type != 0 && region_type != GF_REGION_CAUCHY)) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; + break; + case GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0 || (region_type != 0 && region_type != GF_REGION_CAUCHY)) return -1; + return sizeof(gf_internal_t); + break; + default: + return -1; + } +} + +int +gf_w4_init (gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x13; + + gf->multiply.w32 = NULL; + gf->divide.w32 = NULL; + gf->inverse.w32 = NULL; + gf->multiply_region.w32 = NULL; + gf->extract_word.w32 = gf_w4_extract_word; + + switch(h->mult_type) { + case GF_MULT_SHIFT: if (gf_w4_shift_init(gf) == 0) return 0; break; + case GF_MULT_BYTWO_p: + case GF_MULT_BYTWO_b: + if (gf_w4_bytwo_init(gf) == 0) return 0; break; + case GF_MULT_DEFAULT: + case GF_MULT_LOG_TABLE: if (gf_w4_log_init(gf) == 0) return 0; break; + case GF_MULT_TABLE: if (gf_w4_table_init(gf) == 0) return 0; break; + default: return 0; + } + + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w32 = gf_w4_divide_from_inverse; + gf->inverse.w32 = gf_w4_euclid; + } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w32 = gf_w4_divide_from_inverse; + gf->inverse.w32 = gf_w4_matrix; + } + + if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { + gf->divide.w32 = gf_w4_divide_from_inverse; + } + if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { + gf->inverse.w32 = gf_w4_inverse_from_divide; + } + + if (h->region_type == GF_REGION_CAUCHY) { + gf->multiply_region.w32 = gf_wgen_cauchy_region; + gf->extract_word.w32 = gf_wgen_extract_word; + } + return 1; +} diff --git 
a/gf_w64.c b/gf_w64.c new file mode 100644 index 0000000..3ab1e8a --- /dev/null +++ b/gf_w64.c @@ -0,0 +1,206 @@ +/* + * gf_w64.c + * + * Routines for 64-bit Galois fields + */ + +#include "gf_int.h" +#include +#include + +#define GF_FIELD_WIDTH (64) + +static +inline +gf_val_64_t gf_w64_inverse_from_divide (gf_t *gf, gf_val_64_t a) +{ + return gf->divide.w64(gf, 1, a); +} + +static +inline +gf_val_64_t gf_w64_divide_from_inverse (gf_t *gf, gf_val_64_t a, gf_val_64_t b) +{ + b = gf->inverse.w64(gf, b); + return gf->multiply.w64(gf, a, b); +} + +static +void +gf_w64_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_64_t val, int bytes, int +xor) +{ + int i; + gf_val_64_t *s64; + gf_val_64_t *d64; + + s64 = (gf_val_64_t *) src; + d64 = (gf_val_64_t *) dest; + + if (xor) { + for (i = 0; i < bytes/sizeof(gf_val_64_t); i++) { + d64[i] ^= gf->multiply.w64(gf, val, s64[i]); + } + } else { + for (i = 0; i < bytes/sizeof(gf_val_64_t); i++) { + d64[i] = gf->multiply.w64(gf, val, s64[i]); + } + } +} + +static +inline +gf_val_64_t gf_w64_euclid (gf_t *gf, gf_val_64_t b) +{ + gf_val_64_t e_i, e_im1, e_ip1; + gf_val_64_t d_i, d_im1, d_ip1; + gf_val_64_t y_i, y_im1, y_ip1; + gf_val_64_t c_i; + gf_val_64_t one = 1; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = 64; + for (d_i = d_im1-1; ((one << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 >= d_i) { + c_i ^= (one << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + d_ip1--; + while ((e_ip1 & (one << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w64(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i = e_ip1; + d_i = d_ip1; + } + + return y_i; +} + +/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only + include it for completeness. It does have the feature that it requires no + extra memory. 
+*/ + +static +inline +gf_val_64_t +gf_w64_shift_multiply (gf_t *gf, gf_val_64_t a64, gf_val_64_t b64) +{ + uint64_t pl, pr, ppl, ppr, i, pp, a, bl, br, one, lbit; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + ppr = h->prim_poly; + ppl = 1; + + a = a64; + bl = 0; + br = b64; + one = 1; + lbit = (one << 63); + + pl = 0; + pr = 0; + + for (i = 0; i < GF_FIELD_WIDTH; i++) { + if (a & (one << i)) { + pl ^= bl; + pr ^= br; + } + /* printf("P: %016llx %016llx ", pl, pr); printf("B: %016llx %016llx\n", bl, br); */ + bl <<= 1; + if (br & lbit) bl ^= 1; + br <<= 1; + } + + one = lbit; + ppl = ((h->prim_poly >> 1) | lbit); + ppr = lbit; + while (one != 0) { + if (pl & one) { + pl ^= ppl; + pr ^= ppr; + } + one >>= 1; + ppr >>= 1; + if (ppl & 1) ppr ^= lbit; + ppl >>= 1; + } + return pr; +} + +static +int gf_w64_shift_init(gf_t *gf) +{ + gf->multiply.w64 = gf_w64_shift_multiply; + gf->inverse.w64 = gf_w64_euclid; + gf->multiply_region.w64 = gf_w64_multiply_region_from_single; + return 1; +} + +int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + if (divide_type == GF_DIVIDE_MATRIX) return -1; + switch(mult_type) + { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1; + return sizeof(gf_internal_t); + break; + default: + return -1; + } +} + +int gf_w64_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x1b; /* Omitting the leftmost 1 as in w=32 */ + + gf->multiply.w64 = NULL; + gf->divide.w64 = NULL; + gf->inverse.w64 = NULL; + gf->multiply_region.w64 = NULL; + + switch(h->mult_type) { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: if (gf_w64_shift_init(gf) == 0) return 0; break; + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w64 = gf_w64_divide_from_inverse; + gf->inverse.w64 = gf_w64_euclid; + } + +/* else if (h->divide_type == GF_DIVIDE_MATRIX) { + 
gf->divide.w64 = gf_w64_divide_from_inverse; + gf->inverse.w64 = gf_w64_matrix; + } */ + + if (gf->inverse.w64 != NULL && gf->divide.w64 == NULL) { + gf->divide.w64 = gf_w64_divide_from_inverse; + } + if (gf->inverse.w64 == NULL && gf->divide.w64 != NULL) { + gf->inverse.w64 = gf_w64_inverse_from_divide; + } + return 1; +} diff --git a/gf_w8.c b/gf_w8.c new file mode 100644 index 0000000..f0c285d --- /dev/null +++ b/gf_w8.c @@ -0,0 +1,1837 @@ +/* + * gf_w8.c + * + * Routines for 8-bit Galois fields + */ + +#include "gf_int.h" +#include +#include + +#define GF_FIELD_WIDTH (8) +#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) +#define GF_HALF_SIZE (1 << (GF_FIELD_WIDTH/2)) +#define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 + +#define GF_BASE_FIELD_WIDTH (4) +#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) +#define GF_S_GF_4_2 (4) + +struct gf_w8_logtable_data { + uint8_t log_tbl[GF_FIELD_SIZE]; + uint8_t antilog_tbl[GF_FIELD_SIZE * 2]; + uint8_t inv_tbl[GF_FIELD_SIZE]; +}; + +struct gf_w8_logzero_table_data { + uint16_t log_tbl[GF_FIELD_SIZE]; + uint8_t antilog_tbl[512+512+1]; + uint8_t *inv_tbl; +}; + +struct gf_w8_half_table_data { + uint8_t high[GF_FIELD_SIZE][GF_HALF_SIZE]; + uint8_t low[GF_FIELD_SIZE][GF_HALF_SIZE]; +}; + +struct gf_w8_single_table_data { + uint8_t divtable[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint8_t multtable[GF_FIELD_SIZE][GF_FIELD_SIZE]; +}; + +struct gf_w8_double_table_data { + uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint16_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE*GF_FIELD_SIZE]; +}; + +struct gf_w8_double_table_lazy_data { + uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint8_t smult[GF_FIELD_SIZE][GF_FIELD_SIZE]; + uint16_t mult[GF_FIELD_SIZE*GF_FIELD_SIZE]; +}; + +struct gf_w4_logtable_data { + uint8_t log_tbl[GF_BASE_FIELD_SIZE]; + uint8_t antilog_tbl[GF_BASE_FIELD_SIZE * 2]; + uint8_t *antilog_tbl_div; +}; + +struct gf_w4_single_table_data { + uint8_t div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; + uint8_t 
mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; +}; + +struct gf_w8_bytwo_data { + uint64_t prim_poly; + uint64_t mask1; + uint64_t mask2; +}; + +#define AB2(ip, am1 ,am2, b, t1, t2) {\ + t1 = (b << 1) & am1;\ + t2 = b & am2; \ + t2 = ((t2 << 1) - (t2 >> (GF_FIELD_WIDTH-1))); \ + b = (t1 ^ (t2 & ip));} + +#define SSE_AB2(pp, m1 ,m2, va, t1, t2) {\ + t1 = _mm_and_si128(_mm_slli_epi64(va, 1), m1); \ + t2 = _mm_and_si128(va, m2); \ + t2 = _mm_sub_epi64 (_mm_slli_epi64(t2, 1), _mm_srli_epi64(t2, (GF_FIELD_WIDTH-1))); \ + va = _mm_xor_si128(t1, _mm_and_si128(t2, pp)); } + +#define MM_PRINT(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 2) printf(" %02x %02x", blah[15-ii], blah[14-ii]); printf("\n"); } + +static +inline +uint32_t gf_w8_inverse_from_divide (gf_t *gf, uint32_t a) +{ + return gf->divide.w32(gf, 1, a); +} + +static +inline +uint32_t gf_w8_divide_from_inverse (gf_t *gf, uint32_t a, uint32_t b) +{ + b = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, b); +} + +static +inline +uint32_t gf_w8_euclid (gf_t *gf, uint32_t b) +{ + uint32_t e_i, e_im1, e_ip1; + uint32_t d_i, d_im1, d_ip1; + uint32_t y_i, y_im1, y_ip1; + uint32_t c_i; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = 8; + for (d_i = d_im1; ((1 << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 >= d_i) { + c_i ^= (1 << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i = e_ip1; + d_i = d_ip1; + } + + return y_i; +} + +static +gf_val_32_t gf_w8_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + uint8_t *r8; + + r8 = (uint8_t *) start; + return r8[index]; +} + +static +inline +uint32_t gf_w8_matrix (gf_t *gf, 
uint32_t b) +{ + return gf_bitmatrix_inverse(b, 8, ((gf_internal_t *) (gf->scratch))->prim_poly); +} + +/* ------------------------------------------------------------ + IMPLEMENTATION: SHIFT: + + JSP: The world's dumbest multiplication algorithm. I only + include it for completeness. It does have the feature that it requires no + extra memory. +*/ + +static +inline +uint32_t +gf_w8_shift_multiply (gf_t *gf, uint32_t a8, uint32_t b8) +{ + uint16_t product, i, pp, a, b; + gf_internal_t *h; + + a = a8; + b = b8; + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + product = 0; + + for (i = 0; i < GF_FIELD_WIDTH; i++) { + if (a & (1 << i)) product ^= (b << i); + } + for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { + if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); + } + return product; +} + +static +int gf_w8_shift_init(gf_t *gf) +{ + gf->multiply.w32 = gf_w8_shift_multiply; + gf->inverse.w32 = gf_w8_euclid; + return 1; +} + +/* ------------------------------------------------------------ + IMPLEMENTATION: LOG_TABLE: + + JSP: Kevin wrote this, and I'm converting it to my structure. + */ + +static +inline +uint32_t +gf_w8_logzero_multiply (gf_t *gf, uint32_t a, uint32_t b) +{ + struct gf_w8_logzero_table_data *ltd; + + ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private; + return ltd->antilog_tbl[(unsigned)(ltd->log_tbl[a] + ltd->log_tbl[b])]; +} + +static +inline +uint32_t +gf_w8_logzero_divide (gf_t *gf, uint32_t a, uint32_t b) +{ + struct gf_w8_logzero_table_data *ltd; + + ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private; + return ltd->antilog_tbl[(unsigned)((255 + ltd->log_tbl[a]) - ltd->log_tbl[b])]; +} + +static +inline +uint32_t +gf_w8_log_multiply (gf_t *gf, uint32_t a, uint32_t b) +{ + struct gf_w8_logtable_data *ltd; + + ltd = (struct gf_w8_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return (a == 0 || b == 0) ? 
0 : ltd->antilog_tbl[(unsigned)(ltd->log_tbl[a] + ltd->log_tbl[b])]; +} + +static +inline +uint32_t +gf_w8_log_divide (gf_t *gf, uint32_t a, uint32_t b) +{ + int log_sum = 0; + struct gf_w8_logtable_data *ltd; + + if (a == 0 || b == 0) return 0; + ltd = (struct gf_w8_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + + log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE); + return (ltd->antilog_tbl[log_sum]); +} + +static +uint32_t +gf_w8_log_inverse (gf_t *gf, uint32_t a) +{ + struct gf_w8_logtable_data *ltd; + + ltd = (struct gf_w8_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return (ltd->inv_tbl[a]); +} + +static +uint32_t +gf_w8_logzero_inverse (gf_t *gf, uint32_t a) +{ + struct gf_w8_logzero_table_data *ltd; + + ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private; + return (ltd->inv_tbl[a]); +} + +static +void +gf_w8_log_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor) +{ + int i; + uint8_t lv, b, c; + uint8_t *s8, *d8; + struct gf_w8_logtable_data *ltd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + ltd = (struct gf_w8_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + + lv = ltd->log_tbl[val]; + + if (xor) { + for (i = 0; i < bytes; i++) { + d8[i] ^= (s8[i] == 0 ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[s8[i]]]); + } + } else { + for (i = 0; i < bytes; i++) { + d8[i] = (s8[i] == 0 ? 
0 : ltd->antilog_tbl[lv + ltd->log_tbl[s8[i]]]);
+    }
+  }
+}
+
+/* Region multiply for the log-zero layout: dest[i] = val * src[i], XORed into
+   dest when xor is nonzero.  There is no per-byte zero test: log_tbl[0] is
+   512 (see gf_w8_log_init), so a zero source byte indexes the zeroed part of
+   antilog_tbl. */
+static
+void
+gf_w8_logzero_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
+{
+  int i;
+  uint8_t lv;
+  uint8_t *s8, *d8;
+  struct gf_w8_logzero_table_data *ltd;
+
+  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
+  if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; }
+
+  ltd = (struct gf_w8_logzero_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  s8 = (uint8_t *) src;
+  d8 = (uint8_t *) dest;
+
+  lv = ltd->log_tbl[val];
+
+  if (xor) {
+    for (i = 0; i < bytes; i++) {
+      d8[i] ^= (ltd->antilog_tbl[lv + ltd->log_tbl[s8[i]]]);
+    }
+  } else {
+    for (i = 0; i < bytes; i++) {
+      d8[i] = (ltd->antilog_tbl[lv + ltd->log_tbl[s8[i]]]);
+    }
+  }
+}
+
+/* Builds the log, antilog and inverse tables for w=8 and installs the
+   matching multiply/divide/inverse/region functions.  Always returns 1.
+
+   h->arg1 selects the layout: 0 uses struct gf_w8_logtable_data, any other
+   value uses the "log-zero" struct gf_w8_logzero_table_data, whose
+   antilog_tbl is laid out as
+
+     [0, 510)     powers of the generator, stored twice back to back
+     [512, 768)   zeros, reached when exactly one operand is zero
+     [768, 1024)  the inverse table (inv_tbl points here)
+     [1024]       zero, reached by 0 * 0
+*/
+static
+int gf_w8_log_init(gf_t *gf)
+{
+  gf_internal_t *h;
+  struct gf_w8_logtable_data *ltd = NULL;
+  struct gf_w8_logzero_table_data *ztd = NULL;
+  uint8_t *alt;
+  uint8_t *inv;
+  int i, b;
+  int logzero;
+
+  h = (gf_internal_t *) gf->scratch;
+
+  /* Decide the layout once.  The old code mixed "arg1 == 0" and "arg1 == 1"
+     tests, so any other value set up ztd and then wrote through the
+     uninitialized ltd. */
+  logzero = (h->arg1 != 0);
+
+  if (!logzero) {
+    ltd = h->private;
+    alt = ltd->antilog_tbl;
+    inv = ltd->inv_tbl;
+  } else {
+    ztd = h->private;
+    alt = ztd->antilog_tbl;
+    ztd->inv_tbl = (alt + 512 + 256);
+    inv = ztd->inv_tbl;
+  }
+
+  if (logzero) {
+    ztd->log_tbl[0] = 512;
+  } else {
+    ltd->log_tbl[0] = 0;
+  }
+
+  /* Walk the powers of the generator: b is generator^i. */
+  b = 1;
+  for (i = 0; i < GF_MULT_GROUP_SIZE; i++) {
+    if (logzero) {
+      ztd->log_tbl[b] = i;
+    } else {
+      ltd->log_tbl[b] = i;
+    }
+    alt[i] = b;
+    alt[i+GF_MULT_GROUP_SIZE] = b;
+    b <<= 1;
+    if (b & GF_FIELD_SIZE) {
+      b = b ^ h->prim_poly;
+    }
+  }
+  if (logzero) {
+    /* 256 bytes, not 255: gf_w8_logzero_divide(0, 1) reads index
+       255 + 512 - 0 = 767, which a 255-byte clear left unset. */
+    bzero(alt+512, 256);
+    alt[512+512] = 0;
+  }
+
+  inv[0] = 0;  /* Not really, but we need to fill it with something */
+  inv[1] = 1;
+  for (i = 2; i < GF_FIELD_SIZE; i++) {
+    b = logzero ? ztd->log_tbl[i] : ltd->log_tbl[i];
+    inv[i] = alt[GF_MULT_GROUP_SIZE-b];
+  }
+
+  gf->inverse.w32 = logzero ? gf_w8_logzero_inverse : gf_w8_log_inverse;
+  gf->divide.w32 = logzero ? gf_w8_logzero_divide : gf_w8_log_divide;
+  gf->multiply.w32 = logzero ? gf_w8_logzero_multiply : gf_w8_log_multiply;
+  gf->multiply_region.w32 = logzero ? gf_w8_logzero_multiply_region : gf_w8_log_multiply_region;
+  return 1;
+}
+
+/* ------------------------------------------------------------
+  IMPLEMENTATION: FULL_TABLE:
+
+  JSP: Kevin wrote this, and I'm converting it to my structure.
+ */
+
+/* Single-table product: one lookup in multtable[a][b]. */
+static
+gf_val_32_t
+gf_w8_table_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
+{
+  struct gf_w8_single_table_data *ftd;
+
+  ftd = (struct gf_w8_single_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  return (ftd->multtable[a][b]);
+}
+
+/* Single-table quotient: one lookup in divtable[a][b]. */
+static
+gf_val_32_t
+gf_w8_table_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
+{
+  struct gf_w8_single_table_data *ftd;
+
+  ftd = (struct gf_w8_single_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  return (ftd->divtable[a][b]);
+}
+
+/* Double-table product: one lookup in mult[a][b]. */
+static
+gf_val_32_t
+gf_w8_double_table_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
+{
+  struct gf_w8_double_table_data *ftd;
+
+  ftd = (struct gf_w8_double_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  return (ftd->mult[a][b]);
+}
+
+/* Double-table quotient: one lookup in div[a][b]. */
+static
+gf_val_32_t
+gf_w8_double_table_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
+{
+  struct gf_w8_double_table_data *ftd;
+
+  ftd = (struct gf_w8_double_table_data *) ((gf_internal_t *) gf->scratch)->private;
+  return (ftd->div[a][b]);
+}
+
+static
+void
+gf_w8_double_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
+{
+  uint16_t *base;
+  uint32_t b, c, prod, vc, vb;
+  gf_internal_t *h;
+  struct gf_w8_double_table_data  *dtd;
+  struct gf_w8_double_table_lazy_data  *ltd;
+  gf_region_data rd;
+
+  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
+  if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; }
+
+  h = (gf_internal_t *) (gf->scratch);
+  if (h->region_type & GF_REGION_LAZY) {
+    ltd = (struct gf_w8_double_table_lazy_data *) h->private;
+    base = 
ltd->mult; + for (b = 0; b < GF_FIELD_SIZE; b++) { + vb = (ltd->smult[val][b] << 8); + for (c = 0; c < GF_FIELD_SIZE; c++) { + vc = ltd->smult[val][c]; + base[(b << 8)| c] = (vb | vc); + } + } + + } else { + dtd = (struct gf_w8_double_table_data *) h->private; + base = &(dtd->mult[val][0]); + } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + gf_do_initial_region_alignment(&rd); + gf_two_byte_region_table_multiply(&rd, base); + gf_do_final_region_alignment(&rd); +} + +static +gf_val_32_t +gf_w8_double_table_lazy_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_w8_double_table_lazy_data *ftd; + + ftd = (struct gf_w8_double_table_lazy_data *) ((gf_internal_t *) gf->scratch)->private; + return (ftd->smult[a][b]); +} + +static +gf_val_32_t +gf_w8_double_table_lazy_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_w8_double_table_lazy_data *ftd; + + ftd = (struct gf_w8_double_table_lazy_data *) ((gf_internal_t *) gf->scratch)->private; + return (ftd->div[a][b]); +} + +static +void +gf_w8_table_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + int i; + uint8_t lv, b, c; + uint8_t *s8, *d8; + struct gf_w8_single_table_data *ftd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + ftd = (struct gf_w8_single_table_data *) ((gf_internal_t *) gf->scratch)->private; + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + + if (xor) { + for (i = 0; i < bytes; i++) { + d8[i] ^= ftd->multtable[s8[i]][val]; + } + } else { + for (i = 0; i < bytes; i++) { + d8[i] = ftd->multtable[s8[i]][val]; + } + } +} + +static +int gf_w8_table_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_w8_single_table_data *ftd = NULL; + struct gf_w8_double_table_data *dtd = NULL; + struct gf_w8_double_table_lazy_data *ltd = NULL; + int a, b, c, prod, scase; + + h = (gf_internal_t *) gf->scratch; + + if (h->region_type == 0 || (h->region_type 
| GF_REGION_CAUCHY) || + (h->region_type | GF_REGION_SINGLE_TABLE)) { + ftd = (struct gf_w8_single_table_data *)h->private; + bzero(ftd->divtable, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + bzero(ftd->multtable, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + scase = 0; + } else if (h->region_type == GF_REGION_DOUBLE_TABLE) { + dtd = (struct gf_w8_double_table_data *)h->private; + bzero(dtd->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + bzero(dtd->mult, sizeof(uint16_t) * GF_FIELD_SIZE * GF_FIELD_SIZE * GF_FIELD_SIZE); + scase = 1; + } else if (h->region_type == (GF_REGION_DOUBLE_TABLE | GF_REGION_LAZY)) { + ltd = (struct gf_w8_double_table_lazy_data *)h->private; + bzero(ltd->div, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + bzero(ltd->smult, sizeof(uint8_t) * GF_FIELD_SIZE * GF_FIELD_SIZE); + scase = 2; + } else { + fprintf(stderr, "Internal error in gf_w8_table_init\n"); + exit(0); + } + + for (a = 1; a < GF_FIELD_SIZE; a++) { + b = 1; + prod = a; + do { + switch (scase) { + case 0: + ftd->multtable[a][b] = prod; + ftd->divtable[prod][b] = a; + break; + case 1: + dtd->div[prod][b] = a; + for (c = 0; c < GF_FIELD_SIZE; c++) { + dtd->mult[a][(c<<8)|b] |= prod; + dtd->mult[a][(b<<8)|c] |= (prod<<8); + } + break; + case 2: + ltd->div[prod][b] = a; + ltd->smult[a][b] = prod; + break; + } + b <<= 1; + if (b & GF_FIELD_SIZE) b = b ^ h->prim_poly; + prod <<= 1; + if (prod & GF_FIELD_SIZE) prod = prod ^ h->prim_poly; + + } while (b != 1); + } + + gf->inverse.w32 = NULL; /* Will set from divide */ + switch (scase) { + case 0: + gf->divide.w32 = gf_w8_table_divide; + gf->multiply.w32 = gf_w8_table_multiply; + gf->multiply_region.w32 = gf_w8_table_multiply_region; + break; + case 1: + gf->divide.w32 = gf_w8_double_table_divide; + gf->multiply.w32 = gf_w8_double_table_multiply; + gf->multiply_region.w32 = gf_w8_double_table_multiply_region; + break; + case 2: + gf->divide.w32 = gf_w8_double_table_lazy_divide; + gf->multiply.w32 = 
gf_w8_double_table_lazy_multiply; + gf->multiply_region.w32 = gf_w8_double_table_multiply_region; + break; + } + return 1; +} + +/* ------------------------------------------------------------ + IMPLEMENTATION: FULL_TABLE: + */ + +static +gf_val_32_t +gf_w8_split_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + struct gf_w8_half_table_data *htd; + htd = (struct gf_w8_half_table_data *) ((gf_internal_t *) gf->scratch)->private; + + return htd->high[b][a>>4] ^ htd->low[b][a&0xf]; +} + +static +void +gf_w8_split_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + int i; + uint8_t lv, b, c; + uint8_t *s8, *d8; + struct gf_w8_half_table_data *htd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + htd = (struct gf_w8_half_table_data *) ((gf_internal_t *) gf->scratch)->private; + s8 = (uint8_t *) src; + d8 = (uint8_t *) dest; + + if (xor) { + for (i = 0; i < bytes; i++) { + d8[i] ^= (htd->high[val][s8[i]>>4] ^ htd->low[val][s8[i]&0xf]); + } + } else { + for (i = 0; i < bytes; i++) { + d8[i] = (htd->high[val][s8[i]>>4] ^ htd->low[val][s8[i]&0xf]); + } + } +} + +static +void +gf_w8_split_multiply_region_sse(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + uint8_t *s8, *d8, *bh, *bl, *sptr, *dptr, *top; + __m128i tbl, loset, t1, r, va, mth, mtl; + uint64_t altable[4]; + struct gf_w8_half_table_data *htd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + htd = (struct gf_w8_half_table_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + bh = (uint8_t *) htd->high; + bh += (val << 4); + bl = (uint8_t *) htd->low; + bl += (val << 4); + + sptr = rd.s_start; + dptr = 
rd.d_start; + + mth = _mm_loadu_si128 ((__m128i *)(bh)); + mtl = _mm_loadu_si128 ((__m128i *)(bl)); + loset = _mm_set1_epi8 (0x0f); + + if (xor) { + while (sptr < (uint8_t *) rd.s_top) { + va = _mm_load_si128 ((__m128i *)(sptr)); + t1 = _mm_and_si128 (loset, va); + r = _mm_shuffle_epi8 (mtl, t1); + va = _mm_srli_epi64 (va, 4); + t1 = _mm_and_si128 (loset, va); + r = _mm_xor_si128 (r, _mm_shuffle_epi8 (mth, t1)); + va = _mm_load_si128 ((__m128i *)(dptr)); + r = _mm_xor_si128 (r, va); + _mm_store_si128 ((__m128i *)(dptr), r); + dptr += 16; + sptr += 16; + } + } else { + while (sptr < (uint8_t *) rd.s_top) { + va = _mm_load_si128 ((__m128i *)(sptr)); + t1 = _mm_and_si128 (loset, va); + r = _mm_shuffle_epi8 (mtl, t1); + va = _mm_srli_epi64 (va, 4); + t1 = _mm_and_si128 (loset, va); + r = _mm_xor_si128 (r, _mm_shuffle_epi8 (mth, t1)); + _mm_store_si128 ((__m128i *)(dptr), r); + dptr += 16; + sptr += 16; + } + } + + gf_do_final_region_alignment(&rd); +#endif + +} + + +static +int gf_w8_split_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_w8_half_table_data *htd; + int a, b, c, d, pp; + + h = (gf_internal_t *) gf->scratch; + htd = (struct gf_w8_half_table_data *)h->private; + pp = h->prim_poly; + + bzero(htd->high, sizeof(uint8_t)*GF_FIELD_SIZE*GF_HALF_SIZE); + bzero(htd->low, sizeof(uint8_t)*GF_FIELD_SIZE*GF_HALF_SIZE); + + for (a = 1; a < GF_HALF_SIZE; a++) { + b = 1; + c = a; + d = (a << (GF_FIELD_WIDTH/2)); + do { + htd->low[b][a] = c; + htd->high[b][a] = d; + b <<= 1; + if (b & GF_FIELD_SIZE) b ^= pp; + c <<= 1; + if (c & GF_FIELD_SIZE) c ^= pp; + d <<= 1; + if (d & GF_FIELD_SIZE) d ^= pp; + } while (c != a); + } + + gf->inverse.w32 = NULL; /* Will set from divide */ + gf->divide.w32 = NULL; /* Let the user figure it out. 
*/ + gf->multiply.w32 = gf_w8_split_multiply; + if (h->region_type == GF_REGION_NOSSE) { + gf->multiply_region.w32 = gf_w8_split_multiply_region; + } else { + gf->multiply_region.w32 = gf_w8_split_multiply_region_sse; + } + return 1; +} + +static +void +gf_w8_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t val0 = val & 0x0f; + uint8_t val1 = (val & 0xf0) >> 4; + int sub_reg_size = bytes / 2; + + if (bytes % 2 != 0) gf_alignment_error("gf_w8_composite_multiply_region_alt", 1); + + base_gf->multiply_region.w32(base_gf, src, dest, val0, sub_reg_size, xor); + base_gf->multiply_region.w32(base_gf, src+sub_reg_size, dest, val1, sub_reg_size, 1); + base_gf->multiply_region.w32(base_gf, src, dest+sub_reg_size, val1, sub_reg_size, xor); + base_gf->multiply_region.w32(base_gf, src+sub_reg_size, dest+sub_reg_size, val0, sub_reg_size, 1); + base_gf->multiply_region.w32(base_gf, src+sub_reg_size, dest+sub_reg_size, base_gf->multiply.w32(base_gf, GF_S_GF_4_2, val1), sub_reg_size, 1); +} + +static +gf_val_32_t +gf_w8_composite_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t b0 = b & 0x0f; + uint8_t b1 = (b & 0xf0) >> 4; + uint8_t a0 = a & 0x0f; + uint8_t a1 = (a & 0xf0) >> 4; + uint8_t a1b1; + + a1b1 = base_gf->multiply.w32(base_gf, a1, b1); + + return ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_4_2)) << 4)); +} + +/* + * Composite field division trick (explained in 2007 tech report) + * + * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1 + * + * let c = b^-1 + * + * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0) + * + * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1 + * + * let d = b1c1 and d+1 = b0c0 + * + * 
solve s*b1c1+b1c0+b0c1 = 0 + * + * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1 + * + * c0 = (d+1)b0^-1 + * c1 = d*b1^-1 + * + * a / b = a * c + */ +static +gf_val_32_t +gf_w8_composite_inverse(gf_t *gf, gf_val_32_t a) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t a0 = a & 0x0f; + uint8_t a1 = (a & 0xf0) >> 4; + uint8_t c0, c1, c, d, tmp; + uint8_t a0inv, a1inv; + + + if (a0 == 0) { + a1inv = base_gf->inverse.w32(base_gf, a1) & 0xf; + c0 = base_gf->multiply.w32(base_gf, a1inv, GF_S_GF_4_2); + c1 = a1inv; + } else if (a1 == 0) { + c0 = base_gf->inverse.w32(base_gf, a0); + c1 = 0; + } else { + a1inv = base_gf->inverse.w32(base_gf, a1) & 0xf; + a0inv = base_gf->inverse.w32(base_gf, a0) & 0xf; + + d = base_gf->multiply.w32(base_gf, a1, a0inv) & 0xf; + + tmp = (base_gf->multiply.w32(base_gf, a1, a0inv) ^ base_gf->multiply.w32(base_gf, a0, a1inv) ^ GF_S_GF_4_2) & 0xf; + tmp = base_gf->inverse.w32(base_gf, tmp) & 0xf; + + d = base_gf->multiply.w32(base_gf, d, tmp) & 0xf; + + c0 = base_gf->multiply.w32(base_gf, (d^1), a0inv) & 0xf; + c1 = base_gf->multiply.w32(base_gf, d, a1inv) & 0xf; + } + + c = c0 | (c1 << 4); + + return c; +} + +static +gf_val_32_t +gf_w8_composite_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_val_32_t binv; + + binv = gf_w8_composite_inverse(gf, b); + + return gf_w8_composite_multiply(gf, a, binv); +} + +static +void +gf_w8_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + int i=0; + struct gf_w4_single_table_data * std; + uint8_t b0 = val & 0x0f; + uint8_t b1 = (val & 0xf0) >> 4; + uint8_t *s8 = (uint8_t *) src; + uint8_t *d8 = (uint8_t *) dest; + uint8_t a0, a1, a1b1; + + uls = ((unsigned long) src) & 0xf; + uld = ((unsigned long) dest) & 0xf; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w8_composite_multiply_region", 1); + 
+  if (val == 0) {
+    if (xor) return;
+    bzero(dest, bytes);
+    return;
+  }
+
+  std = (struct gf_w4_single_table_data *) h->private;
+
+  if (xor) {
+    for (i = 0;i < bytes; i++) {
+      a0 = s8[i] & 0x0f;
+      a1 = (s8[i] & 0xf0) >> 4;
+      a1b1 = std->mult[a1][b1];
+
+      d8[i] ^= ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) |
+               ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_4_2)) << 4));
+
+    }
+  } else {
+    for (i = 0;i < bytes; i++) {
+      a0 = s8[i] & 0x0f;
+      a1 = (s8[i] & 0xf0) >> 4;
+      a1b1 = std->mult[a1][b1];
+
+      d8[i] = ((base_gf->multiply.w32(base_gf, a0, b0) ^ a1b1) |
+              ((base_gf->multiply.w32(base_gf, a1, b0) ^ base_gf->multiply.w32(base_gf, a0, b1) ^ base_gf->multiply.w32(base_gf, a1b1, GF_S_GF_4_2)) << 4));
+    }
+  }
+  return;
+}
+
+/* Composite-field region multiply that takes every nibble product from the
+   16x16 table filled by gf_w8_composite_init instead of calling the base
+   field: dest[i] = val * src[i], XORed into dest when xor is nonzero.
+   src and dest must agree in their low three address bits, otherwise
+   gf_alignment_error is called.  val == 0 zeroes dest (or leaves it alone
+   when xor is set). */
+static
+void
+gf_w8_composite_multiply_region_table(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
+{
+  unsigned long uls, uld;
+  gf_internal_t *h = (gf_internal_t *) gf->scratch;
+  int i=0;
+  struct gf_w4_single_table_data * std;
+  uint8_t b0 = val & 0x0f;
+  uint8_t b1 = (val & 0xf0) >> 4;
+  uint8_t *s8 = (uint8_t *) src;
+  uint8_t *d8 = (uint8_t *) dest;
+  uint8_t a0, a1, a1b1;
+
+  uls = ((unsigned long) src) & 0xf;
+  uld = ((unsigned long) dest) & 0xf;
+  /* Report this function's own name; the message used to be a copy of the
+     one in gf_w8_composite_multiply_region. */
+  if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w8_composite_multiply_region_table", 1);
+
+  if (val == 0) {
+    if (xor) return;
+    bzero(dest, bytes);
+    return;
+  }
+
+  std = (struct gf_w4_single_table_data *) h->private;
+
+  if (xor) {
+    for (i = 0;i < bytes; i++) {
+      a0 = s8[i] & 0x0f;
+      a1 = (s8[i] & 0xf0) >> 4;
+      a1b1 = std->mult[a1][b1];
+
+      d8[i] ^= ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_4_2]) << 4));
+
+    }
+  } else {
+    for (i = 0;i < bytes; i++) {
+      a0 = s8[i] & 0x0f;
+      a1 = (s8[i] & 0xf0) >> 4;
+      a1b1 = std->mult[a1][b1];
+
+      d8[i] = ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_4_2]) << 4));
+    }
+  }
+  return;
+}
+
+/* Fills the 16x16 base-field product table in h->private from the base
+   field's multiply, then installs the composite multiply, divide, inverse
+   and the region multiply selected by h->region_type (ALTMAP, SINGLE_TABLE,
+   or neither).  Always returns 1. */
+static
+int gf_w8_composite_init(gf_t *gf)
+{
+  struct gf_w4_single_table_data * std;
+  gf_internal_t *h = (gf_internal_t *) gf->scratch;
+  gf_t *base_gf = h->base_gf;
+  uint8_t a, b;
+
+  std = (struct gf_w4_single_table_data *) h->private;
+
+  for (a = 0; a < 16; a++) {
+    for (b = 0; b < 16; b++) {
+      std->mult[a][b] = base_gf->multiply.w32(base_gf, a, b);
+    }
+  }
+
+  if (h->region_type & GF_REGION_ALTMAP) {
+    gf->multiply_region.w32 = gf_w8_composite_multiply_region_alt;
+  } else {
+    if (h->region_type & GF_REGION_SINGLE_TABLE) {
+      gf->multiply_region.w32 = gf_w8_composite_multiply_region_table;
+    } else {
+      gf->multiply_region.w32 = gf_w8_composite_multiply_region;
+    }
+  }
+
+  gf->multiply.w32 = gf_w8_composite_multiply;
+  gf->divide.w32 = gf_w8_composite_divide;
+  gf->inverse.w32 = gf_w8_composite_inverse;
+
+  return 1;
+}
+
+/* BYTWO_p product: scans a from bit 7 down to bit 0; each step doubles the
+   running product (reducing by prim_poly when bit 7 was set) and then adds
+   b if the current bit of a is set. */
+static
+inline
+gf_val_32_t
+gf_w8_bytwo_p_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
+{
+  uint32_t prod, pp, pmask, amask;
+  gf_internal_t *h;
+
+  h = (gf_internal_t *) gf->scratch;
+  pp = h->prim_poly;
+
+
+  prod = 0;
+  pmask = 0x80;
+  amask = 0x80;
+
+  while (amask != 0) {
+    if (prod & pmask) {
+      prod = ((prod << 1) ^ pp);
+    } else {
+      prod <<= 1;
+    }
+    if (a & amask) prod ^= b;
+    amask >>= 1;
+  }
+  return prod;
+}
+
+/* BYTWO_b product: scans a from bit 0 upward, adding the current b whenever
+   the low bit of a is set and doubling b (reducing by prim_poly when bit 7
+   was set) between steps.  Returns as soon as a has no bits left. */
+static
+inline
+gf_val_32_t
+gf_w8_bytwo_b_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b)
+{
+  uint32_t prod, pp, bmask;
+  gf_internal_t *h;
+
+  h = (gf_internal_t *) gf->scratch;
+  pp = h->prim_poly;
+
+  prod = 0;
+  bmask = 0x80;
+
+  while (1) {
+    if (a & 1) prod ^= b;
+    a >>= 1;
+    if (a == 0) return prod;
+    if (b & bmask) {
+      b = ((b << 1) ^ pp);
+    } else {
+      b <<= 1;
+    }
+  }
+}
+
+static
+void
+gf_w8_bytwo_p_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
+{
+  uint64_t *s64, *d64, t1, t2, ta, prod, amask;
+  gf_region_data rd;
+  struct gf_w8_bytwo_data *btd;
+
+  if (val == 0) { 
gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + btd = (struct gf_w8_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 8); + gf_do_initial_region_alignment(&rd); + + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + + if (xor) { + while (s64 < (uint64_t *) rd.s_top) { + prod = 0; + amask = 0x80; + ta = *s64; + while (amask != 0) { + AB2(btd->prim_poly, btd->mask1, btd->mask2, prod, t1, t2); + if (val & amask) prod ^= ta; + amask >>= 1; + } + *d64 ^= prod; + d64++; + s64++; + } + } else { + while (s64 < (uint64_t *) rd.s_top) { + prod = 0; + amask = 0x80; + ta = *s64; + while (amask != 0) { + AB2(btd->prim_poly, btd->mask1, btd->mask2, prod, t1, t2); + if (val & amask) prod ^= ta; + amask >>= 1; + } + *d64 = prod; + d64++; + s64++; + } + } + gf_do_final_region_alignment(&rd); +} + +#define BYTWO_P_ONESTEP {\ + SSE_AB2(pp, m1 ,m2, prod, t1, t2); \ + t1 = _mm_and_si128(v, one); \ + t1 = _mm_sub_epi8(t1, one); \ + t1 = _mm_and_si128(t1, ta); \ + prod = _mm_xor_si128(prod, t1); \ + v = _mm_srli_epi64(v, 1); } + +static +void +gf_w8_bytwo_p_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *s8, *d8; + uint8_t vrev; + uint64_t amask; + __m128i pp, m1, m2, ta, prod, t1, t2, tp, one, v; + struct gf_w8_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + btd = (struct gf_w8_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + vrev = 0; + for (i = 0; i < 8; i++) { + vrev <<= 1; + if (!(val & (1 << i))) vrev |= 1; + } + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + pp = 
_mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + one = _mm_set1_epi8(1); + + while (d8 < (uint8_t *) rd.d_top) { + prod = _mm_setzero_si128(); + v = _mm_set1_epi8(vrev); + ta = _mm_load_si128((__m128i *) s8); + tp = (!xor) ? _mm_setzero_si128() : _mm_load_si128((__m128i *) d8); + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + BYTWO_P_ONESTEP; + _mm_store_si128((__m128i *) d8, _mm_xor_si128(prod, tp)); + d8 += 16; + s8 += 16; + } + gf_do_final_region_alignment(&rd); +#endif +} + +static +void +gf_w8_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w8_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} + +static +void +gf_w8_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w8_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_load_si128 ((__m128i *)(d8)); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} + + +static +void +gf_w8_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ 
+#ifdef INTEL_SSE4 + int itb; + uint8_t *d8, *s8; + __m128i pp, m1, m2, t1, t2, va, vb; + struct gf_w8_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + btd = (struct gf_w8_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + if (val == 2) { + if (xor) { + gf_w8_bytwo_b_sse_region_2_xor(&rd, btd); + } else { + gf_w8_bytwo_b_sse_region_2_noxor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + } + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + pp = _mm_set1_epi8(btd->prim_poly&0xff); + m1 = _mm_set1_epi8((btd->mask1)&0xff); + m2 = _mm_set1_epi8((btd->mask2)&0xff); + + while (d8 < (uint8_t *) rd.d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = (!xor) ? _mm_setzero_si128() : _mm_load_si128 ((__m128i *)(d8)); + itb = val; + while (1) { + if (itb & 1) vb = _mm_xor_si128(vb, va); + itb >>= 1; + if (itb == 0) break; + SSE_AB2(pp, m1, m2, va, t1, t2); + } + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } + + gf_do_final_region_alignment(&rd); +#endif +} + +static +void +gf_w8_bytwo_b_nosse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + int i; + uint8_t *s8, *d8, *top; + uint64_t *s64, *d64, t1, t2, ta, tb, prod; + struct gf_w8_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + btd = (struct gf_w8_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + s64 = (uint64_t *) rd.s_start; + d64 = (uint64_t *) rd.d_start; + + switch (val) { + case 2: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + 
AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 3: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 4: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; + case 5: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta ^ prod; + d64++; + s64++; + } + } + case 6: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + 
AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta ^ prod; + d64++; + s64++; + } + } +/* + case 7: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta ^ prod; + d64++; + s64++; + } + } + break; + */ + case 8: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= ta; + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = ta; + d64++; + s64++; + } + } + break; +/* + case 9: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 10: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + 
AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 11: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 12: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 13: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, 
btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 14: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; + case 15: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 ^= (ta ^ prod); + d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + ta = *s64; + prod = ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + prod ^= ta; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + *d64 = (ta ^ prod); + d64++; + s64++; + } + } + break; +*/ + default: + if (xor) { + while (d64 < (uint64_t *) rd.d_top) { + prod = *d64 ; + ta = *s64; + tb = val; + while (1) { + if (tb & 1) prod ^= ta; + tb >>= 1; + if (tb == 0) break; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + } + *d64 = prod; + 
d64++; + s64++; + } + } else { + while (d64 < (uint64_t *) rd.d_top) { + prod = 0 ; + ta = *s64; + tb = val; + while (1) { + if (tb & 1) prod ^= ta; + tb >>= 1; + if (tb == 0) break; + AB2(btd->prim_poly, btd->mask1, btd->mask2, ta, t1, t2); + } + *d64 = prod; + d64++; + s64++; + } + } + break; + } + gf_do_final_region_alignment(&rd); +} + +static +int gf_w8_bytwo_init(gf_t *gf) +{ + gf_internal_t *h; + uint64_t ip, m1, m2; + struct gf_w8_bytwo_data *btd; + + h = (gf_internal_t *) gf->scratch; + btd = (struct gf_w8_bytwo_data *) (h->private); + ip = h->prim_poly & 0xff; + m1 = 0xfe; + m2 = 0x80; + btd->prim_poly = 0; + btd->mask1 = 0; + btd->mask2 = 0; + + while (ip != 0) { + btd->prim_poly |= ip; + btd->mask1 |= m1; + btd->mask2 |= m2; + ip <<= GF_FIELD_WIDTH; + m1 <<= GF_FIELD_WIDTH; + m2 <<= GF_FIELD_WIDTH; + } + + if (h->mult_type == GF_MULT_BYTWO_p) { + gf->multiply.w32 = gf_w8_bytwo_p_multiply; + if (h->region_type == GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w8_bytwo_p_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w8_bytwo_p_nosse_multiply_region; + } + } else { + gf->multiply.w32 = gf_w8_bytwo_b_multiply; + if (h->region_type == GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w8_bytwo_b_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w8_bytwo_b_nosse_multiply_region; + } + } + gf->inverse.w32 = gf_w8_euclid; + return 1; +} + + +/* ------------------------------------------------------------ + General procedures. 
+ */ + +int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + int sse; + + sse = (GF_REGION_SSE | GF_REGION_NOSSE); + + switch(mult_type) + { + case GF_MULT_DEFAULT: + case GF_MULT_TABLE: + if (arg1 != 0 || arg2 != 0) return -1; + if (region_type == GF_REGION_CAUCHY || region_type == (GF_REGION_CAUCHY | GF_REGION_SINGLE_TABLE)) { + return sizeof(gf_internal_t) + sizeof(struct gf_w8_single_table_data) + 64; + } + + if (region_type == 0) region_type = GF_REGION_SINGLE_TABLE; + if (region_type & GF_REGION_SINGLE_TABLE) { + if (region_type != GF_REGION_SINGLE_TABLE) return 0; + return sizeof(gf_internal_t) + sizeof(struct gf_w8_single_table_data) + 64; + } + if (region_type & GF_REGION_DOUBLE_TABLE) { + if (region_type == GF_REGION_DOUBLE_TABLE) { + return sizeof(gf_internal_t) + sizeof(struct gf_w8_double_table_data) + 64; + } else if (region_type == (GF_REGION_DOUBLE_TABLE | GF_REGION_LAZY)) { + return sizeof(gf_internal_t) + sizeof(struct gf_w8_double_table_lazy_data) + 64; + } else { + return -1; + } + } + return -1; + break; + case GF_MULT_BYTWO_p: + case GF_MULT_BYTWO_b: + if (arg1 != 0 || arg2 != 0) return -1; + if (region_type != GF_REGION_CAUCHY) { + if ((region_type | sse) != sse || (region_type & sse) == sse) return -1; + } + return sizeof(gf_internal_t) + sizeof(struct gf_w8_bytwo_data); + break; + case GF_MULT_SPLIT_TABLE: + if ((arg1 == 4 && arg2 == 8) || (arg1 == 8 && arg2 == 4)) { + if (region_type == GF_REGION_CAUCHY) { + return sizeof(gf_internal_t) + sizeof(struct gf_w8_half_table_data) + 64; + } + if (region_type == 0) region_type = GF_REGION_SSE; + if ((region_type | sse) != sse) return -1; + if ((region_type & sse) == sse) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_w8_half_table_data) + 64; + } + return -1; + break; + case GF_MULT_LOG_TABLE: + if ((arg1 != 0 && arg1 != 1) || arg2 != 0) return -1; + if (region_type != 0 && region_type != GF_REGION_CAUCHY) return -1; + if (arg1 == 0) 
return sizeof(gf_internal_t) + sizeof(struct gf_w8_logtable_data) + 64; + return sizeof(gf_internal_t) + sizeof(struct gf_w8_logzero_table_data) + 64; + break; + case GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0) return -1; + if (region_type != 0 && region_type != GF_REGION_CAUCHY) return -1; + return sizeof(gf_internal_t); + break; + case GF_MULT_COMPOSITE: + if (region_type & ~(GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1; + if ((region_type & (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) == (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) return -1; + if (arg1 == 2 && arg2 == 4) { + return sizeof(gf_internal_t) + sizeof(struct gf_w4_single_table_data) + 64; + } else { + return -1; + } + default: + return -1; + } +} + +int gf_w8_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x11d; + + gf->multiply.w32 = NULL; + gf->divide.w32 = NULL; + gf->inverse.w32 = NULL; + gf->multiply_region.w32 = NULL; + gf->extract_word.w32 = gf_w8_extract_word; + + switch(h->mult_type) { + case GF_MULT_DEFAULT: + case GF_MULT_TABLE: if (gf_w8_table_init(gf) == 0) return 0; break; + case GF_MULT_BYTWO_p: + case GF_MULT_BYTWO_b: if (gf_w8_bytwo_init(gf) == 0) return 0; break; + case GF_MULT_LOG_TABLE: if (gf_w8_log_init(gf) == 0) return 0; break; + case GF_MULT_SHIFT: if (gf_w8_shift_init(gf) == 0) return 0; break; + case GF_MULT_SPLIT_TABLE: if (gf_w8_split_init(gf) == 0) return 0; break; + case GF_MULT_COMPOSITE: if (gf_w8_composite_init(gf) == 0) return 0; break; + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w32 = gf_w8_divide_from_inverse; + gf->inverse.w32 = gf_w8_euclid; + } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w32 = gf_w8_divide_from_inverse; + gf->inverse.w32 = gf_w8_matrix; + } + + if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { + gf->divide.w32 = gf_w8_divide_from_inverse; + } + if (gf->inverse.w32 == NULL && 
gf->divide.w32 != NULL) { + gf->inverse.w32 = gf_w8_inverse_from_divide; + } + + if (h->region_type == GF_REGION_CAUCHY) { + gf->multiply_region.w32 = gf_wgen_cauchy_region; + gf->extract_word.w32 = gf_wgen_extract_word; + } + + return 1; +} diff --git a/gf_wgen.c b/gf_wgen.c new file mode 100644 index 0000000..86dfa4a --- /dev/null +++ b/gf_wgen.c @@ -0,0 +1,945 @@ +/* + * gf_wgen.c + * + * Routines for Galois fields for general w < 32. For specific w, + like 4, 8, 16, 32, 64 and 128, see the other files. + */ + +#include "gf_int.h" +#include +#include + +struct gf_wgen_table_w8_data { + uint8_t *mult; + uint8_t *div; + uint8_t base; +}; + +struct gf_wgen_table_w16_data { + uint16_t *mult; + uint16_t *div; + uint16_t base; +}; + +struct gf_wgen_log_w8_data { + uint8_t *log; + uint8_t *anti; + uint8_t *danti; + uint8_t base; +}; + +struct gf_wgen_log_w16_data { + uint16_t *log; + uint16_t *anti; + uint16_t *danti; + uint16_t base; +}; + +struct gf_wgen_log_w32_data { + uint32_t *log; + uint32_t *anti; + uint32_t *danti; + uint32_t base; +}; + +struct gf_wgen_group_data { + uint32_t *reduce; + uint32_t *shift; + uint32_t mask; + uint64_t rmask; + int tshift; + uint32_t memory; +}; + +static +inline +gf_val_32_t gf_wgen_inverse_from_divide (gf_t *gf, gf_val_32_t a) +{ + return gf->divide.w32(gf, 1, a); +} + +static +inline +gf_val_32_t gf_wgen_divide_from_inverse (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + b = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, b); +} + +static +inline +gf_val_32_t gf_wgen_euclid (gf_t *gf, gf_val_32_t b) +{ + + gf_val_32_t e_i, e_im1, e_ip1; + gf_val_32_t d_i, d_im1, d_ip1; + gf_val_32_t y_i, y_im1, y_ip1; + gf_val_32_t c_i; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = ((gf_internal_t *) (gf->scratch))->w; + for (d_i = d_im1; ((1 << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 
>= d_i) { + c_i ^= (1 << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i = e_ip1; + d_i = d_ip1; + } + + return y_i; +} + +gf_val_32_t gf_wgen_extract_word(gf_t *gf, void *start, int bytes, int index) +{ + uint8_t *ptr; + uint32_t rv; + int rs; + int byte, bit, i; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + rs = bytes / h->w; + byte = index/8; + bit = index%8; + + ptr = (uint8_t *) start; + ptr += bytes; + ptr -= rs; + ptr += byte; + + rv = 0; + for (i = 0; i < h->w; i++) { + rv <<= 1; + if ((*ptr) & (1 << bit)) rv |= 1; + ptr -= rs; + } + + return rv; +} + +static +inline +gf_val_32_t gf_wgen_matrix (gf_t *gf, gf_val_32_t b) +{ + return gf_bitmatrix_inverse(b, ((gf_internal_t *) (gf->scratch))->w, + ((gf_internal_t *) (gf->scratch))->prim_poly); +} + +static +inline +uint32_t +gf_wgen_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32) +{ + uint64_t product, i, pp, a, b, one; + gf_internal_t *h; + + a = a32; + b = b32; + h = (gf_internal_t *) gf->scratch; + one = 1; + pp = h->prim_poly | (one << h->w); + + product = 0; + + for (i = 0; i < h->w; i++) { + if (a & (one << i)) product ^= (b << i); + } + for (i = h->w*2-1; i >= h->w; i--) { + if (product & (one << i)) product ^= (pp << (i-h->w)); + } + return product; +} + +static +int gf_wgen_shift_init(gf_t *gf) +{ + gf->multiply.w32 = gf_wgen_shift_multiply; + gf->inverse.w32 = gf_wgen_euclid; + return 1; +} + +static +gf_val_32_t +gf_wgen_bytwo_b_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint32_t prod, pp, bmask; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + prod = 0; + bmask = (1 << (h->w-1)); + + while (1) { + if (a & 1) prod ^= b; + a >>= 1; + if (a == 0) return prod; + if (b & bmask) { + b = ((b << 1) ^ pp); + } else { + b <<= 1; + } + } +} + +static +int 
gf_wgen_bytwo_b_init(gf_t *gf) +{ + gf->multiply.w32 = gf_wgen_bytwo_b_multiply; + gf->inverse.w32 = gf_wgen_euclid; + return 1; +} + +static +inline +gf_val_32_t +gf_wgen_bytwo_p_multiply (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + uint32_t prod, pp, pmask, amask; + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + prod = 0; + pmask = (1 << (h->w)-1); + amask = pmask; + + while (amask != 0) { + if (prod & pmask) { + prod = ((prod << 1) ^ pp); + } else { + prod <<= 1; + } + if (a & amask) prod ^= b; + amask >>= 1; + } + return prod; +} + + +static +int gf_wgen_bytwo_p_init(gf_t *gf) +{ + gf->multiply.w32 = gf_wgen_bytwo_p_multiply; + gf->inverse.w32 = gf_wgen_euclid; + return 1; +} + +static +void +gf_wgen_group_set_shift_tables(uint32_t *shift, uint32_t val, gf_internal_t *h) +{ + int i; + uint32_t j; + + shift[0] = 0; + + for (i = 1; i < (1 << h->arg1); i <<= 1) { + for (j = 0; j < i; j++) shift[i|j] = shift[j]^val; + if (val & (1 << (h->w-1))) { + val <<= 1; + val ^= h->prim_poly; + } else { + val <<= 1; + } + } +} + +static +inline +gf_val_32_t +gf_wgen_group_s_equals_r_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + int i; + int leftover, rs; + uint32_t p, l, ind, r, a32; + int bits_left; + int g_s; + int w; + + struct gf_wgen_group_data *gd; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + g_s = h->arg1; + w = h->w; + + gd = (struct gf_wgen_group_data *) h->private; + gf_wgen_group_set_shift_tables(gd->shift, b, h); + + leftover = w % g_s; + if (leftover == 0) leftover = g_s; + + rs = w - leftover; + a32 = a; + ind = a32 >> rs; + a32 <<= leftover; + a32 &= gd->mask; + p = gd->shift[ind]; + + bits_left = rs; + rs = w - g_s; + + while (bits_left > 0) { + bits_left -= g_s; + ind = a32 >> rs; + a32 <<= g_s; + a32 &= gd->mask; + l = p >> rs; + p = (gd->shift[ind] ^ gd->reduce[l] ^ (p << g_s)) & gd->mask; + } + return p; +} + +char *bits(uint32_t v) +{ + char *rv; + int i, j; + + rv = malloc(30); + j = 0; + for (i = 27; 
i >= 0; i--) { + rv[j] = '0' + ((v & (1 << i)) ? 1 : 0); + j++; + } + rv[j] = '\0'; + return rv; +} +char *bits_56(uint64_t v) +{ + char *rv; + int i, j; + uint64_t one; + + one = 1; + + rv = malloc(60); + j = 0; + for (i = 55; i >= 0; i--) { + rv[j] = '0' + ((v & (one << i)) ? 1 : 0); + j++; + } + rv[j] = '\0'; + return rv; +} + +static +inline +gf_val_32_t +gf_wgen_group_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + int i; + int leftover; + uint64_t p, l, r, mask; + uint32_t a32, ind; + int g_s, g_r; + struct gf_wgen_group_data *gd; + int w; + + gf_internal_t *h = (gf_internal_t *) gf->scratch; + g_s = h->arg1; + g_r = h->arg2; + w = h->w; + gd = (struct gf_wgen_group_data *) h->private; + gf_wgen_group_set_shift_tables(gd->shift, b, h); + + leftover = w % g_s; + if (leftover == 0) leftover = g_s; + + a32 = a; + ind = a32 >> (w - leftover); + p = gd->shift[ind]; + p <<= g_s; + a32 <<= leftover; + a32 &= gd->mask; + + i = (w - leftover); + while (i > g_s) { + ind = a32 >> (w-g_s); + p ^= gd->shift[ind]; + a32 <<= g_s; + a32 &= gd->mask; + p <<= g_s; + i -= g_s; + } + + ind = a32 >> (h->w-g_s); + p ^= gd->shift[ind]; + + for (i = gd->tshift ; i >= 0; i -= g_r) { + l = p & (gd->rmask << i); + r = gd->reduce[l >> (i+w)]; + r <<= (i); + p ^= r; + } + return p & gd->mask; +} + +static +int gf_wgen_group_init(gf_t *gf) +{ + uint32_t i, j, p, index; + struct gf_wgen_group_data *gd; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + int g_s, g_r; + + g_s = h->arg1; + g_r = h->arg2; + gd = (struct gf_wgen_group_data *) h->private; + gd->shift = &(gd->memory); + gd->reduce = gd->shift + (1 << g_s); + gd->mask = (h->w != 31) ? 
((1 << h->w)-1) : 0x7fffffff; + + gd->rmask = (1 << g_r) - 1; + gd->rmask <<= h->w; + + gd->tshift = h->w % g_s; + if (gd->tshift == 0) gd->tshift = g_s; + gd->tshift = (h->w - gd->tshift); + gd->tshift = ((gd->tshift-1)/g_r) * g_r; + + gd->reduce[0] = 0; + for (i = 0; i < (1 << g_r); i++) { + p = 0; + index = 0; + for (j = 0; j < g_r; j++) { + if (i & (1 << j)) { + p ^= (h->prim_poly << j); + index ^= (h->prim_poly >> (h->w-j)); + } + } + gd->reduce[index] = (p & gd->mask); + } + + if (g_s == g_r) { + gf->multiply.w32 = gf_wgen_group_s_equals_r_multiply; + } else { + gf->multiply.w32 = gf_wgen_group_multiply; + } + gf->divide.w32 = NULL; + gf->divide.w32 = NULL; + return 1; +} + + +static +gf_val_32_t +gf_wgen_table_8_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_table_w8_data *std; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_table_w8_data *) h->private; + + return (std->mult[(a<w)+b]); +} + +static +gf_val_32_t +gf_wgen_table_8_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_table_w8_data *std; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_table_w8_data *) h->private; + + return (std->div[(a<w)+b]); +} + +static +int gf_wgen_table_8_init(gf_t *gf) +{ + gf_internal_t *h; + int w; + struct gf_wgen_table_w8_data *std; + uint32_t a, b, p, pp; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + std = (struct gf_wgen_table_w8_data *) h->private; + + std->mult = &(std->base); + std->div = std->mult + ((1<w)*(1<w)); + + for (a = 0; a < (1 << w); a++) { + std->mult[a] = 0; + std->mult[a<div[a] = 0; + std->div[a<mult[(a<div[(p<prim_poly : (b << 1); + b &= ((1 << w)-1); + p = (p & (1 << (w-1))) ? 
(p << 1) ^ h->prim_poly : (p << 1); + p &= ((1 << w)-1); + } while (b != 1); + } + + gf->multiply.w32 = gf_wgen_table_8_multiply; + gf->divide.w32 = gf_wgen_table_8_divide; + return 1; +} + +static +gf_val_32_t +gf_wgen_table_16_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_table_w16_data *std; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_table_w16_data *) h->private; + + return (std->mult[(a<w)+b]); +} + +static +gf_val_32_t +gf_wgen_table_16_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_table_w16_data *std; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_table_w16_data *) h->private; + + return (std->div[(a<w)+b]); +} + +static +int gf_wgen_table_16_init(gf_t *gf) +{ + gf_internal_t *h; + int w; + struct gf_wgen_table_w16_data *std; + uint32_t a, b, p, pp; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + std = (struct gf_wgen_table_w16_data *) h->private; + + std->mult = &(std->base); + std->div = std->mult + ((1<w)*(1<w)); + + for (a = 0; a < (1 << w); a++) { + std->mult[a] = 0; + std->mult[a<div[a] = 0; + std->div[a<mult[(a<div[(p<prim_poly : (b << 1); + b &= ((1 << w)-1); + p = (p & (1 << (w-1))) ? 
(p << 1) ^ h->prim_poly : (p << 1); + p &= ((1 << w)-1); + } while (b != 1); + } + + gf->multiply.w32 = gf_wgen_table_16_multiply; + gf->divide.w32 = gf_wgen_table_16_divide; + return 1; +} + +static +int gf_wgen_table_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->w <= 8) return gf_wgen_table_8_init(gf); + if (h->w <= 14) return gf_wgen_table_16_init(gf); +} + +static +gf_val_32_t +gf_wgen_log_8_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_log_w8_data *std; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_log_w8_data *) h->private; + + if (a == 0 || b == 0) return 0; + return (std->anti[std->log[a]+std->log[b]]); +} + +static +gf_val_32_t +gf_wgen_log_8_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_log_w8_data *std; + int index; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_log_w8_data *) h->private; + + if (a == 0 || b == 0) return 0; + index = std->log[a]; + index -= std->log[b]; + + return (std->danti[index]); +} + +static +int gf_wgen_log_8_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_wgen_log_w8_data *std; + int w; + uint32_t a, i; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + std = (struct gf_wgen_log_w8_data *) h->private; + + std->log = &(std->base); + std->anti = std->log + (1<w); + std->danti = std->anti + (1<w)-1; + + i = 0; + a = 1; + do { + std->log[a] = i; + std->anti[i] = a; + std->danti[i] = a; + i++; + a = (a & (1 << (w-1))) ? 
(a << 1) ^ h->prim_poly : (a << 1); + a &= ((1 << w)-1); + } while (a != 1); + + gf->multiply.w32 = gf_wgen_log_8_multiply; + gf->divide.w32 = gf_wgen_log_8_divide; + return 1; +} + +static +gf_val_32_t +gf_wgen_log_16_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_log_w16_data *std; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_log_w16_data *) h->private; + + if (a == 0 || b == 0) return 0; + return (std->anti[std->log[a]+std->log[b]]); +} + +static +gf_val_32_t +gf_wgen_log_16_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_log_w16_data *std; + int index; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_log_w16_data *) h->private; + + if (a == 0 || b == 0) return 0; + index = std->log[a]; + index -= std->log[b]; + + return (std->danti[index]); +} + +static +int gf_wgen_log_16_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_wgen_log_w16_data *std; + int w; + uint32_t a, i; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + std = (struct gf_wgen_log_w16_data *) h->private; + + std->log = &(std->base); + std->anti = std->log + (1<w); + std->danti = std->anti + (1<w)-1; + + i = 0; + a = 1; + do { + std->log[a] = i; + std->anti[i] = a; + std->danti[i] = a; + i++; + a = (a & (1 << (w-1))) ? 
(a << 1) ^ h->prim_poly : (a << 1); + a &= ((1 << w)-1); + } while (a != 1); + + gf->multiply.w32 = gf_wgen_log_16_multiply; + gf->divide.w32 = gf_wgen_log_16_divide; + return 1; +} + +static +gf_val_32_t +gf_wgen_log_32_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_log_w32_data *std; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_log_w32_data *) h->private; + + if (a == 0 || b == 0) return 0; + return (std->anti[std->log[a]+std->log[b]]); +} + +static +gf_val_32_t +gf_wgen_log_32_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h; + struct gf_wgen_log_w32_data *std; + int index; + + h = (gf_internal_t *) gf->scratch; + std = (struct gf_wgen_log_w32_data *) h->private; + + if (a == 0 || b == 0) return 0; + index = std->log[a]; + index -= std->log[b]; + + return (std->danti[index]); +} + +static +int gf_wgen_log_32_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_wgen_log_w32_data *std; + int w; + uint32_t a, i; + + h = (gf_internal_t *) gf->scratch; + w = h->w; + std = (struct gf_wgen_log_w32_data *) h->private; + + std->log = &(std->base); + std->anti = std->log + (1<w); + std->danti = std->anti + (1<w)-1; + + i = 0; + a = 1; + do { + std->log[a] = i; + std->anti[i] = a; + std->danti[i] = a; + i++; + a = (a & (1 << (w-1))) ? 
(a << 1) ^ h->prim_poly : (a << 1); + a &= ((1 << w)-1); + } while (a != 1); + + gf->multiply.w32 = gf_wgen_log_32_multiply; + gf->divide.w32 = gf_wgen_log_32_divide; + return 1; +} + +static +int gf_wgen_log_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->w <= 8) return gf_wgen_log_8_init(gf); + if (h->w <= 16) return gf_wgen_log_16_init(gf); + if (h->w <= 32) return gf_wgen_log_32_init(gf); +} + +int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + + if (w > 32 || w < 0) return -1; + + if ((region_type | GF_REGION_CAUCHY) != GF_REGION_CAUCHY) return -1; + + switch(mult_type) + { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: + case GF_MULT_BYTWO_b: + case GF_MULT_BYTWO_p: + if (arg1 != 0 || arg2 != 0) return -1; + return sizeof(gf_internal_t); + break; + case GF_MULT_GROUP: + if (arg1 <= 0 || arg2 <= 0) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_wgen_group_data) + + sizeof(uint32_t) * (1 << arg1) + + sizeof(uint32_t) * (1 << arg2) + 64; + break; + + case GF_MULT_TABLE: + if (arg1 != 0 || arg2 != 0) return -1; + if (w <= 8) { + return sizeof(gf_internal_t) + sizeof(struct gf_wgen_table_w8_data) + + sizeof(uint8_t)*(1 << w)*(1<scratch; + rs = bytes / (h->w); + + written = (xor) ? 
0xffffffff : 0; + for (i = 0; i < h->w; i++) { + for (j = 0; j < h->w; j++) { + if (val & (1 << j)) { + gf_multby_one(gf, src, dest + j*rs, rs, (written & (1 << j))); + written |= (1 << j); + } + } + src += rs; + val = gf->multiply.w32(gf, val, 2); + } +} + +int gf_wgen_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) { + switch (h->w) { + case 1: h->prim_poly = 1; break; + case 2: h->prim_poly = 7; break; + case 3: h->prim_poly = 013; break; + case 4: h->prim_poly = 023; break; + case 5: h->prim_poly = 045; break; + case 6: h->prim_poly = 0103; break; + case 7: h->prim_poly = 0211; break; + case 8: h->prim_poly = 0435; break; + case 9: h->prim_poly = 01021; break; + case 10: h->prim_poly = 02011; break; + case 11: h->prim_poly = 04005; break; + case 12: h->prim_poly = 010123; break; + case 13: h->prim_poly = 020033; break; + case 14: h->prim_poly = 042103; break; + case 15: h->prim_poly = 0100003; break; + case 16: h->prim_poly = 0210013; break; + case 17: h->prim_poly = 0400011; break; + case 18: h->prim_poly = 01000201; break; + case 19: h->prim_poly = 02000047; break; + case 20: h->prim_poly = 04000011; break; + case 21: h->prim_poly = 010000005; break; + case 22: h->prim_poly = 020000003; break; + case 23: h->prim_poly = 040000041; break; + case 24: h->prim_poly = 0100000207; break; + case 25: h->prim_poly = 0200000011; break; + case 26: h->prim_poly = 0400000107; break; + case 27: h->prim_poly = 01000000047; break; + case 28: h->prim_poly = 02000000011; break; + case 29: h->prim_poly = 04000000005; break; + case 30: h->prim_poly = 010040000007; break; + case 31: h->prim_poly = 020000000011; break; + case 32: h->prim_poly = 00020000007; break; + default: fprintf(stderr, "gf_wgen_init: w not defined yet\n"); exit(1); + } + } + + gf->multiply.w32 = NULL; + gf->divide.w32 = NULL; + gf->inverse.w32 = NULL; + gf->multiply_region.w32 = gf_wgen_cauchy_region; + gf->extract_word.w32 = gf_wgen_extract_word; + + 
switch(h->mult_type) { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: if (gf_wgen_shift_init(gf) == 0) return 0; break; + case GF_MULT_BYTWO_b: if (gf_wgen_bytwo_b_init(gf) == 0) return 0; break; + case GF_MULT_BYTWO_p: if (gf_wgen_bytwo_p_init(gf) == 0) return 0; break; + case GF_MULT_GROUP: if (gf_wgen_group_init(gf) == 0) return 0; break; + case GF_MULT_TABLE: if (gf_wgen_table_init(gf) == 0) return 0; break; + case GF_MULT_LOG_TABLE: if (gf_wgen_log_init(gf) == 0) return 0; break; + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w32 = gf_wgen_divide_from_inverse; + gf->inverse.w32 = gf_wgen_euclid; + } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w32 = gf_wgen_divide_from_inverse; + gf->inverse.w32 = gf_wgen_matrix; + } + + if (gf->inverse.w32== NULL && gf->divide.w32 == NULL) gf->inverse.w32 = gf_wgen_euclid; + + if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { + gf->divide.w32 = gf_wgen_divide_from_inverse; + } + if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { + gf->inverse.w32 = gf_wgen_inverse_from_divide; + } + return 1; +} diff --git a/junk b/junk new file mode 100755 index 0000000000000000000000000000000000000000..7bd0ccf22a991bb66caea30b5b59dcd93752f1cf GIT binary patch literal 8712 zcmeHNUuYaf82>g&?X```6$(abbd`pNT9TAL7#|83Nyyfg8rxKYD3iP7l3clLj=jA! 
z5sHDJG)Ul{RP@PLpM15DU9nePTQ+@U@}l#)m@0&cDfU+B%xB(<@v%w zSyjVY;nZ&bGM%x#c0I^oVnOXXqowTJ9;BFD9zyisec%K(A(d1KQp(ZGku?yZEd5S4;l1D)}=D=-6y;Qrk02 zPMlhglyQzP&Z(IoKVOU$&Gt65J?=>gadN)czmt(dXWVz!u_2Ti9vUA?Xh2wM-x&fE z`;K;vuS!*dg<1He7V|-MZ+pGw5wOF3yO${CW6#A~uM!5k{Oi^J8Mla&dxQHeiIpnn zA7yW_$sTcXJl`)fSNg2fbUw_p7E3dKp0?*|J#lg!bn|wx^Bp;V_QJW5;j?B#z(8sp zfGHX69Q}&)QLD&vA}a&B9u=-gR|0nd<4*>WEuo|t>OTYKJAD0F7(f|)=TK7Ehl$VP zd-*%^I_{>DlSe);BsSiAq3Q$|I3|Iy% z1C{~HfMvik@DK*Bh`08$xBAmv@4agoZ@rdXyTAImbMwbm&$;KVefw*hw|X<u2zbPBt85n>e=#6{lkb&n=PS>n$Gpq|tK% z-5Nb7k@uV$=sR7WImq^p@80;rIq*>~|Ne(ZKN-1^JHOuj7n+AIHF~yC*5}x1>z78O zF-KRn?x4mdBYgH#NGx3>ii^ZElM$Yym>is;DXjAh+K#*{8YyzCLmdA0gwT{6VFq#D zx+u<4*2%W-&(xtgi8em(&j)Y>@&Y?i+BmEu`)*4cO+TJI!%}0;^JDs4KcUM>U7|}$ za{71J%eqV{EwBt&1}p=X0n318z%pPNunbrRECZGS%YbFzf5pJz9D`JGP`H3NE`q|Z z2EBC&1`p>{aj6sv>N(Ua2x^BjKw4Uqv{%D?5Xw1eQ*wMhU-D(_!o*N8E0>G@Vkubg zWma0|=U}EdRSSyL9P0q4R{YSs`cjnV~iLI z+`mK@FRy8A;$&|CJK}I?Bn)#U@s$Y2cQ)+6_+oUiB$7ilXRs?S1M_Yin6HYeL~&*n k5jrJtxrngr4tYxL=2jWzRkR-Q0>$fSR&dvP!wr#t0F>c35C8xG literal 0 HcmV?d00001 diff --git a/junk-pick-best-output b/junk-pick-best-output new file mode 100755 index 0000000000000000000000000000000000000000..eb0db93fa1fffd4c128bb25584333cea9c09e652 GIT binary patch literal 28584 zcmeHQ4SZD9m4C@hV9@A9g*FOk)S&T$k02ijO3eTxZ|Vdi0ZCEzF-#_8XufvlO$cry z!C~CIKBlJK;?ml7({|Ugb#1d-WmC3b_>e?JMV8jox)P~tUkpm>QW{w#`#<;IH(yDB zD*f&L_Lukj-Fwfu_ndRjJ@?#?ckj#OwbLJ*9VVS#l}kFG|jn1P)cC3@6B0U-XhUR6E|hpguF2# zLs7gzHQ=vV8^e?JyLJwjcb6z9Jn>1=6$Q`_iYb-8sybhQ;mPt=3VH7d{RmHbhO~FB z*Q@v%yuLbBQ9Wx?`=h9U>-SS3kMLBT66GsOsAj#trd;vYRMes%RbGD~m-kPCiEwM8 zfD~9!e3eQ?z*Ftx1*!5{=W%&u

-ahwsLoOC@P_3;{rXmxl%@$|A=yM?y&vmm_6~ zJmBmRIQbjNjrVI^pmr_vQtCZ{;K=gwxD-h$;(lwQsi~q=*OJlvs!|cE@ea{15B$^x zrh0@YwsTyHKt-v64|7+pQv*Zf6$p8&1s~yw{YvdjbR9*h@|;WFHt>^vZNg6oPn1XY zTYz9bic;=TJtj8PelmzjRE{7aJh8oTP7W!`njpF%_N4Z2rI42%qeupX+ObUX6F$$m zT}$qDE^?W3REhAD96<`4p6z0Op>`(9x&ia(3`QX$(6@r1^?vZ6LGp4*YJu-|uY~QO z9O;*h(7Z~L9zvQ2zi+xklHkXZs7kT}rf?MkUhsP%I1=VuzZMRLElgP)I zN25^-j9OsS0;3ifwZNzaMlCREfl&*LT42-yqZSyoK#B#H%i4d22h&2gZ#p|TC`qAQ zIXb`lc1bdJ;b1|IPAGqWaBw&AAU^|HMn;Y#y||Cqb`!(%bm(C07#tkrnR57emezPi z3k`%D&!mN>ZfYEe$=yV9eOzv4g52k?MgB9$A0qe1Ozt`fwaMXk`(?e+CPzbA+UfA# zv^~cxa@t3-_IjUH4)3wa;X)~NiXbBwhXq=vu>lfKlA?|kjt3me9m`7PO~>1zudMCL zLu*~yNjdu651rr$k0~BmO(mU?N+e83XQVC;HpIcEIM^Hq zTTD>jMK#IcPTBa%WM-V7;bf5;ht<&;X$64|b-4AZ>rv(kzmVJE4G88W;p+PInx24fcy=+QhTJzv%zFi=*;=yhh1pOe+MThz;CJ;K{%WXrk@CSh1~ z7j@81A~GgJR^L3A{*_#t>&~y_W~r0p$e?PMwYQBkI1cN}aB2Jd7Q4cWa&5N0$u3>Z zwaMC>ZtZu*AE@|JIl9VJ5Ik<}gmE9!E=#s7FisWaWP}#Tk&e*!`tF1vxAt3%q7Ipx z`M^y$(cAoOXpv%S`{NOo8G6-NNtHaD!1Q%64c|Af7*j!vPI&?(a`Ye-Mi zl@5WDYb9LiaTNc_uJi*TMYz&mQx$NfKj36H#mLN`Up78wC8iShrYdnsq7qT=&uDa% z`}0=r&zb4T{yaHZiF8t8(|(%#9&|k9SmpQ&hr%b@#JRBy#cqA}Kj&f;NAE%BIol9g zA-M=+5M3bwl;dDY94ruEXCynGnPY-cmW$ad4>>gHQ4%HTkpbo&%U^@1^fQpvg#v>Y zyo!Pz20FJPx0~m-BKHs{&*CDx2n%;$WJEex!PeERU~4cZ{y?#QP9H4UjcbDz}~4DWS(cp7H<6?$PUGY|EbP)YkjhQCk-f9;aPRE99^8X z6J<1KWkn8e%*2qVmKjv`p9v$+c23cZRMk_4cPwNu|kb};Vyn6h?J4RUn$WcY>g z(&upp+F_5h;ouwAx1oqS!tp%o4t+xAjmXFR7RL0Vy&+o)cH_`qGX4&1_T7gdp9IVL z{C9C+Z{$OTPuBwK0;2>%&(ne^R(s5NtOel?%y|KC)lIQpaGYBRrUHkKDkFGkF@cmURnn-O*U<_Tcb6p>-HtE1=q^YZ-N|Q4fLV zY1z;ING*Ggx9mU`x``anSO(6W7&jQ(v+Kz|BL7yB7$b)vIQi8qdn=Qov#})Ka!B4Y zfF=%D>qCxT(EY~GDUzd;UO|HNF06yICqcHc3zx`|4t1JYVMaoQKSvp+f7!xS6!rZh zavg*FUEYH<|1zT{)mpeG@C4MPra;9mYSB|*!9pS5$p(_TM%JCJtOImxP?k5l^hLQj zvR;PG%c>Ta_Di|&-_hZ05ZM>cR#Fzp!`i!yElFmqjaCIn7*>3GUQ^PEwp}va^S~2pLrH;IXkV|9O*ft(3*lf1z zNa7fFfAO;62f3_u_r8d3g_knNtZ>+;Ey~TaKbD8gxW3F_A<(BG*=?Wu;zNvm`C-)T;E1dn&$e*VZXwehn;IR+ffghp6CiO zVF2s%o%9Ou4Uz;eGq)|{ux+tNwA199-_0Z4#X?fptu7%K)q^>xabX_bP0oUsmN&_0 
zXZjp9ZfEH}SK)s3`o1Hq_n*sQLUz%bVH~;=%}no`1>O6mqe~OZp58bpNx zy-%SMm~%*xx(IbRo`+~!+q$rTNCd{0hKb~$i?q`T3jCw#dDmg+ml&x zG<3jN36Xubq?XZE`~~9+RGr?~W(vKREEz|K^_cyJjkqvrAIm~I%ocy2@g7RpQh`ki z?Mf0hEiK7!bm8MzVc2#JOMhcKJVvc&&OXK+Z7^Kg0rW)&+sQ4?;hJh;})=zaUjt)XGPf+-i~^-V>Q zF^N$fHPVq96A@wD8J8iY_mAsA&b9A*+7)%@+8p*BXWiOqdUg3Zi5kS5iSZ@tFo6ye z=rDl}6X=ZtFYX)ClX))ftgQVPETOK94J7OgVOKqfm&!jHFVQH_y;;RL577TAhuluS zbH0S#t(wh8gE_j!$$F^W_#QqtAWvHaW#H|_k#|GSqL1=Y`zUc2+)Sp|*j(w-PPz5k zEOf7Z^A~C2=i|DJkHrp`KE|!DCQkwwo8-|kaHj|nx$O6J@~Is&{1=Q&Xfcvg?J=&~ z+@X$f>8s#Vh_mI$Z&bYb{YT%ouvetXy;wT|3I~=$!cfC%N(xJ|L)W^s-?Pr84ja6T z)OWVAJpzB-)^`zyF#*PKX;5e}20$g$uEqd}wZL>M>}k1MZ6vpH>2r+b;NxzEGVWFY zOt*?0-jIv>O|`HC;p>c1&lPE9iuC2d?OWX1z)o0K>bu5NpYPa7J#6VR&O6UUYAy4`1~A&@cx= ziGlfmOkj>ITAjtl;if+=u3HE1)dM1{VNsg~f*=hwa)6_`U<}VtW$3 zBqy;`*5Pvw%u2nTM4~;{07coM1XH0y*6%?rY=_Xrb?A~YYY83P`Xi(RW_R+TE=W4J z8Yii}Tn%?&Wp?OlDB{-MrwswzqX!@KkS6n?2^|&ChbC|eSua8*7hn;W9`lR&=TT+D zLRWZWwiLRKDR7#5DBrUgUlMBUz^IDrCkpQph2LU@r{GXzQ{yUR!?T-c7mBpizGHcF zGbVJjrWTYgeFb$WjmnXY2{+}&f0%4a-n|}D&u!2vY2?QYniHDH6^@O#DBie82|BSL zlm3k17-!JKwCG|~$2z!UsFR5qoO<_tu%1hq4f9>$MwoO`q6)%1W4Ic972MqL7t?UZ z$1w%_+*sMnyaUKfFlGxk<~FpWZ%P8$-zAf^ak6%hxiw?>X+3p)E^}!#DrGIyChHq< z#I-TYzWF!gq}bzJB}dl}7!y!SpN-t}H5*h7FSg;Bx3Pm3;?tC&udunJ>#%Yf@@>vG ztU#_kXDpb^ADJXaR}Q$sAK>)2&&E9bsIiwg1`mb|>`*#rH)6CQ2SA5dE?&pR(7yR* znhTOQANI$uWSKZ)^ns<+nZkuNpI{7OXXyRKLF1=DKaHtnHQeoVLhp zmVgUiCl~GwWn#B_I~r_!oiQAeqnTIAk;D9*&hC62G=cF+EcJGylxW!ogo&ZEZ|r4c z?G2-rNuiSrXDh-s>}hCgT)3)Dor?9g1?RCgti3HTmwG8XAN-y15BQdhcAUl9@#1JT zrpZ-pp`%#Fwqa4HHPYJ`q|V5JXm_Eng%+JvEjZu(TkIhnyP*MXMPCtHQFq}9Rcu8+ zO5BRxAS%H$7CK_|5T@-j4kPWg^MP$l215P5ulEx+NjtaMiuLt2?4)qk<78e|7(zQx zeVZ%1KMUuYT;eJpL(AMBk>V;4}QuM$V(aAT{aqUOdHVVO&p{x)ZPT zV~e#~>Vp>9;>7Gqo9q2$DGM>Kcy#BPa8DE7(7xGW-_gNO6HBqNEo1v|8tvqvdgFG3 zKAe{ayI`Xph;8E_{62)x*kf#^VjUK6VxtHW&DnLpc>)LY&JLJJ>bniL>tcfT;b6qR z1AAR;=`GH7`wppKFRE{Lo{$HNyW~M+Q_-Ey?%?#M#uHNLg}$3(%#X#GyN6>AH+D(t z7{)~|VbW~wcx0x1$Ep)lKb!H5FAOK=dt`&_yJ3y3q>g~5(D 
zz7xf8iIvPiW2mpXJDUs)1$qeMO+!hzbNp@tlc$kRIx`o+))kvRlY_=yr}GFUy=iQD zb$07TScc^Apo+o(~TNZTV@ihHmj?Uua< zWNpjdej55j#9D4D+!E*DwWF3nbZUY=Fu-NF4E-Hcw|}h^)y1V~&Lg_>NRo98^eL)s z?7`9BbZY#E*stN;VO*#44D2QK!D(3Gcq)87UP!ip({wt!3F`edqUQ_XS%wB0*sLGb z_8(|E12$?$e^L+jcNXEz4h7*xLkiu1X7|g{jGr-1y0&=(J3jWV2{BrUt*@R@=uT*h z;^qBtJnd2PW8D2pI11Mru?WxJ0Dj{vn~1TY7OWFR;X_n7m|Vz+^dcKPCv+9)`i%ob zAliflg9%}?MR&|uy1|63~A#7d;WDec%|4o=z0;$N_MH;X%1yqCrMS$u%S zT`Ycy#RpmZDvJ-X_%Mr)u=rOj?q>0uEPji{Z?l;8`h!!BvG`pUzsF*O#qYEDIEz1I zaX*VcV)029pJMR1Qtv$k0g!s{B4`=b$ljR*c?rrRC_vD)1Z^T{JVEym zbO}Lo38M4G$~=Nj!QU&dCg?apnFJjp=q#q_-fn{W33`>FcL>@~5G^{rI|ZvQwgBw2M|L%G*P8ZfKr59gVb`X zK=KJ$Cy=irO*fB7BI!OPbLgj{;#2#82*_zVP6@%6F{(w&|w0AJBi z#9)a$N=Eqj?l@kC&dEhjsS~m4ApKRYB+=grzI2-;ZAMD%&y+5cidYR&8vYJ59e=x% zMtsC`Yp!G=o|97~3-LTXT}t0OU9$dqhLn!KvBc&i4I6N-E2U`<)&WFPcK;X*fyTw z8L92GW~20R%h#;Yj4flrV~O7l9_#y@JVMNPMr`je^=|?H7X|-zvwkiHf+-`x&Q{vd zB&RK>!-hkqi5v$;Up>gXTnLZIl=kgG5PoX_XzDDYCkvlD}T+&Cy60pyKyA@ z4b^`O_!qs+`JWSt+ZJ3H|0LF0=Euy9$2@r!(o8RwBpQq4#}hx|@@cwbPpVx-HqkFb z*l~)}n|?o}9}?&{13vtFAz$?05PB;aCTt=3&m#Q&8=QV2VD_9tpLap8+NrzIjS6X|M^{uCWbbYn4p zo2nDBBei^oBg1Vx407v}O@qv*3F~q^d~QiF6LuK%%={q&J8(TcnqZ)GE@`qJ8yPn24`m z;O~j_Es-7;X_rWMi}YEM{#c}~B7H)nTSWS(NE<|2Bhqq_t`upBNbeEpwIaXx7`4Et1x76}YJpJ;j9OsS0;3ifwZQ*j3tUlzU(WJXctTa`(ol`h zUF-E!`K0>;1uZDRZ_nW_r%q}rYAK@MisHXTw+27=hTo!Vxe^pBmy`x)DNB8IGiNBJ zim$p(-Jk?~bs($_c-H!yQUI(VD^>6C;#P>k%a)a@`GspdLBCh=^B>DY?DJH+yeP=` zR(b-88u0klpv&tl^&oNz#s^S7n=^8bHKB^(n1-jvkMfN`KctdRtDVVL3)P3VMW|I? 
zRZ=fCwWTv-)U2;&jZ@Y(G${CqzhG^Rr^*j2*3Xav4J{J4u{wv}#JNg?(6uz^bmrfY zbSLLrQm80vYeI^*p&@@RehjWQQ0=Kw{HibDL07m+z?iQ%olEa64Wf6OdF4w=)dFSd z8d%%sBhl)tI*&i#0!_IS>Z412Rk#7n52~)xnF_LIRr!KJs-Lt#jx#^Ms&>6EpscA4 z)s#Dzq7L3w7eVrzcmqQ<{=W?QNcow6PWfo^T+~b)T%lQ8~d}(?QY)hl5bUFVq%%#3y2)|(2A_ZF0_%w+zH7n?U*ypUL-0kt)x!&69 zI!^%3aIcq&+Tun21}7#5DbOCvQqMEN%wF#=_tiL=pbo($CTNW2Vx`=Nd9!u{`7_O{ zB)ls|Hpf#}=ffz5BCOg&LrTCFB%bbsl3>ukwkF2YGo-}iF=8bP^QnF&sNc*?oY&7c zPtQd|f*WeQ&Ju4mQ*dd7X!b7A+D*-LazcbmJ^PbYWw5nAHx`IQ2o#g$kSPt@3#SG@s8@TxcwU z>QR|<8e9gO_Y}1a(LYfJ| zW&}P(XbVGq)!ynlLbnlG7n9n`P*cu!hT+$g)e53*BQzNFdTJQj&QLWVBz2f*-3Aa# z>57yIM-QmdQpu`Fw^n{yRbZRU)1 zK2`N&KZj4J>Ov|e-jG)<$5%15*m^Ws&%YsxSf%u#JI(*L4BaD2U$t7SKb9qn z^;a%LC+~%gu-afJvjf&V_x|qRzjdT9l0{A&-Qr@Rm{r_NvJOX^CmbjCWn z#IW5m`@Fg4Zi;&8U~2%&=%^?UM1?EY}C!0}BD!|86G%JcI%ep29D1fGsEDz~CyBocjJ zLijV{KJq4kKQHjHD%t|2d9-DDbC5`^Ke{ye9U25{e}H{*`d^zL!37 zBiy|ArN2la+`Rw&s=&>AVEQPU%FX*=4{SlWd4JnLINE34pKk_k5x?e5^=}j9<~{rO z89)CvipqZ^aP#+$p8<}1Dq_zbhSN9P{CS<@Xa#?IQt*>0IDHa^Z>(bU(^K&K7|z_1 z38ar66X_!<__h@M=@fi-3VtvJzZ4&4CCaBS+!FEWDY%@1ds6UF3a+K#uchFb_*5>j zzME3;6)AXg3jU)M{C$SU?RPSz+=a1{Sl?ELH^utnUj-WYya3O*qP zpOk{%n1bJug6F5;g(lDa}lN>+=Os5LLS1c2>7&w zecwf&8c##Olq_M|k}x%~Z(Ki*G#lY+1p4lXzI>yPVQCG(bSKS5z*4^T-}TYAWnVy`&oi9}_aH1rkP%!6Uqo;tEJ3&rp#3AxFX~=B#`=67{HbaEUiGKThf% z28J!32m5RGa*aJ6;WPYxFf4Z8x4_|66c>2Uw{QH6;`4{ui9n=#u0QArdi_itoWi>4 zaIoGN@K4)O-F{j*C*w zpYR%l`i6Vk!_bPohvI+640%&k%oka_V&esuzwr)trIUI#N$pEGloV*vX{5+w$B-Pp z;PXc!Ki9!yLitEHN1P<-P%=5ooU>5T98bbAq)ElI5~$3BM@I1JPaHXW;!$H#4m%^{ zIl~+-4p%T72RlwoA!g@@Jnyp}AYPD^kxmZ5%m(SuBST&io5`>wp6ARjmEpXHgp6V2 z(?MSRnT`dS2zFb^po@Jn$b?>`BSFaeG?%HwcHwt1L#fYqKu-%dtHAV9oPB|ZU)hd) z!fLXaTq9kgLOQ!bMM}4)$p@kWOFRh`S@9!Kkzt;F0v+b^6}hpaP{86QoS%l8$Dm3D wJO5NF*nuTK@idP*m1=&*X&!DWb)2Y +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +#define VIT(i, v) for (i = 0; i < v.size(); i++) +#define IT(it, ds) for (it = ds.begin(); it != ds.end(); it++) +#define FUP(i, n) for (i 
= 0; i < n; i++) + +typedef map ISmap; +typedef map IImap; +typedef map SDmap; + +typedef ISmap::iterator ISmit; +typedef IImap::iterator IImit; +typedef SDmap::iterator SDmit; + +typedef vector SVec; + +void StoSVec(string &s, SVec &sv) +{ + istringstream ss; + string s2; + + ss.clear(); + ss.str(s); + while (ss >> s2) sv.push_back(s2); +} + +main() +{ + string s, k; + double d, b; + int i; + SVec sv; + SDmap bmap; + SDmit bmit; + + while (getline(cin, s)) { + sv.clear(); + StoSVec(s, sv); + + if (sv[0] == "Seed:") { + b = 0; + for (i = 0; i < 2; i++) { + getline(cin, s); + sv.clear(); + StoSVec(s, sv); + sscanf(sv[3].c_str(), "%lf", &d); + if (d > b) b = d; + } + getline(cin, s); + sv.clear(); + StoSVec(s, sv); + k = sv[2]; + k += " "; + k += sv[3]; + for (i = 4; i < sv.size(); i++) { + if (sv[i] != "-") { + k += " "; + k += sv[i]; + } + } + if (bmap[k] < b) bmap[k] = b; + } + } + + IT(bmit, bmap) { + printf("%10.4lf %s\n", bmit->second, bmit->first.c_str()); + } +} diff --git a/junk-proc.awk b/junk-proc.awk new file mode 100644 index 0000000..ed1f4cd --- /dev/null +++ b/junk-proc.awk @@ -0,0 +1,11 @@ +($1 == "Seed:") { l = 0; n++; t=0 } +{ if (l >= 1 && l <= 4) { + t += $4 + if (l == 4) avg = t/4.0 + } + if (l == 5) { + printf("xaxis max %d hash_label at %d : %s\n", n+1, n, $0 ) + printf("newcurve marktype xbar marksize 1 cfill 1 1 0 pts %d %.2lf\n", n, avg); + } + l++ +} diff --git a/junk-save.c b/junk-save.c new file mode 100644 index 0000000..9b73025 --- /dev/null +++ b/junk-save.c @@ -0,0 +1,658 @@ + +/* + c = gf.multiply.w32(&gf, a, b); + tested = 0; + +*/ + /* If this is not composite, then first test against the default: */ + +/* + if (h->mult_type != GF_MULT_COMPOSITE) { + tested = 1; + d = gf_def.multiply.w32(&gf_def, a, b); + + if (c != d) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" The default returned %x\n", d); + exit(1); + } + } + +*/ + /* 
Now, we also need to double-check, in case the default is wanky, and when + we're performing composite operations. Start with 0 and 1: */ + +/* + if (a == 0 || b == 0 || a == 1 || b == 1) { + tested = 1; + if (((a == 0 || b == 0) && c != 0) || + (a == 1 && c != b) || + (b == 1 && c != a)) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x, which is clearly wrong.\n", a, b, c); + exit(1); + } + +*/ + /* If division or inverses are defined, let's test all combinations to make sure + that the operations are consistent with each other. */ + +/* + } else { + if ((c & mask) != c) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x, which is too big.\n", a, b, c); + exit(1); + } + + } + if (gf.inverse.w32 != NULL && (a != 0 || b != 0)) { + tested = 1; + if (a != 0) { + ai = gf.inverse.w32(&gf, a); + + if (gf.multiply.w32(&gf, c, ai) != b) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n", + a, ai, c, ai, gf.multiply.w32(&gf, c, ai)); + exit(1); + } + } + if (b != 0) { + bi = gf.inverse.w32(&gf, b); + if (gf.multiply.w32(&gf, c, bi) != a) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n", + b, bi, c, bi, gf.multiply.w32(&gf, c, bi)); + exit(1); + } + } + } + if (gf.divide.w32 != NULL && (a != 0 || b != 0)) { + tested = 1; + + if (a != 0) { + ai = gf.divide.w32(&gf, c, a); + + if (ai != b) { + printf("Error in single multiplication/division (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" gf.divide.w32() of %x and %x returned 
%x\n", c, a, ai); + exit(1); + } + } + if (b != 0) { + bi = gf.divide.w32(&gf, c, b); + + if (bi != a) { + printf("Error in single multiplication/division (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" gf.divide.w32() of %x and %x returned %x\n", c, b, bi); + exit(1); + } + } + } + + if (!tested) problem("There is no way to test multiplication.\n"); + } +*/ + +/* + if (region) { + + if (w == 4) { + if (gf.multiply_region.w32 == NULL) { + printf("No multiply_region.\n"); + } else { + r8b = (uint8_t *) malloc(REGION_SIZE); + r8c = (uint8_t *) malloc(REGION_SIZE); + r8d = (uint8_t *) malloc(REGION_SIZE); + fill_random_region(r8b, REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (a = 0; a < 16; a++) { + fill_random_region(r8c, REGION_SIZE); + memcpy(r8d, r8c, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint8_t); + gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor); + for (i = sindex; i < eindex; i++) { + b = (r8b[i] >> 4); + c = (r8c[i] >> 4); + d = (r8d[i] >> 4); + if (!xor && gf.multiply.w32(&gf, a, b) != c) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); + printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) ); + printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=0"); + } + if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); + printf(" %d %d %d %d\n", a, b, c, d); + printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + b = (r8b[i] & 0xf); + c = (r8c[i] & 0xf); + d = (r8d[i] & 0xf); + if (!xor && gf.multiply.w32(&gf, a, b) != c) { + printf("i=%d. 
Address 0x%lx\n", i, (unsigned long) (r8b+i)); + printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) ); + printf("i=%d. 0x%x 0x%x 0x%x 0x%x\n", i, a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=0"); + } + if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); + printf(" (%d * %d ^ %d) should equal %d - equals %d\n", + a, b, d, (gf.multiply.w32(&gf, a, b) ^ d), c); + printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (a = 0; a < 16; a++) { + fill_random_region(r8b, REGION_SIZE); + memcpy(r8d, r8b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint8_t); + gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor); + for (i = sindex; i < eindex; i++) { + b = (r8b[i] >> 4); + d = (r8d[i] >> 4); + if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); + if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { + printf("i=%d. %d %d %d\n", i, a, b, d); + printf("i=%d. 
%d %d %d\n", i, a, r8b[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + b = (r8b[i] & 0xf); + d = (r8d[i] & 0xf); + if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); + if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { + printf("%d %d %d\n", a, b, d); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r8b); + free(r8c); + free(r8d); + } + } else if (w == 8) { + if (gf.multiply_region.w32 == NULL) { + printf("No multiply_region.\n"); + } else { + r8b = (uint8_t *) malloc(REGION_SIZE); + r8c = (uint8_t *) malloc(REGION_SIZE); + r8d = (uint8_t *) malloc(REGION_SIZE); + fill_random_region(r8b, REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (a = 0; a < 256; a++) { + fill_random_region(r8c, REGION_SIZE); + memcpy(r8d, r8c, REGION_SIZE); + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + sindex = 0; + eindex = REGION_SIZE; + } else { + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); + } + size = (eindex-sindex)*sizeof(uint8_t); + gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor); + for (i = sindex; i < eindex; i++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + b = get_alt_map_2w8(i, (uint8_t*)r8b, REGION_SIZE / 2); + c = get_alt_map_2w8(i, (uint8_t*)r8c, REGION_SIZE / 2); + d = get_alt_map_2w8(i, (uint8_t*)r8d, REGION_SIZE / 2); + } else { + b = r8b[i]; + c = r8c[i]; + d = r8d[i]; + } + if (!xor && gf.multiply.w32(&gf, a, b) != c) { + printf("i=%d. %d %d %d %d\n", i, a, b, c, d); + printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); + printf("%llx. 
Sindex: %d\n", r8b+i, sindex); + problem("Failed buffer-constant, xor=0"); + } + if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { + printf("i=%d. %d %d %d %d\n", i, a, b, c, d); + printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + continue; + } + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (a = 0; a < 256; a++) { + fill_random_region(r8b, REGION_SIZE); + memcpy(r8d, r8b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint8_t); + gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor); + for (i = sindex; i < eindex; i++) { + b = r8b[i]; + d = r8d[i]; + if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); + if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { + printf("i=%d. %d %d %d\n", i, a, b, d); + printf("i=%d. 
%d %d %d\n", i, a, r8b[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r8b); + free(r8c); + free(r8d); + } + } else if (w == 16) { + if (gf.multiply_region.w32 == NULL) { + printf("No multiply_region.\n"); + } else { + r16b = (uint16_t *) malloc(REGION_SIZE); + r16c = (uint16_t *) malloc(REGION_SIZE); + r16d = (uint16_t *) malloc(REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + fill_random_region(r16b, REGION_SIZE); + a = MOA_Random_W(w, 0); + fill_random_region(r16c, REGION_SIZE); + memcpy(r16d, r16c, REGION_SIZE); + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + sindex = 0; + eindex = REGION_SIZE / sizeof(uint16_t); + } else { + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1); + } + size = (eindex-sindex)*sizeof(uint16_t); + gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16c+sindex), a, size, xor); + ai = gf.inverse.w32(&gf, a); + if (!xor) { + gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), ai, size, xor); + } else { + gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), 1, size, xor); + gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), ai, size, xor); + } + + for (i = sindex; i < eindex; i++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + b = get_alt_map_2w16(i, (uint8_t*)r16b, size / 2); + c = get_alt_map_2w16(i, (uint8_t*)r16c, size / 2); + d = get_alt_map_2w16(i, (uint8_t*)r16d, size / 2); + } else { + b = r16b[i]; + c = r16c[i]; + d = r16d[i]; + } + if (!xor && d != b) { + printf("i=%d. 
Address 0x%lx\n", i, (unsigned long) (r16b+i)); + printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d); + printf("%d is the inverse of %d\n", ai, a); + problem("Failed buffer-constant, xor=0"); + } + if (xor && b != 0) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); + printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); + printf(" b should equal 0, but it doesn't. Probe into it.\n"); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + continue; + } + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a = MOA_Random_W(w, 0); + fill_random_region(r16b, REGION_SIZE); + memcpy(r16d, r16b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint16_t); + gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), a, size, xor); + ai = gf.inverse.w32(&gf, a); + if (!xor) { + gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, xor); + } else { + gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), 1, size, xor); + gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, 0); + } + + for (i = sindex; i < eindex; i++) { + b = r16b[i]; + c = r16c[i]; + d = r16d[i]; + if (!xor && (d != b)) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); + printf("We did d=b; b = ba; b = b(a^-1).\n"); + printf("So, b should equal d, but it doesn't. Look into it.\n"); + printf("b = %d. d = %d. a = %d\n", b, d, a); + problem("Failed buffer-constant, xor=0"); + } + if (xor && d != b) { + printf("i=%d. 
Address 0x%lx\n", i, (unsigned long) (r16b+i)); + printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n"); + printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); + printf("So, b should equal d, but it doesn't. Look into it.\n"); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r16b); + free(r16c); + free(r16d); + } + } else if (w == 32) { + if (gf.multiply_region.w32 == NULL) { + printf("No multiply_region.\n"); + } else { + r32b = (uint32_t *) malloc(REGION_SIZE); + r32c = (uint32_t *) malloc(REGION_SIZE); + r32d = (uint32_t *) malloc(REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a = MOA_Random_32(); + fill_random_region(r32b, REGION_SIZE); + fill_random_region(r32c, REGION_SIZE); + memcpy(r32d, r32c, REGION_SIZE); + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + sindex = 0; + eindex = REGION_SIZE / sizeof(uint32_t); + } else { + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1); + } + size = (eindex-sindex)*sizeof(uint32_t); + gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32c+sindex), a, size, xor); + ai = gf.inverse.w32(&gf, a); + if (!xor) { + gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), ai, size, xor); + } else { + gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), 1, size, xor); + gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), ai, size, xor); + } + for (i = sindex; i < eindex; i++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + b = get_alt_map_2w32(i, (uint8_t*)r32b, size / 2); + c = get_alt_map_2w32(i, (uint8_t*)r32c, size / 2); + d = get_alt_map_2w32(i, 
(uint8_t*)r32d, size / 2); + i++; + } else { + b = r32b[i]; + c = r32c[i]; + d = r32d[i]; + } + if (!xor && d != b) { + printf("i=%d. Addresses: b: 0x%lx\n", i, (unsigned long) (r32b+i)); + printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d); + printf("%d is the inverse of %d\n", ai, a); + problem("Failed buffer-constant, xor=0"); + } + if (xor && b != 0) { + printf("i=%d. Addresses: b: 0x%lx c: 0x%lx d: 0x%lx\n", i, + (unsigned long) (r32b+i), (unsigned long) (r32c+i), (unsigned long) (r32d+i)); + printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); + printf(" b should equal 0, but it doesn't. Probe into it.\n"); + printf("a: %8x b: %8x c: %8x, d: %8x\n", a, b, c, d); + problem("Failed buffer-constant, xor=1"); + } + + } + } + } + for (xor = 0; xor < 2; xor++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + continue; + } + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a = MOA_Random_32(); + fill_random_region(r32b, REGION_SIZE); + memcpy(r32d, r32b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint32_t); + gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), a, size, xor); + ai = gf.inverse.w32(&gf, a); + if (!xor) { + gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, xor); + } else { + gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), 1, size, xor); + gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, 0); + } + + for (i = sindex; i < eindex; i++) { + b = r32b[i]; + c = r32c[i]; + d = r32d[i]; + if (!xor && (d != b)) { + printf("i=%d. 
Address 0x%lx\n", i, (unsigned long) (r32b+i)); + printf("We did d=b; b = ba; b = b(a^-1).\n"); + printf("So, b should equal d, but it doesn't. Look into it.\n"); + printf("b = %d. d = %d. a = %d\n", b, d, a); + problem("Failed buffer-constant, xor=0"); + } + if (xor && d != b) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i)); + printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n"); + printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); + printf("So, b should equal d, but it doesn't. Look into it.\n"); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r32b); + free(r32c); + free(r32d); + } + } else if (w == 64) { + if (gf.multiply_region.w64 == NULL) { + printf("No multiply_region.\n"); + } else { + r64b = (uint64_t *) malloc(REGION_SIZE); + r64c = (uint64_t *) malloc(REGION_SIZE); + r64d = (uint64_t *) malloc(REGION_SIZE); + fill_random_region(r64b, REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a64 = MOA_Random_64(); + fill_random_region(r64c, REGION_SIZE); + memcpy(r64d, r64c, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint64_t); + gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64c+sindex), a64, size, xor); + for (i = sindex; i < eindex; i++) { + b64 = r64b[i]; + c64 = r64c[i]; + d64 = r64d[i]; + if (!xor && gf.multiply.w64(&gf, a64, b64) != c64) { + printf("i=%d. 0x%llx 0x%llx 0x%llx should be 0x%llx\n", i, a64, b64, c64, + gf.multiply.w64(&gf, a64, b64)); + printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i]); + problem("Failed buffer-constant, xor=0"); + } + if (xor && (gf.multiply.w64(&gf, a64, b64) ^ d64) != c64) { + printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, b64, c64, d64); + printf("i=%d. 
0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i], r64d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a64 = MOA_Random_64(); + fill_random_region(r64b, REGION_SIZE); + memcpy(r64d, r64b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint64_t); + gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64b+sindex), a64, size, xor); + for (i = sindex; i < eindex; i++) { + b64 = r64b[i]; + d64 = r64d[i]; + if (!xor && gf.multiply.w64(&gf, a64, d64) != b64) problem("Failed buffer-constant, xor=0"); + if (xor && (gf.multiply.w64(&gf, a64, d64) ^ d64) != b64) { + printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, b64, d64); + printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r64b); + free(r64c); + free(r64d); + } + } else if (w == 128) { + if (gf.multiply_region.w128 == NULL) { + printf("No multiply_region.\n"); + } else { + r128b = (uint64_t *) malloc(REGION_SIZE); + r128c = (uint64_t *) malloc(REGION_SIZE); + r128d = (uint64_t *) malloc(REGION_SIZE); + fill_random_region(r128b, REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + MOA_Random_128(a128); + fill_random_region(r128c, REGION_SIZE); + memcpy(r128d, r128c, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint64_t)*2; + gf.multiply_region.w128(&gf, (void *) (r128b+sindex*2), (void *) (r128c+sindex*2), a128, size, xor); + for (i = sindex; i < eindex; i++) { + b128[0] = r128b[2*i]; + b128[1] = r128b[2*i+1]; + c128[0] = 
r128c[2*i]; + c128[1] = r128c[2*i+1]; + d128[0] = r128d[2*i]; + d128[1] = r128d[2*i+1]; + gf.multiply.w128(&gf, a128, b128, e128); + if (xor) { + e128[0] ^= d128[0]; + e128[1] ^= d128[1]; + } + if (!xor && !GF_W128_EQUAL(c128, e128)) { + printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx should be 0x%llx%llx\n", + i, a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], e128[0], e128[1]); + problem("Failed buffer-constant, xor=0"); + } + if (xor && !GF_W128_EQUAL(e128, c128)) { + printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx 0x%llx%llx\n", i, + a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + MOA_Random_128(a128); + fill_random_region(r128b, REGION_SIZE); + memcpy(r128d, r128b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + sindex = 0; + eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1); + eindex = REGION_SIZE/(2*sizeof(uint64_t)); + size = (eindex-sindex)*sizeof(uint64_t)*2; + gf.multiply_region.w128(&gf, (void *) (r128b+sindex), (void *) (r128b+sindex), a128, size, xor); + for (i = sindex; i < eindex; i++) { + b128[0] = r128b[2*i]; + b128[1] = r128b[2*i + 1]; + d128[0] = r128d[2*i]; + d128[1] = r128d[2*i + 1]; + gf.multiply.w128(&gf, a128, d128, e128); + if (xor) { + e128[0] ^= d128[0]; + e128[1] ^= d128[1]; + } + if (!xor && !GF_W128_EQUAL(b128, e128)) problem("Failed buffer-constant, xor=0"); + if (xor && !GF_W128_EQUAL(b128, e128)) { + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r128b); + free(r128c); + free(r128d); + } + } + } + exit(0); + */ +} diff --git a/junk-w16-backup.c b/junk-w16-backup.c new file mode 100644 index 0000000..ad0788a --- /dev/null +++ b/junk-w16-backup.c @@ -0,0 +1,1585 @@ +/* + * gf_w16.c + * + * Routines for 16-bit Galois fields + */ + +#include 
"gf_int.h" +#include +#include + +#define GF_FIELD_WIDTH (16) +#define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) +#define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 + +#define GF_BASE_FIELD_WIDTH (8) +#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) +#define GF_S_GF_8_2 (63) + +struct gf_logtable_data { + int log_tbl[GF_FIELD_SIZE]; + gf_val_16_t antilog_tbl[GF_FIELD_SIZE * 2]; + gf_val_16_t inv_tbl[GF_FIELD_SIZE]; +}; + +struct gf_zero_logtable_data { + int log_tbl[GF_FIELD_SIZE]; + gf_val_16_t _antilog_tbl[GF_FIELD_SIZE * 4]; + gf_val_16_t *antilog_tbl; + gf_val_16_t inv_tbl[GF_FIELD_SIZE]; +}; + +struct gf_lazytable_data { + int log_tbl[GF_FIELD_SIZE]; + gf_val_16_t antilog_tbl[GF_FIELD_SIZE * 2]; + gf_val_16_t inv_tbl[GF_FIELD_SIZE]; + gf_val_16_t lazytable[GF_FIELD_SIZE]; +}; + +struct gf_w8_logtable_data { + gf_val_8_t log_tbl[GF_BASE_FIELD_SIZE]; + gf_val_8_t antilog_tbl[GF_BASE_FIELD_SIZE * 2]; + gf_val_8_t *antilog_tbl_div; +}; + +struct gf_w8_single_table_data { + gf_val_8_t mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; + gf_val_8_t div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; +}; + +struct gf_w8_double_table_data { + gf_val_8_t div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; + gf_val_8_t mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE*GF_BASE_FIELD_SIZE]; +}; + + +#define MM_PRINT(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 2) printf(" %02x %02x", blah[15-ii], blah[14-ii]); printf("\n"); } + +static +inline +gf_val_16_t gf_w16_inverse_from_divide (gf_t *gf, gf_val_16_t a) +{ + return gf->divide.w16(gf, 1, a); +} + +static +inline +gf_val_16_t gf_w16_divide_from_inverse (gf_t *gf, gf_val_16_t a, gf_val_16_t b) +{ + b = gf->inverse.w16(gf, b); + return gf->multiply.w16(gf, a, b); +} + +static +void +gf_w16_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + int i; + gf_val_16_t *s16; + gf_val_16_t *d16; + + s16 = (gf_val_16_t *) src; + d16 = 
(gf_val_16_t *) dest; + + if (xor) { + for (i = 0; i < bytes/2; i++) { + d16[i] ^= gf->multiply.w16(gf, val, s16[i]); + } + } else { + for (i = 0; i < bytes/2; i++) { + d16[i] = gf->multiply.w16(gf, val, s16[i]); + } + } +} + +static +inline +gf_val_16_t gf_w16_euclid (gf_t *gf, gf_val_16_t b) +{ + gf_val_32_t e_i, e_im1, e_ip1; + gf_val_32_t d_i, d_im1, d_ip1; + gf_val_16_t y_i, y_im1, y_ip1; + gf_val_16_t c_i; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = 16; + for (d_i = d_im1; ((1 << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 >= d_i) { + c_i ^= (1 << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w16(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i = e_ip1; + d_i = d_ip1; + } + + return y_i; +} + +static +inline +gf_val_16_t gf_w16_matrix (gf_t *gf, gf_val_16_t b) +{ + return gf_bitmatrix_inverse(b, 16, ((gf_internal_t *) (gf->scratch))->prim_poly); +} + +/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only + include it for completeness. It does have the feature that it requires no + extra memory. 
+*/ + +static +inline +gf_val_16_t +gf_w16_shift_multiply (gf_t *gf, gf_val_16_t a16, gf_val_16_t b16) +{ + uint32_t product, i, pp, a, b; + gf_internal_t *h; + + a = a16; + b = b16; + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + product = 0; + + for (i = 0; i < GF_FIELD_WIDTH; i++) { + if (a & (1 << i)) product ^= (b << i); + } + for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { + if (product & (1 << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); + } + return product; +} + +static +int gf_w16_shift_init(gf_t *gf) +{ + gf->multiply.w16 = gf_w16_shift_multiply; + gf->inverse.w16 = gf_w16_euclid; + gf->multiply_region.w16 = gf_w16_multiply_region_from_single; + return 1; +} + +/* KMG: GF_MULT_LOGTABLE: */ + +static +void +gf_w16_log_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + unsigned long uls, uld; + int i; + uint16_t lv, b, c; + uint16_t *s16, *d16; + int num_syms = bytes >> 1; + int sym_divisible = bytes % 2; + + struct gf_logtable_data *ltd; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + s16 = (uint16_t *) src; + d16 = (uint16_t *) dest; + + lv = ltd->log_tbl[val]; + + if (xor) { + for (i = 0; i < num_syms; i++) { + d16[i] ^= (s16[i] == 0 ? 0 : ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]); + } + } else { + for (i = 0; i < num_syms; i++) { + d16[i] = (s16[i] == 0 ? 
0 : ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]); + } + } +} + +static +inline +gf_val_16_t +gf_w16_log_multiply(gf_t *gf, gf_val_16_t a, gf_val_16_t b) +{ + struct gf_logtable_data *ltd; + + ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return (a == 0 || b == 0) ? 0 : ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]]; +} + +static +inline +gf_val_16_t +gf_w16_log_divide(gf_t *gf, gf_val_16_t a, gf_val_16_t b) +{ + int log_sum = 0; + struct gf_logtable_data *ltd; + + if (a == 0 || b == 0) return 0; + ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + + log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE); + return (ltd->antilog_tbl[log_sum]); +} + +static +gf_val_16_t +gf_w16_log_inverse(gf_t *gf, gf_val_16_t a) +{ + struct gf_logtable_data *ltd; + + ltd = (struct gf_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return (ltd->inv_tbl[a]); +} + +static +int gf_w16_log_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_logtable_data *ltd; + int i, b; + + h = (gf_internal_t *) gf->scratch; + ltd = h->private; + + ltd->log_tbl[0] = 0; + + b = 1; + for (i = 0; i < GF_MULT_GROUP_SIZE; i++) { + ltd->log_tbl[b] = (gf_val_16_t)i; + ltd->antilog_tbl[i] = (gf_val_16_t)b; + ltd->antilog_tbl[i+GF_MULT_GROUP_SIZE] = (gf_val_16_t)b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ + ltd->inv_tbl[1] = 1; + for (i = 2; i < GF_FIELD_SIZE; i++) { + ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; + } + + gf->inverse.w16 = gf_w16_log_inverse; + gf->divide.w16 = gf_w16_log_divide; + gf->multiply.w16 = gf_w16_log_multiply; + gf->multiply_region.w16 = gf_w16_log_multiply_region; + + return 1; +} + +/* JSP: GF_MULT_SPLIT_TABLE: Using 8 multiplication tables to leverage SSE instructions. 
+*/ + +static +void +gf_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + uint64_t i, j, a, c, prod; + uint16_t *s16, *d16, *top; + gf_internal_t *h; + uint16_t table[4][16]; + + h = (gf_internal_t *) gf->scratch; + + for (j = 0; j < 16; j++) { + for (i = 0; i < 4; i++) { + c = (j << (i*4)); + table[i][j] = gf_w16_log_multiply(gf, c, val); + } + } + + s16 = (uint16_t *) src; + d16 = (uint16_t *) dest; + top = (uint16_t *) (dest+bytes); + + while (d16 < top) { + a = *s16; + prod = (xor) ? *d16 : 0; + for (i = 0; i < 4; i++) { + prod ^= table[i][a&0xf]; + a >>= 4; + } + *d16 = prod; + s16++; + d16++; + } +} + +static +void +gf_w16_split_8_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + uint64_t j, a, c, prod, *s64, *d64, *top64; + uint16_t *s16, *d16, *top; + gf_internal_t *h; + uint64_t htable[256], ltable[256]; + unsigned long uls, uld; + + h = (gf_internal_t *) gf->scratch; + + uls = ((unsigned long) src) & 0xf; + uld = ((unsigned long) dest) & 0xf; + if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_8_16_lazy_multiply_region", 2); + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + for (j = 0; j < 256; j++) { + ltable[j] = gf_w16_log_multiply(gf, j, val); + htable[j] = gf_w16_log_multiply(gf, (j<<8), val); + } + + s16 = (uint16_t *) src; + d16 = (uint16_t *) dest; + top = (uint16_t *) (dest+bytes); + + if (uls != 0) { + while (uls != 16 && d16 < top) { + a = *s16; + prod = (xor) ? *d16 : 0; + prod ^= ltable[a&0xff]; + a >>= 8; + prod ^= htable[a]; + *d16 = prod; + s16++; + d16++; + uls += 2; + } + if (d16 == top) return; + } + + uls = ((unsigned long) top) & 0xf; + uld = ((unsigned long) top) ^ uls; + top64 = (uint64_t *) uld; + s64 = (uint64_t *) s16; + d64 = (uint64_t *) d16; + +/* Does Unrolling Matter? -- Doesn't seem to. 
+ while (d64 != top64) { + a = *s64; + + prod = htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + a <<= 8; + prod <<= 16; + + prod ^= htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + a <<= 8; + prod <<= 16; + + prod ^= htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + a <<= 8; + prod <<= 16; + + prod ^= htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + prod ^= ((xor) ? *d64 : 0); + *d64 = prod; + *s64++; + *d64++; + } +*/ + + while (d64 != top64) { + a = *s64; + + prod = 0; + for (j = 0; j < 4; j++) { + prod <<= 16; + prod ^= htable[a >> 56]; + a <<= 8; + prod ^= ltable[a >> 56]; + a <<= 8; + } + + prod ^= ((xor) ? *d64 : 0); + *d64 = prod; + *s64++; + *d64++; + } + + + if (uls != 0) { + d16 = (uint16_t *) d64; + s16 = (uint16_t *) s64; + while (d16 < top) { + a = *s16; + prod = (xor) ? *d16 : 0; + prod ^= ltable[a&0xff]; + a >>= 8; + prod ^= htable[a]; + *d16 = prod; + s16++; + d16++; + } + } + return; +} + +static +void +gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + uint64_t j, a, c, prod, *s64, *d64, *top64, pp; + uint16_t *s16, *d16, *top; + gf_internal_t *h; + struct gf_lazytable_data *ltd; + unsigned long uls, uld; + + h = (gf_internal_t *) gf->scratch; + + uls = ((unsigned long) src) & 0xf; + uld = ((unsigned long) dest) & 0xf; + if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_table_lazy_multiply_region", 2); + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + ltd = (struct gf_lazytable_data *) h->private; + + ltd->lazytable[0] = 0; + a = val; + c = 1; + pp = h->prim_poly; + + do { + ltd->lazytable[c] = a; + c <<= 1; + if (c & (1 << GF_FIELD_WIDTH)) c ^= pp; + a <<= 1; + if (a & (1 << GF_FIELD_WIDTH)) a ^= pp; + } while (c != 1); + + s16 = (uint16_t *) src; + d16 = (uint16_t *) dest; + top = (uint16_t *) (dest+bytes); + + if (uls != 0) { + while (uls != 16 && d16 < top) { + prod = (xor) ? 
*d16 : 0; + prod ^= ltd->lazytable[*s16]; + *d16 = prod; + s16++; + d16++; + uls += 2; + } + if (d16 == top) return; + } + + uls = ((unsigned long) top) & 0xf; + uld = ((unsigned long) top) ^ uls; + top64 = (uint64_t *) uld; + s64 = (uint64_t *) s16; + d64 = (uint64_t *) d16; + + /* Unrolling doesn't seem to matter + while (d64 != top64) { + a = *s64; + + prod = ltd->lazytable[a >> 48]; + a <<= 16; + prod <<= 16; + + prod ^= ltd->lazytable[a >> 48]; + a <<= 16; + prod <<= 16; + + prod ^= ltd->lazytable[a >> 48]; + a <<= 16; + prod <<= 16; + + prod ^= ltd->lazytable[a >> 48]; + + prod ^= ((xor) ? *d64 : 0); + *d64 = prod; + *s64++; + *d64++; + } + */ + + while (d64 != top64) { + a = *s64; + + prod = 0; + for (j = 0; j < 4; j++) { + prod <<= 16; + prod ^= ltd->lazytable[a >> 48]; + a <<= 16; + } + prod ^= ((xor) ? *d64 : 0); + *d64 = prod; + *s64++; + *d64++; + } + + if (uls != 0) { + d16 = (uint16_t *) d64; + s16 = (uint16_t *) s64; + while (d16 < top) { + prod = (xor) ? *d16 : 0; + prod ^= ltd->lazytable[*s16]; + *d16 = prod; + s16++; + d16++; + } + } + return; +} + +static +void +gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + uint64_t i, j, *s64, *d64, *top64;; + uint64_t a, c, prod; + uint16_t *s16, *d16, *top; + uint8_t low[4][16]; + uint8_t high[4][16]; + unsigned long uls, uld; + + __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4], shuffler, unshuffler, tta, ttb; + + struct gf_single_table_data *std; + + uls = ((unsigned long) src) & 0xf; + uld = ((unsigned long) dest) & 0xf; + if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_altmap_multiply_region", 2); + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + for (j = 0; j < 16; j++) { + for (i = 0; i < 4; i++) { + c = (j << (i*4)); + prod = gf_w16_log_multiply(gf, c, val); + low[i][j] = (prod & 0xff); + high[i][j] = (prod >> 8); + } + } + + s16 = 
(uint16_t *) src; + d16 = (uint16_t *) dest; + top = (uint16_t *) (dest+bytes); + + if (uls != 0) { + while (uls != 16 && d16 < top) { + a = *s16; + prod = (xor) ? *d16 : 0; + for (i = 0; i < 4; i++) { + c = a & 0xf; + prod ^= low[i][c]; + prod ^= (high[i][c] << 8); + a >>= 4; + } + *d16 = prod; + s16++; + d16++; + uls += 2; + } + if (d16 == top) return; + } + + for (i = 0; i < 4; i++) { + tlow[i] = _mm_loadu_si128((__m128i *)low[i]); + thigh[i] = _mm_loadu_si128((__m128i *)high[i]); + } + + uls = ((unsigned long) top); + uld = ((unsigned long) d16); + bytes = (uls - uld); + if ((bytes & 0x1f) != 0) bytes -= (bytes & 0x1f); + + top64 = (uint64_t *) (uld + bytes); + s64 = (uint64_t *) s16; + d64 = (uint64_t *) d16; + mask = _mm_set1_epi8 (0x0f); + shuffler = _mm_set_epi8(0xf, 0xd, 0xb, 0x9, 7, 5, 3, 1, 0xe, 0xc, 0xa, 8, 6, 4, 2, 0); + unshuffler = _mm_set_epi8(0xf, 7, 0xe, 6, 0xd, 5, 0xc, 4, 0xb, 3, 0xa, 2, 9, 1, 8, 0); + + if (xor) { + while (d64 != top64) { + + ta = _mm_load_si128((__m128i *) s64); + MM_PRINT("Ta", ta); + tb = _mm_load_si128((__m128i *) (s64+2)); + MM_PRINT("Tb", tb); + tta = _mm_shuffle_epi8(ta, shuffler); + ttb = _mm_shuffle_epi8(tb, shuffler); + ta = _mm_unpackhi_epi64(ttb, tta); + MM_PRINT("New ta", ta); + tb = _mm_unpacklo_epi64(ttb, tta); + MM_PRINT("New tb", tb); + exit(0); + + + ti = _mm_and_si128 (mask, tb); + tph = _mm_shuffle_epi8 (thigh[0], ti); + tpl = _mm_shuffle_epi8 (tlow[0], ti); + + tb = _mm_srli_epi16(tb, 4); + ti = _mm_and_si128 (mask, tb); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); + + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); + + tta = _mm_unpackhi_epi64(tpl, 
tph); + ttb = _mm_unpacklo_epi64(tpl, tph); + ta = _mm_shuffle_epi8(tta, unshuffler); + tb = _mm_shuffle_epi8(ttb, unshuffler); + tta = _mm_load_si128((__m128i *) d64); + ta = _mm_xor_si128(ta, tta); + ttb = _mm_load_si128((__m128i *) (d64+2)); + tb = _mm_xor_si128(tb, ttb); + _mm_store_si128 ((__m128i *)d64, ta); + _mm_store_si128 ((__m128i *)(d64+2), tb); + + d64 += 4; + s64 += 4; + + } + } else { + while (d64 != top64) { + + ta = _mm_load_si128((__m128i *) s64); + tb = _mm_load_si128((__m128i *) (s64+2)); + tta = _mm_shuffle_epi8(ta, shuffler); + ttb = _mm_shuffle_epi8(tb, shuffler); + ta = _mm_unpackhi_epi64(ttb, tta); + tb = _mm_unpacklo_epi64(ttb, tta); + + ti = _mm_and_si128 (mask, tb); + tph = _mm_shuffle_epi8 (thigh[0], ti); + tpl = _mm_shuffle_epi8 (tlow[0], ti); + + tb = _mm_srli_epi16(tb, 4); + ti = _mm_and_si128 (mask, tb); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); + + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); + + tta = _mm_unpackhi_epi64(tpl, tph); + ttb = _mm_unpacklo_epi64(tpl, tph); + ta = _mm_shuffle_epi8(tta, unshuffler); + tb = _mm_shuffle_epi8(ttb, unshuffler); + _mm_store_si128 ((__m128i *)d64, ta); + _mm_store_si128 ((__m128i *)(d64+2), tb); + + d64 += 4; + s64 += 4; + } + } + + d16 = (uint16_t *) d64; + s16 = (uint16_t *) s64; + + while (d16 != top) { + a = *s16; + prod = (xor) ? 
*d16 : 0; + for (i = 0; i < 4; i++) { + c = a & 0xf; + prod ^= low[i][c]; + prod ^= (high[i][c] << 8); + a >>= 4; + } + *d16 = prod; + s16++; + d16++; + } +#endif +} + +/* +static +void +gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + uint64_t i, j, *s64, *d64, *top64;; + uint64_t a, c, prod; + uint16_t *s16, *d16, *top; + uint8_t low[4][16]; + uint8_t high[4][16]; + unsigned long uls, uld; + + __m128i mask, ta, ti, tp, tlow[4], thigh[4]; + + struct gf_single_table_data *std; + + uls = ((unsigned long) src) & 0xf; + uld = ((unsigned long) dest) & 0xf; + if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_multiply_region", 2); + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + for (j = 0; j < 16; j++) { + for (i = 0; i < 4; i++) { + c = (j << (i*4)); + prod = gf_w16_log_multiply(gf, c, val); + low[i][j] = (prod & 0xff); + high[i][j] = (prod >> 8); + } + } + + s16 = (uint16_t *) src; + d16 = (uint16_t *) dest; + top = (uint16_t *) (dest+bytes); + + if (uls != 0) { + while (uls != 16 && d16 < top) { + a = *s16; + prod = (xor) ? 
*d16 : 0; + for (i = 0; i < 4; i++) { + c = a & 0xf; + prod ^= low[i][c]; + prod ^= (high[i][c] << 8); + a >>= 4; + } + *d16 = prod; + s16++; + d16++; + uls += 2; + } + if (d16 == top) return; + } + + for (i = 0; i < 4; i++) { + tlow[i] = _mm_loadu_si128((__m128i *)low[i]); + thigh[i] = _mm_loadu_si128((__m128i *)high[i]); + } + + uls = ((unsigned long) top) & 0xf; + uld = ((unsigned long) top) ^ uls; + top64 = (uint64_t *) uld; + s64 = (uint64_t *) s16; + d64 = (uint64_t *) d16; + mask = _mm_set1_epi16 (0x0f); + + if (xor) { + while (d64 != top64) { + ta = _mm_load_si128((__m128i *) s64); + ti = _mm_and_si128 (mask, ta); + tp = _mm_shuffle_epi8 (tlow[0], ti); + ti = _mm_slli_epi16 (ti, 8); + ti = _mm_shuffle_epi8 (thigh[0], ti); + tp = _mm_xor_si128 (tp, ti); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tp); + ti = _mm_slli_epi16 (ti, 8); + ti = _mm_shuffle_epi8 (thigh[1], ti); + tp = _mm_xor_si128 (tp, ti); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tp); + ti = _mm_slli_epi16 (ti, 8); + ti = _mm_shuffle_epi8 (thigh[2], ti); + tp = _mm_xor_si128 (tp, ti); + + ti = _mm_srli_epi16(ta, 4); + tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tp); + ti = _mm_slli_epi16 (ti, 8); + ti = _mm_shuffle_epi8 (thigh[3], ti); + tp = _mm_xor_si128 (tp, ti); + ti = _mm_load_si128((__m128i *)d64); + tp = _mm_xor_si128 (tp, ti); + _mm_store_si128 ((__m128i *)d64, tp); + s64 += 2; + d64 += 2; + } + } else { + while (d64 != top64) { + ta = _mm_load_si128((__m128i *) s64); + ti = _mm_and_si128 (mask, ta); + tp = _mm_shuffle_epi8 (tlow[0], ti); + ti = _mm_slli_epi16 (ti, 8); + ti = _mm_shuffle_epi8 (thigh[0], ti); + tp = _mm_xor_si128 (tp, ti); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tp); + ti = _mm_slli_epi16 (ti, 8); + ti = _mm_shuffle_epi8 (thigh[1], ti); + tp 
= _mm_xor_si128 (tp, ti); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tp); + ti = _mm_slli_epi16 (ti, 8); + ti = _mm_shuffle_epi8 (thigh[2], ti); + tp = _mm_xor_si128 (tp, ti); + + ti = _mm_srli_epi16(ta, 4); + tp = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tp); + ti = _mm_slli_epi16 (ti, 8); + ti = _mm_shuffle_epi8 (thigh[3], ti); + tp = _mm_xor_si128 (tp, ti); + _mm_store_si128 ((__m128i *)d64, tp); + s64 += 2; + d64 += 2; + } + } + + d16 = (uint16_t *) d64; + s16 = (uint16_t *) s64; + + while (d16 != top) { + a = *s16; + prod = (xor) ? *d16 : 0; + for (i = 0; i < 4; i++) { + c = a & 0xf; + prod ^= low[i][c]; + prod ^= (high[i][c] << 8); + a >>= 4; + } + *d16 = prod; + s16++; + d16++; + } +#endif +} +*/ + + +static +void +gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + uint64_t i, j, *s64, *d64, *top64;; + uint64_t a, c, prod; + uint16_t *s16, *d16, *top; + uint8_t low[4][16]; + uint8_t high[4][16]; + unsigned long uls, uld; + + __m128i mask, ta, tb, ti, tpl, tph, tlow[4], thigh[4]; + + struct gf_single_table_data *std; + + uls = ((unsigned long) src) & 0xf; + uld = ((unsigned long) dest) & 0xf; + if (uls != uld || uls % 2 != 0 || bytes % 2 != 0) gf_alignment_error("gf_w16_split_4_16_lazy_sse_altmap_multiply_region", 2); + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + for (j = 0; j < 16; j++) { + for (i = 0; i < 4; i++) { + c = (j << (i*4)); + prod = gf_w16_log_multiply(gf, c, val); + low[i][j] = (prod & 0xff); + high[i][j] = (prod >> 8); + } + } + + s16 = (uint16_t *) src; + d16 = (uint16_t *) dest; + top = (uint16_t *) (dest+bytes); + + if (uls != 0) { + while (uls != 16 && d16 < top) { + a = *s16; + prod = (xor) ? 
*d16 : 0; + for (i = 0; i < 4; i++) { + c = a & 0xf; + prod ^= low[i][c]; + prod ^= (high[i][c] << 8); + a >>= 4; + } + *d16 = prod; + s16++; + d16++; + uls += 2; + } + if (d16 == top) return; + } + + for (i = 0; i < 4; i++) { + tlow[i] = _mm_loadu_si128((__m128i *)low[i]); + thigh[i] = _mm_loadu_si128((__m128i *)high[i]); + } + + uls = ((unsigned long) top); + uld = ((unsigned long) d16); + bytes = (uls - uld); + if ((bytes & 0x1f) != 0) bytes -= (bytes & 0x1f); + + top64 = (uint64_t *) (uld + bytes); + s64 = (uint64_t *) s16; + d64 = (uint64_t *) d16; + mask = _mm_set1_epi8 (0x0f); + + if (xor) { + while (d64 != top64) { + + ta = _mm_load_si128((__m128i *) s64); + tb = _mm_load_si128((__m128i *) (s64+2)); + + ti = _mm_and_si128 (mask, tb); + tph = _mm_shuffle_epi8 (thigh[0], ti); + tpl = _mm_shuffle_epi8 (tlow[0], ti); + + tb = _mm_srli_epi16(tb, 4); + ti = _mm_and_si128 (mask, tb); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); + + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); + + ta = _mm_load_si128((__m128i *) d64); + tph = _mm_xor_si128(tph, ta); + _mm_store_si128 ((__m128i *)d64, tph); + tb = _mm_load_si128((__m128i *) (d64+2)); + tpl = _mm_xor_si128(tpl, tb); + _mm_store_si128 ((__m128i *)(d64+2), tpl); + + d64 += 4; + s64 += 4; + } + } else { + while (d64 != top64) { + + ta = _mm_load_si128((__m128i *) s64); + tb = _mm_load_si128((__m128i *) (s64+2)); + + ti = _mm_and_si128 (mask, tb); + tph = _mm_shuffle_epi8 (thigh[0], ti); + tpl = _mm_shuffle_epi8 (tlow[0], ti); + + tb = _mm_srli_epi16(tb, 4); + ti = _mm_and_si128 (mask, tb); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[1], ti), tpl); + tph = 
_mm_xor_si128(_mm_shuffle_epi8 (thigh[1], ti), tph); + + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[2], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[2], ti), tph); + + ta = _mm_srli_epi16(ta, 4); + ti = _mm_and_si128 (mask, ta); + tpl = _mm_xor_si128(_mm_shuffle_epi8 (tlow[3], ti), tpl); + tph = _mm_xor_si128(_mm_shuffle_epi8 (thigh[3], ti), tph); + + _mm_store_si128 ((__m128i *)d64, tph); + _mm_store_si128 ((__m128i *)(d64+2), tpl); + + d64 += 4; + s64 += 4; + + } + } + + d16 = (uint16_t *) d64; + s16 = (uint16_t *) s64; + + while (d16 != top) { + a = *s16; + prod = (xor) ? *d16 : 0; + for (i = 0; i < 4; i++) { + c = a & 0xf; + prod ^= low[i][c]; + prod ^= (high[i][c] << 8); + a >>= 4; + } + *d16 = prod; + s16++; + d16++; + } +#endif +} + +static +int gf_w16_split_init(gf_t *gf) +{ + gf_internal_t *h; + gf_w16_log_init(gf); + + h = (gf_internal_t *) gf->scratch; + if (h->arg1 == 8 || h->arg2 == 8) { + gf->multiply_region.w16 = gf_w16_split_8_16_lazy_multiply_region; + } else if (h->arg1 == 4 || h->arg2 == 4) { + if (h->region_type & GF_REGION_SSE) { + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w16 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region; + } else { + gf->multiply_region.w16 = gf_w16_split_4_16_lazy_sse_multiply_region; + } + } else { + gf->multiply_region.w16 = gf_w16_split_4_16_lazy_multiply_region; + } + } + return 1; +} + +static +int gf_w16_table_init(gf_t *gf) +{ + gf_internal_t *h; + gf_w16_log_init(gf); + + h = (gf_internal_t *) gf->scratch; + gf->multiply_region.w16 = NULL; + gf->multiply_region.w16 = gf_w16_table_lazy_multiply_region; + return 1; +} + +static +void +gf_w16_log_zero_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + unsigned long uls, uld; + int i; + uint16_t lv, b, c; + uint16_t *s16, *d16; + int num_syms = bytes >> 1; + int sym_divisible = bytes % 2; + + struct gf_zero_logtable_data *ltd; + + uls = (unsigned long) src; + 
uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + ltd = (struct gf_zero_logtable_data*) ((gf_internal_t *) gf->scratch)->private; + s16 = (uint16_t *) src; + d16 = (uint16_t *) dest; + + lv = ltd->log_tbl[val]; + + if (xor) { + for (i = 0; i < num_syms; i++) { + d16[i] ^= ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]; + } + } else { + for (i = 0; i < num_syms; i++) { + d16[i] = ltd->antilog_tbl[lv + ltd->log_tbl[s16[i]]]; + } + } +} + +static +inline +gf_val_16_t +gf_w16_log_zero_multiply (gf_t *gf, gf_val_16_t a, gf_val_16_t b) +{ + struct gf_zero_logtable_data *ltd; + + ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return ltd->antilog_tbl[ltd->log_tbl[a] + ltd->log_tbl[b]]; +} + +static +inline +gf_val_16_t +gf_w16_log_zero_divide (gf_t *gf, gf_val_16_t a, gf_val_16_t b) +{ + int log_sum = 0; + struct gf_zero_logtable_data *ltd; + + if (a == 0 || b == 0) return 0; + ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + + log_sum = ltd->log_tbl[a] - ltd->log_tbl[b] + (GF_MULT_GROUP_SIZE); + return (ltd->antilog_tbl[log_sum]); +} + +static +gf_val_16_t +gf_w16_log_zero_inverse (gf_t *gf, gf_val_16_t a) +{ + struct gf_zero_logtable_data *ltd; + + ltd = (struct gf_zero_logtable_data *) ((gf_internal_t *) gf->scratch)->private; + return (ltd->inv_tbl[a]); +} + +static +int gf_w16_log_zero_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_zero_logtable_data *ltd; + int i, b; + + h = (gf_internal_t *) gf->scratch; + ltd = h->private; + + ltd->log_tbl[0] = (-GF_MULT_GROUP_SIZE) + 1; + + bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl)); + + ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_FIELD_SIZE * 2]); + + b = 1; + for (i = 0; i < 
GF_MULT_GROUP_SIZE; i++) { + ltd->log_tbl[b] = (gf_val_16_t)i; + ltd->antilog_tbl[i] = (gf_val_16_t)b; + ltd->antilog_tbl[i+GF_MULT_GROUP_SIZE] = (gf_val_16_t)b; + b <<= 1; + if (b & GF_FIELD_SIZE) { + b = b ^ h->prim_poly; + } + } + ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ + ltd->inv_tbl[1] = 1; + for (i = 2; i < GF_FIELD_SIZE; i++) { + ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; + } + + gf->inverse.w16 = gf_w16_log_zero_inverse; + gf->divide.w16 = gf_w16_log_zero_divide; + gf->multiply.w16 = gf_w16_log_zero_multiply; + gf->multiply_region.w16 = gf_w16_log_zero_multiply_region; + return 1; +} +/* Multiply in the composite field GF((2^8)^2): with a = a1*x + a0 and b = b1*x + b0, returns (a0b0 + a1b1) | ((a1b0 + a0b1 + a1b1*GF_S_GF_8_2) << 8), every 8-bit product coming from base_gf->multiply.w8. This matches reduction by p(x) = x^2 + s*x + 1 with s = GF_S_GF_8_2. */ +static +gf_val_16_t +gf_w16_composite_multiply(gf_t *gf, gf_val_16_t a, gf_val_16_t b) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t b0 = b & 0x00ff; + uint8_t b1 = (b & 0xff00) >> 8; + uint8_t a0 = a & 0x00ff; + uint8_t a1 = (a & 0xff00) >> 8; + uint8_t a1b1; + + a1b1 = base_gf->multiply.w8(base_gf, a1, b1); + + return ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8)); +} + +/* + * Composite field division trick (explained in 2007 tech report) + * + * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1 + * + * let c = b^-1 + * + * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0) + * + * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1 + * + * let d = b1c1 and d+1 = b0c0 + * + * solve s*b1c1+b1c0+b0c1 = 0 + * + * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1 + * + * c0 = (d+1)b0^-1 + * c1 = d*b1^-1 + * + * a / b = a * c + * + * gf_w16_composite_inverse() applies this to its argument a (the b above). + * Shortcuts: a0 == 0 gives c = a1^-1 * (x + s); a1 == 0 gives c = a0^-1. + */ +static +gf_val_16_t +gf_w16_composite_inverse(gf_t *gf, gf_val_16_t a) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint8_t a0 = a & 0x00ff; + uint8_t a1 = (a & 0xff00) >> 8; + uint8_t c0, c1, d, tmp; + uint16_t c; + uint8_t a0inv, a1inv; + + if (a0 == 0) { + a1inv =
base_gf->inverse.w8(base_gf, a1); + c0 = base_gf->multiply.w8(base_gf, a1inv, GF_S_GF_8_2); + c1 = a1inv; + } else if (a1 == 0) { + c0 = base_gf->inverse.w8(base_gf, a0); + c1 = 0; + } else { + a1inv = base_gf->inverse.w8(base_gf, a1); + a0inv = base_gf->inverse.w8(base_gf, a0); + + d = base_gf->multiply.w8(base_gf, a1, a0inv); + + tmp = (base_gf->multiply.w8(base_gf, a1, a0inv) ^ base_gf->multiply.w8(base_gf, a0, a1inv) ^ GF_S_GF_8_2); + tmp = base_gf->inverse.w8(base_gf, tmp); + + d = base_gf->multiply.w8(base_gf, d, tmp); + + c0 = base_gf->multiply.w8(base_gf, (d^1), a0inv); + c1 = base_gf->multiply.w8(base_gf, d, a1inv); + } + + c = c0 | (c1 << 8); + + return c; +} +/* a / b computed as a * b^-1 using gf_w16_composite_inverse() and gf_w16_composite_multiply(). */ +static +gf_val_16_t +gf_w16_composite_divide(gf_t *gf, gf_val_16_t a, gf_val_16_t b) +{ + gf_val_16_t binv; + + binv = gf_w16_composite_inverse(gf, b); + + return gf_w16_composite_multiply(gf, a, binv); +} +/* Region multiply by val in the composite field, taking every base-field product from the 256x256 table std->mult (h->private). Calls gf_alignment_error() when src and dest differ in their low three address bits or bytes is odd; val == 0 zeroes dest, or does nothing when xor is set. dest[i] is overwritten, or XOR-ed when xor is set. NOTE(review): base_gf and ltd are declared but not used here. */ +static +void +gf_w16_composite_multiply_region_table(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + int i=0; + struct gf_w8_single_table_data * std; + uint8_t b0 = val & 0x00ff; + uint8_t b1 = (val & 0xff00) >> 8; + uint16_t *s16 = (uint16_t *) src; + uint16_t *d16 = (uint16_t *) dest; + uint8_t a0, a1, a1b1; + int num_syms = bytes >> 1; + int sym_divisible = bytes % 2; + + struct gf_logtable_data *ltd; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + std = (struct gf_w8_single_table_data *) h->private; + + if (xor) { + for (i = 0;i < num_syms; i++) { + a0 = s16[i] & 0x00ff; + a1 = (s16[i] & 0xff00) >> 8; + a1b1 = std->mult[a1][b1]; + + d16[i] ^=
((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8)); + + } + } else { + for (i = 0;i < num_syms; i++) { + a0 = s16[i] & 0x00ff; + a1 = (s16[i] & 0xff00) >> 8; + a1b1 = std->mult[a1][b1]; + + d16[i] = ((std->mult[a0][b0] ^ a1b1) | ((std->mult[a1][b0] ^ std->mult[a0][b1] ^ std->mult[a1b1][GF_S_GF_8_2]) << 8)); + } + } +} +/* Composite-field region multiply by val that calls base_gf->multiply.w8 for the base-field products, except a1*b1, which is read from the std->mult table. Performs the same src/dest alignment and odd-size checks, and the same val == 0 shortcut, as visible in the body below. */ +static +void +gf_w16_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + int i=0; + struct gf_w8_single_table_data * std; + uint8_t b0 = val & 0x00ff; + uint8_t b1 = (val & 0xff00) >> 8; + uint16_t *s16 = (uint16_t *) src; + uint16_t *d16 = (uint16_t *) dest; + uint8_t a0, a1, a1b1; + int num_syms = bytes >> 1; + int sym_divisible = bytes % 2; + + struct gf_logtable_data *ltd; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w16_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w16_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + std = (struct gf_w8_single_table_data *) h->private; + + if (xor) { + for (i = 0;i < num_syms; i++) { + a0 = s16[i] & 0x00ff; + a1 = (s16[i] & 0xff00) >> 8; + a1b1 = std->mult[a1][b1]; + + d16[i] ^= ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8)); + + } + } else { + for (i = 0;i < num_syms; i++) { + a0 = s16[i] & 0x00ff; + a1 = (s16[i] & 0xff00) >> 8; + a1b1 = std->mult[a1][b1]; + + d16[i] = ((base_gf->multiply.w8(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w8(base_gf, a1, b0) ^ base_gf->multiply.w8(base_gf, a0, b1) ^ base_gf->multiply.w8(base_gf, a1b1, GF_S_GF_8_2)) << 8)); +
} + } +} +/* ALTMAP region multiply by val: src and dest are each treated as two halves of bytes/2 bytes, combined with five base-field region multiplies using val0/val1 (low/high byte of val) and GF_S_GF_8_2*val1. NOTE(review): this matches gf_w16_composite_multiply if the first half holds the low bytes of the symbols and the second half the high bytes - confirm the ALTMAP layout. Pointer arithmetic on void * (src+sub_reg_size) is a GNU C extension. */ +static +void +gf_w16_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_16_t val, int bytes, int xor) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + gf_val_8_t val0 = val & 0x00ff; + gf_val_8_t val1 = (val & 0xff00) >> 8; + int sub_reg_size = bytes / 2; + + if (bytes % 2 != 0) gf_alignment_error("gf_w16_composite_multiply_region_alt", 1); /* checked before dest is modified */ + + if (!xor) { + memset(dest, 0, bytes); + } + + base_gf->multiply_region.w8(base_gf, src, dest, val0, sub_reg_size, xor); + base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest, val1, sub_reg_size, 1); + base_gf->multiply_region.w8(base_gf, src, dest+sub_reg_size, val1, sub_reg_size, xor); + base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest+sub_reg_size, val0, sub_reg_size, 1); + base_gf->multiply_region.w8(base_gf, src+sub_reg_size, dest+sub_reg_size, base_gf->multiply.w8(base_gf, GF_S_GF_8_2, val1), sub_reg_size, 1); +} +/* Fills std->mult (h->private) with all 256x256 base-field products, picks the region routine (ALTMAP -> _alt, else SINGLE_TABLE -> _table, else the generic one) and installs composite multiply, divide and inverse. Always returns 1. */ +static +int gf_w16_composite_init(gf_t *gf) +{ + struct gf_w8_single_table_data * std; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + gf_val_16_t a, b; + + std = (struct gf_w8_single_table_data *) h->private; + + for (a = 0; a < 256; a++) { + for (b = 0; b < 256; b++) { + std->mult[a][b] = base_gf->multiply.w8(base_gf, a, b); + } + } + + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w16 = gf_w16_composite_multiply_region_alt; + } else { + if (h->region_type & GF_REGION_SINGLE_TABLE) { + gf->multiply_region.w16 = gf_w16_composite_multiply_region_table; + } else { + gf->multiply_region.w16 = gf_w16_composite_multiply_region; + } + } + + gf->multiply.w16 = gf_w16_composite_multiply; + gf->divide.w16 = gf_w16_composite_divide; + gf->inverse.w16 = gf_w16_composite_inverse; + + return 1; +} + + +/* Returns the number of scratch bytes needed for the requested mult_type / region_type / arg1 / arg2 combination, or -1 when the combination is rejected. divide_type is not consulted. */ +int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + int ss; + int sa; + + ss = (GF_REGION_SSE | GF_REGION_NOSSE); + sa =
(GF_REGION_STDMAP | GF_REGION_ALTMAP); + + switch(mult_type) + { + case GF_MULT_TABLE: + region_type |= GF_REGION_LAZY; + if (arg1 != 0 || arg2 != 0 || region_type != GF_REGION_LAZY) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_lazytable_data) + 64; + break; + case GF_MULT_LOG_TABLE: + if (arg2 != 0) return -1; + if (region_type != GF_REGION_DEFAULT) return -1; + if (arg1 == 1) { + return sizeof(gf_internal_t) + sizeof(struct gf_zero_logtable_data) + 64; + } else if (arg1 == 0) { + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; + } else { + return -1; + } + break; + case GF_MULT_SPLIT_TABLE: + if ((arg1 == 8 && arg2 == 16) || (arg2 == 8 && arg1 == 16)) { + region_type |= GF_REGION_LAZY; + if (region_type != GF_REGION_LAZY) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; + } else if ((arg1 == 4 && arg2 == 16) || (arg2 == 4 && arg1 == 16)) { + region_type &= (~GF_REGION_LAZY); /* Ignore GF_REGION_LAZY */ + if ((region_type & ss) == ss) return -1; + if ((region_type & sa) == sa) return -1; + if ((region_type & ss) == 0) region_type |= GF_REGION_SSE; + if (region_type & GF_REGION_NOSSE) { + if (region_type != GF_REGION_NOSSE) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; + } else { + if ((region_type | ss | sa) != (ss|sa)) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_logtable_data) + 64; + } + } + return -1; + break; + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1; + return sizeof(gf_internal_t); + break; + case GF_MULT_COMPOSITE: + if (region_type & ~(GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1; + if ((region_type & (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) == (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) return -1; + if (arg1 == 2 && arg2 == 8) { + return sizeof(gf_internal_t) + sizeof(struct gf_w8_single_table_data) + 64; + } else { + return -1; + } + + 
default: + return -1; + } +} +/* Top-level w=16 initializer: defaults prim_poly to 0x1100b when it is 0, clears the four function pointers and dispatches on mult_type. The LOG_TABLE, SPLIT_TABLE and TABLE cases return their init result directly; only DEFAULT/SHIFT and COMPOSITE continue to the divide_type override (EUCLID or MATRIX) and to deriving a missing divide or inverse from the other. Returns 0 for an unknown mult_type or a failed sub-init, otherwise 1. */ +int gf_w16_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x1100b; + + gf->multiply.w16 = NULL; + gf->divide.w16 = NULL; + gf->inverse.w16 = NULL; + gf->multiply_region.w16 = NULL; + + switch(h->mult_type) { + case GF_MULT_LOG_TABLE: + if (h->arg1 == 1) { + return gf_w16_log_zero_init(gf); + } else { + return gf_w16_log_init(gf); + } + case GF_MULT_SPLIT_TABLE: return gf_w16_split_init(gf); + case GF_MULT_TABLE: return gf_w16_table_init(gf); + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: if (gf_w16_shift_init(gf) == 0) return 0; break; + case GF_MULT_COMPOSITE: if (gf_w16_composite_init(gf) == 0) return 0; break; + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w16 = gf_w16_divide_from_inverse; + gf->inverse.w16 = gf_w16_euclid; + } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w16 = gf_w16_divide_from_inverse; + gf->inverse.w16 = gf_w16_matrix; + } + + if (gf->inverse.w16 != NULL && gf->divide.w16 == NULL) { + gf->divide.w16 = gf_w16_divide_from_inverse; + } + if (gf->inverse.w16 == NULL && gf->divide.w16 != NULL) { + gf->inverse.w16 = gf_w16_inverse_from_divide; + } + return 1; +} diff --git a/junk-w16-timing-tests.sh b/junk-w16-timing-tests.sh new file mode 100644 index 0000000..cea73cf --- /dev/null +++ b/junk-w16-timing-tests.sh @@ -0,0 +1,12 @@ +sh tmp-time-test.sh 16 LOG - - +sh tmp-time-test.sh 16 LOG_ZERO - - +sh tmp-time-test.sh 16 TABLE - - +sh tmp-time-test.sh 16 TABLE LE,LAZY - +sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,NOSSE - +sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,LAZY,SSE - +sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,LAZY,NOSSE - +sh tmp-time-test.sh 16 SPLIT 16 4 ALTMAP,SSE - +sh tmp-time-test.sh 16 SPLIT 16 4 NOSSE - +sh tmp-time-test.sh 16 SPLIT 16 4 LAZY,SSE - +sh tmp-time-test.sh 16 SPLIT 16 4 LAZY,NOSSE - +sh tmp-time-test.sh 16 SPLIT 16 4 SSE - diff --git a/junk-w2.eps b/junk-w2.eps new file mode
100644 index 0000000..1796352 --- /dev/null +++ b/junk-w2.eps @@ -0,0 +1,203 @@ +%!PS-Adobe-2.0 EPSF-1.2 +%%Page: 1 1 +%%BoundingBox: -40 -93 289 73 +%%EndComments +1 setlinecap 1 setlinejoin +0.700 setlinewidth +0.00 setgray + +/Jrnd { exch cvi exch cvi dup 3 1 roll idiv mul } def +/JDEdict 8 dict def +JDEdict /mtrx matrix put +/JDE { + JDEdict begin + /yrad exch def + /xrad exch def + /savematrix mtrx currentmatrix def + xrad yrad scale + 0 0 1 0 360 arc + savematrix setmatrix + end +} def +/JSTR { + gsave 1 eq { gsave 1 setgray fill grestore } if + exch neg exch neg translate + clip + rotate + 4 dict begin + pathbbox /&top exch def + /&right exch def + /&bottom exch def + &right sub /&width exch def + newpath + currentlinewidth mul round dup + &bottom exch Jrnd exch &top + 4 -1 roll currentlinewidth mul setlinewidth + { &right exch moveto &width 0 rlineto stroke } for + end + grestore + newpath +} bind def + gsave /Times-Roman findfont 9.000000 scalefont setfont +0.000000 0.000000 translate +0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 288.000000 0.000000 lineto stroke +newpath 0.000000 0.000000 moveto 0.000000 -5.000000 lineto stroke +newpath 26.181818 0.000000 moveto 26.181818 -2.000000 lineto stroke +newpath 52.363636 0.000000 moveto 52.363636 -5.000000 lineto stroke +newpath 78.545456 0.000000 moveto 78.545456 -2.000000 lineto stroke +newpath 104.727272 0.000000 moveto 104.727272 -5.000000 lineto stroke +newpath 130.909088 0.000000 moveto 130.909088 -2.000000 lineto stroke +newpath 157.090912 0.000000 moveto 157.090912 -5.000000 lineto stroke +newpath 183.272720 0.000000 moveto 183.272720 -2.000000 lineto stroke +newpath 209.454544 0.000000 moveto 209.454544 -5.000000 lineto stroke +newpath 235.636368 0.000000 moveto 235.636368 -2.000000 lineto stroke +newpath 261.818176 0.000000 moveto 261.818176 -5.000000 lineto stroke +newpath 288.000000 0.000000 moveto 288.000000 -2.000000 lineto stroke +/Times-Roman findfont 11.000000 scalefont setfont 
+gsave 26.181818 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (BYTWO_p) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 52.363636 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (BYTWO_p SSE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 78.545456 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (BYTWO_b) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 104.727272 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (BYTWO_b SSE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 130.909088 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE SINGLE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 157.090912 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE DOUBLE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 183.272720 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE QUAD) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 209.454544 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE QUAD,LAZY) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 235.636368 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE SINGLE,SSE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 261.818176 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (LOG) dup stringwidth pop pop 0 0 moveto +show +grestore + grestore +0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 0.000000 72.000000 lineto stroke +newpath 0.000000 0.000000 moveto -5.000000 0.000000 lineto stroke +newpath 0.000000 9.916304 moveto -2.000000 9.916304 lineto stroke +newpath 0.000000 19.832607 moveto -5.000000 19.832607 lineto stroke +newpath 0.000000 29.748911 moveto -2.000000 29.748911 lineto stroke +newpath 0.000000 39.665215 moveto -5.000000 39.665215 lineto stroke +newpath 0.000000 49.581520 moveto -2.000000 49.581520 lineto stroke +newpath 0.000000 59.497822 moveto -5.000000 
59.497822 lineto stroke +newpath 0.000000 69.414124 moveto -2.000000 69.414124 lineto stroke +/Times-Roman findfont 9.000000 scalefont setfont +gsave -8.000000 0.000000 translate 0.000000 rotate +0 -2.700000 translate (0) dup stringwidth pop neg 0 moveto +show +grestore +gsave -8.000000 19.832607 translate 0.000000 rotate +0 -2.700000 translate (2000) dup stringwidth pop neg 0 moveto +show +grestore +gsave -8.000000 39.665215 translate 0.000000 rotate +0 -2.700000 translate (4000) dup stringwidth pop neg 0 moveto +show +grestore +gsave -8.000000 59.497822 translate 0.000000 rotate +0 -2.700000 translate (6000) dup stringwidth pop neg 0 moveto +show +grestore +/Times-Bold findfont 10.000000 scalefont setfont +gsave -33.279999 36.000000 translate 90.000000 rotate +0 0.000000 translate (MB/s) dup stringwidth pop 2 div neg 0 moveto +show +grestore + grestore + gsave + gsave gsave 26.181818 9.564870 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -9.564870 lineto + 13.090909 -9.564870 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + gsave gsave 52.363636 15.887009 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -15.887009 lineto + 13.090909 -15.887009 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + gsave gsave 78.545456 20.109272 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -20.109272 lineto + 13.090909 -20.109272 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + gsave gsave 104.727272 26.881811 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -26.881811 lineto + 13.090909 -26.881811 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore 
grestore + gsave gsave 130.909088 4.538296 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -4.538296 lineto + 13.090909 -4.538296 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + gsave gsave 157.090912 8.978618 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -8.978618 lineto + 13.090909 -8.978618 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + gsave gsave 183.272720 13.178271 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -13.178271 lineto + 13.090909 -13.178271 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + gsave gsave 209.454544 11.003130 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -11.003130 lineto + 13.090909 -11.003130 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + gsave gsave 235.636368 72.000000 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -72.000000 lineto + 13.090909 -72.000000 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + gsave gsave 261.818176 2.016877 translate 0.000000 rotate + newpath 13.090909 0.000000 moveto -13.090909 0.000000 lineto + -13.090909 -2.016877 lineto + 13.090909 -2.016877 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + grestore +-0.000000 -0.000000 translate + grestore diff --git a/junk-w32-backup.c b/junk-w32-backup.c new file mode 100644 index 0000000..d742a3f --- /dev/null +++ b/junk-w32-backup.c @@ -0,0 +1,1337 @@ +/* + * gf_w32.c + * + * Routines for 32-bit Galois fields + */ + +#define MM_PRINT32(s, r) { 
uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); } +/* Debug helper: prints label s, then the 16 bytes of SSE register r from byte 15 down to byte 0 in hex. */ +#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); } + +#include "gf_int.h" +#include <stdio.h> +#include <stdlib.h> +/* NOTE(review): GF_FIRST_BIT shifts a signed 1 into the sign bit, GF_BASE_FIELD_GROUP_SIZE expands without parentheses and GF_MULTBY_TWO ends in ';' - check every use site before changing any of them. */ +#define GF_FIELD_WIDTH (32) +#define GF_FIRST_BIT (1 << 31) + +#define GF_BASE_FIELD_WIDTH (16) +#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) +#define GF_BASE_FIELD_GROUP_SIZE GF_BASE_FIELD_SIZE-1 +#define GF_S_GF_16_2 (40188) +#define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1); + + +struct gf_w16_logtable_data { + int log_tbl[GF_BASE_FIELD_SIZE]; + gf_val_16_t _antilog_tbl[GF_BASE_FIELD_SIZE * 4]; + gf_val_16_t *antilog_tbl; + gf_val_16_t inv_tbl[GF_BASE_FIELD_SIZE]; +}; + +struct gf_split_2_32_lazy_data { + gf_val_32_t last_value; + gf_val_32_t tables[16][4]; +}; + +struct gf_split_8_8_data { + gf_val_32_t tables[7][256][256]; +}; + +struct gf_split_4_32_lazy_data { + gf_val_32_t last_value; + gf_val_32_t tables[8][16]; +}; +/* 1 / a through the installed divide routine. */ +static +inline +gf_val_32_t gf_w32_inverse_from_divide (gf_t *gf, gf_val_32_t a) +{ + return gf->divide.w32(gf, 1, a); +} +/* a / b as a * inverse(b) through the installed inverse and multiply routines. */ +static +inline +gf_val_32_t gf_w32_divide_from_inverse (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + b = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, b); +} +/* Generic region multiply: dest[i] = (or ^=, when xor is set) gf->multiply.w32(val, src[i]) for bytes / sizeof(gf_val_32_t) words. No alignment or size checks are done here. */ +static +void +gf_w32_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int +xor) +{ + int i; + gf_val_32_t *s32; + gf_val_32_t *d32; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + + if (xor) { + for (i = 0; i < bytes/sizeof(gf_val_32_t); i++) { + d32[i] ^= gf->multiply.w32(gf, val, s32[i]); + } + } else { + for (i = 0; i < bytes/sizeof(gf_val_32_t); i++) { + d32[i] = gf->multiply.w32(gf, val,
s32[i]); + } + } +} +/* Inverse of b by the extended Euclidean algorithm on GF(2) polynomials, starting from prim_poly (taken as degree 32, its x^32 term implicit) and b. Returns -1 converted to gf_val_32_t when b == 0. Shifts use 1U so that reaching bit 31 is well defined. */ +static +inline +gf_val_32_t gf_w32_euclid (gf_t *gf, gf_val_32_t b) +{ + gf_val_32_t e_i, e_im1, e_ip1; + gf_val_32_t d_i, d_im1, d_ip1; + gf_val_32_t y_i, y_im1, y_ip1; + gf_val_32_t c_i; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = 32; + for (d_i = d_im1-1; ((1U << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 >= d_i) { + c_i ^= (1U << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + d_ip1--; + while ((e_ip1 & (1U << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i = e_ip1; + d_i = d_ip1; + } + + return y_i; +} +/* Inverse of b delegated to gf_bitmatrix_inverse() with width 32 and the field's prim_poly. */ +static +inline +gf_val_32_t gf_w32_matrix (gf_t *gf, gf_val_32_t b) +{ + return gf_bitmatrix_inverse(b, 32, ((gf_internal_t *) (gf->scratch))->prim_poly); +} + +/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only + include it for completeness. It does have the feature that it requires no + extra memory.
+*/ + +static +inline +gf_val_32_t +gf_w32_shift_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) +{ + uint64_t product, i, pp, a, b, one; + gf_internal_t *h; + + a = a32; + b = b32; + h = (gf_internal_t *) gf->scratch; + one = 1; + pp = h->prim_poly | (one << 32); + + product = 0; + + for (i = 0; i < GF_FIELD_WIDTH; i++) { + if (a & (one << i)) product ^= (b << i); + } + for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { + if (product & (one << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); + } + return product; +} +/* Installs the shift multiply, the Euclid inverse and the generic word-at-a-time region multiply. Always returns 1. */ +static +int gf_w32_shift_init(gf_t *gf) +{ + gf->multiply.w32 = gf_w32_shift_multiply; + gf->inverse.w32 = gf_w32_euclid; + gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + return 1; +} +/* Multiply by splitting both operands into four bytes: XORs the 16 entries tables[i+j][byte i of a32][byte j of b32]. */ +static +inline +gf_val_32_t +gf_w32_split_8_8_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) +{ + uint32_t product, i, j, mask, tb; + gf_internal_t *h; + struct gf_split_8_8_data *d8; + + h = (gf_internal_t *) gf->scratch; + d8 = (struct gf_split_8_8_data *) h->private; + product = 0; + mask = 0xff; + + for (i = 0; i < 4; i++) { + tb = b32; + for (j = 0; j < 4; j++) { + product ^= d8->tables[i+j][a32&mask][tb&mask]; + tb >>= 8; + } + a32 >>= 8; + } + return product; +} +/* Region multiply by val with the 8,8 split tables: the 16 table rows selected by the bytes of val are resolved once, then each 32-bit source word costs 16 lookups. Calls gf_alignment_error() when src is not 4-byte aligned, src and dest differ in their low three address bits, or bytes is not a multiple of 4. NOTE(review): tv and tp are assigned but never read. */ +static +inline +void +gf_w32_split_8_8_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint32_t product, mask, tb, tv, tp; + gf_internal_t *h; + struct gf_split_8_8_data *d8; + uint32_t *p00, *p01, *p02, *p03; + uint32_t *p10, *p11, *p12, *p13; + uint32_t *p20, *p21, *p22, *p23; + uint32_t *p30, *p31, *p32, *p33; + uint32_t *s32, *d32, *top; + unsigned long uls, uld; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_8_8_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_8_8_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + tv = val; + h = (gf_internal_t *) gf->scratch; +
d8 = (struct gf_split_8_8_data *) h->private; + mask = 0xff; + + p00 = &(d8->tables[0][val&mask][0]); + p01 = &(d8->tables[1][val&mask][0]); + p02 = &(d8->tables[2][val&mask][0]); + p03 = &(d8->tables[3][val&mask][0]); + val >>= 8; + p10 = &(d8->tables[1][val&mask][0]); + p11 = &(d8->tables[2][val&mask][0]); + p12 = &(d8->tables[3][val&mask][0]); + p13 = &(d8->tables[4][val&mask][0]); + val >>= 8; + p20 = &(d8->tables[2][val&mask][0]); + p21 = &(d8->tables[3][val&mask][0]); + p22 = &(d8->tables[4][val&mask][0]); + p23 = &(d8->tables[5][val&mask][0]); + val >>= 8; + p30 = &(d8->tables[3][val&mask][0]); + p31 = &(d8->tables[4][val&mask][0]); + p32 = &(d8->tables[5][val&mask][0]); + p33 = &(d8->tables[6][val&mask][0]); + + s32 = (uint32_t *) src; + d32 = (uint32_t *) dest; + top = (d32 + (bytes/4)); + + while (d32 < top) { + tb = *s32; + tp = *d32; + product = (xor) ? (*d32) : 0; + product ^= p00[tb&mask]; + product ^= p10[tb&mask]; + product ^= p20[tb&mask]; + product ^= p30[tb&mask]; + + tb >>= 8; + product ^= p01[tb&mask]; + product ^= p11[tb&mask]; + product ^= p21[tb&mask]; + product ^= p31[tb&mask]; + + tb >>= 8; + product ^= p02[tb&mask]; + product ^= p12[tb&mask]; + product ^= p22[tb&mask]; + product ^= p32[tb&mask]; + + tb >>= 8; + product ^= p03[tb&mask]; + product ^= p13[tb&mask]; + product ^= p23[tb&mask]; + product ^= p33[tb&mask]; + *d32 = product; + s32++; + d32++; + } +} +/* Lazy 2-bit split region multiply by val: sixteen 4-entry tables (table i holds 0, 1, 2 and 3 times val*x^(2i)) are rebuilt only when val differs from ld->last_value; each source word is then consumed two bits at a time until its remaining bits are zero. Calls gf_alignment_error() when src is not 4-byte aligned, src and dest differ in their low three address bits, or bytes is not a multiple of 4; val == 0 zeroes dest or, with xor set, does nothing. */ +static +void +gf_w32_split_2_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h; + struct gf_split_2_32_lazy_data *ld; + int i; + gf_val_32_t pp, v, v2, s, *s32, *d32, *top; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_2_32_lazy_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_2_32_lazy_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + }
+ + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + ld = (struct gf_split_2_32_lazy_data *) h->private; +/* Rebuild the tables only when val changed since the previous call. */ + if (ld->last_value != val) { + v = val; + for (i = 0; i < 16; i++) { + v2 = (v << 1); + if (v & GF_FIRST_BIT) v2 ^= pp; + ld->tables[i][0] = 0; + ld->tables[i][1] = v; + ld->tables[i][2] = v2; + ld->tables[i][3] = (v2 ^ v); + v = (v2 << 1); + if (v2 & GF_FIRST_BIT) v ^= pp; + } + } + ld->last_value = val; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + while (d32 != top) { + v = (xor) ? *d32 : 0; + s = *s32; + i = 0; + while (s != 0) { + v ^= ld->tables[i][s&3]; + s >>= 2; + i++; + } + *d32 = v; + d32++; + s32++; + } +} +/* SSE variant of the 2-bit split region multiply; the body is compiled only when INTEL_SSE4 is defined. Requires src and dest to agree in their low four address bits. Words before the first 16-byte boundary and after the last one are handled singly with gf->multiply.w32; the middle is processed 16 bytes per iteration. NOTE(review): the statements "*s32++; *d32++;" only advance the pointers - the dereferences are discarded. */ +static +void +gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + unsigned long uls, uld; + gf_internal_t *h; + int i, m, j, tindex; + gf_val_32_t pp, v, v2, s, *s32, *d32, *top; + __m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_2_32_lazy_sse_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_2_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + uls &= 0xf; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + if (uls != 0) { + while (uls != 16) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + if (d32 == top) return; + uls += 4; + } + } + + uld = (unsigned long) top; + top = (gf_val_32_t *) (uld - (uld & 0xf)); + uld &= 0xf; + + v = val; + for
(i = 0; i < 16; i++) { + v2 = (v << 1); + if (v & GF_FIRST_BIT) v2 ^= pp; + tables[i] = _mm_set_epi32(v2 ^ v, v2, v, 0); + v = (v2 << 1); + if (v2 & GF_FIRST_BIT) v ^= pp; + } + + shuffler = _mm_set_epi8(0xc, 0xc, 0xc, 0xc, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0); + adder = _mm_set_epi8(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0); + mask1 = _mm_set1_epi8(0x3); + mask2 = _mm_set1_epi8(0xc); + + while (d32 != top) { + pi = (xor) ? _mm_load_si128 ((__m128i *) d32) : _mm_setzero_si128(); + vi = _mm_load_si128((__m128i *) s32); + + tindex = 0; + for (i = 0; i < 4; i++) { + si = _mm_shuffle_epi8(vi, shuffler); + + xi = _mm_and_si128(si, mask1); + xi = _mm_slli_epi16(xi, 2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + vi = _mm_srli_epi32(vi, 8); + } + _mm_store_si128((__m128i *) d32, pi); + d32 += 4; + s32 += 4; + } + + while (uld > 0) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + uld -= 4; + } + + +#endif +} +/* Lazy 4-bit split region multiply by val: eight 16-entry tables (table i holds the sixteen nibble multiples of val*x^(4i)) are rebuilt only when val differs from ld->last_value; each source word is then consumed one nibble at a time until its remaining bits are zero. */ +static +void +gf_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h; + struct gf_split_4_32_lazy_data *ld; + int i, j, k; + gf_val_32_t pp, v, s, *s32, *d32, *top; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7)))
gf_alignment_error("gf_w32_split_4_32_lazy_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_4_32_lazy_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + if (ld->last_value != val) { + v = val; + for (i = 0; i < 8; i++) { + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); + } + } + } + ld->last_value = val; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + while (d32 != top) { + v = (xor) ? *d32 : 0; + s = *s32; + i = 0; + while (s != 0) { + v ^= ld->tables[i][s&0xf]; + s >>= 4; + i++; + } + *d32 = v; + d32++; + s32++; + } +} + +static +void +gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + unsigned long uls, uld; + gf_internal_t *h; + int i, m, j, k, tindex; + gf_val_32_t pp, v, s, *s32, *d32, *top, *realtop; + __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3; + __m128i tv1, tv2, tv3, tv0; + struct gf_split_4_32_lazy_data *ld; + uint8_t btable[16]; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + uls &= 0xf; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + if (uls != 0) { + 
while (uls != 16) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + if (d32 == top) return; + uls += 4; + } + } + + uld = (unsigned long) top; + realtop = top; + + /* You need the size of this region to be a multiple of 64 bytes */ + bytes = (top - d32); + bytes -= (bytes & 0xf); + top = (d32 + bytes); + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + v = val; + for (i = 0; i < 8; i++) { + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); + } + for (j = 0; j < 4; j++) { + for (k = 0; k < 16; k++) { + btable[k] = (uint8_t) ld->tables[i][k]; + ld->tables[i][k] >>= 8; + } + tables[i][j] = _mm_loadu_si128((__m128i *) btable); + } + } + + mask1 = _mm_set1_epi8(0xf); + + if (xor) { + while (d32 != top) { + p0 = _mm_load_si128 ((__m128i *) d32); + p1 = _mm_load_si128 ((__m128i *) (d32+4)); + p2 = _mm_load_si128 ((__m128i *) (d32+8)); + p3 = _mm_load_si128 ((__m128i *) (d32+12)); + + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si)); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, 
_mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } else { + while (d32 != top) { + + v0 = _mm_load_si128((__m128i *) s32); s32 
+= 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + + + + si = _mm_and_si128(v0, mask1); + p0 = _mm_shuffle_epi8(tables[0][0], si); + p1 = _mm_shuffle_epi8(tables[0][1], si); + p2 = _mm_shuffle_epi8(tables[0][2], si); + p3 = _mm_shuffle_epi8(tables[0][3], si); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si)); + p1 = _mm_xor_si128(p1, 
_mm_shuffle_epi8(tables[6][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } + + while (d32 < realtop) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + } + + +#endif +} + +/* +static +void +gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + unsigned long uls, uld; + gf_internal_t *h; + int i, m, j, k, tindex; + gf_val_32_t pp, v, s, *s32, *d32, *top, *realtop; + __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1; + struct gf_split_4_32_lazy_data *ld; + uint8_t btable[16]; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + uls &= 0xf; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + if (uls != 0) { + while (uls != 16) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + 
if (d32 == top) return; + uls += 4; + } + } + + uld = (unsigned long) top; + realtop = top; + + bytes = (top - d32); + bytes -= (bytes & 0xf); + top = (d32 + bytes); + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + v = val; + for (i = 0; i < 8; i++) { + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); + } + for (j = 0; j < 4; j++) { + for (k = 0; k < 16; k++) { + btable[k] = (uint8_t) ld->tables[i][k]; + ld->tables[i][k] >>= 8; + } + tables[i][j] = _mm_loadu_si128((__m128i *) btable); + } + } + + mask1 = _mm_set1_epi8(0xf); + + if (xor) { + while (d32 != top) { + p0 = _mm_load_si128 ((__m128i *) d32); + p1 = _mm_load_si128 ((__m128i *) (d32+4)); + p2 = _mm_load_si128 ((__m128i *) (d32+8)); + p3 = _mm_load_si128 ((__m128i *) (d32+12)); + + for (i = 0; i < 8; i++) { + vi = _mm_load_si128((__m128i *) s32); + + si = _mm_and_si128(vi, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si)); + + i++; + vi = _mm_srli_epi32(vi, 4); + si = _mm_and_si128(vi, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si)); + s32 += 4; + } + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } else { + while (d32 != top) { + for (i = 0; i < 8; i++) { + vi = _mm_load_si128((__m128i *) s32); + + si = _mm_and_si128(vi, mask1); + p0 = _mm_shuffle_epi8(tables[i][0], si); + p1 = _mm_shuffle_epi8(tables[i][1], si); + 
p2 = _mm_shuffle_epi8(tables[i][2], si); + p3 = _mm_shuffle_epi8(tables[i][3], si); + + i++; + vi = _mm_srli_epi32(vi, 4); + si = _mm_and_si128(vi, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[i][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[i][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[i][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[i][3], si)); + s32 += 4; + } + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } + + while (d32 < realtop) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + } + + +#endif +} +*/ + +static +int gf_w32_split_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_split_2_32_lazy_data *ld2; + struct gf_split_4_32_lazy_data *ld4; + struct gf_split_8_8_data *d8; + uint32_t p, basep; + int i, j, exp; + + h = (gf_internal_t *) gf->scratch; + + /* Defaults */ + gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + gf->multiply.w32 = gf_w32_shift_multiply; + gf->inverse.w32 = gf_w32_euclid; + + if (h->arg1 == 8 && h->arg2 == 8) { + gf->multiply.w32 = gf_w32_split_8_8_multiply; + gf->multiply_region.w32 = gf_w32_split_8_8_multiply_region; + d8 = (struct gf_split_8_8_data *) h->private; + basep = 1; + for (exp = 0; exp < 7; exp++) { + for (j = 0; j < 256; j++) d8->tables[exp][0][j] = 0; + for (i = 0; i < 256; i++) d8->tables[exp][i][0] = 0; + d8->tables[exp][1][1] = basep; + for (i = 2; i < 256; i++) { + if (i&1) { + p = d8->tables[exp][i^1][1]; + d8->tables[exp][i][1] = p ^ basep; + } else { + p = d8->tables[exp][i>>1][1]; + d8->tables[exp][i][1] = GF_MULTBY_TWO(p); + } + } + for (i = 1; i < 256; i++) { + p = d8->tables[exp][i][1]; + for (j = 1; j < 256; j++) { + if (j&1) { + d8->tables[exp][i][j] = d8->tables[exp][i][j^1] ^ p; + } else { + d8->tables[exp][i][j] 
= GF_MULTBY_TWO(d8->tables[exp][i][j>>1]); + } + } + } + for (i = 0; i < 8; i++) basep = GF_MULTBY_TWO(basep); + } + } + if ((h->arg1 == 2 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 2)) { + ld2 = (struct gf_split_2_32_lazy_data *) h->private; + ld2->last_value = 0; + if (h->region_type & GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; + } + } + if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4)) { + ld4 = (struct gf_split_4_32_lazy_data *) h->private; + ld4->last_value = 0; + if (h->region_type & GF_REGION_SSE) { + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region; + } + } else { + gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region; + } + } + return 1; +} + +static +gf_val_32_t +gf_w32_composite_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint16_t b0 = b & 0x0000ffff; + uint16_t b1 = (b & 0xffff0000) >> 16; + uint16_t a0 = a & 0x0000ffff; + uint16_t a1 = (a & 0xffff0000) >> 16; + uint16_t a1b1; + + a1b1 = base_gf->multiply.w16(base_gf, a1, b1); + + return ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); +} + +/* + * Composite field division trick (explained in 2007 tech report) + * + * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1 + * + * let c = b^-1 + * + * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0) + * + * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1 + * + * let d = b1c1 and d+1 = b0c0 + * + * solve s*b1c1+b1c0+b0c1 = 0 + * + * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1 + * + * c0 = (d+1)b0^-1 + * c1 = d*b1^-1 + * + * a / b = a * c + */ +static +gf_val_32_t +gf_w32_composite_inverse(gf_t *gf, 
gf_val_32_t a) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint16_t a0 = a & 0x0000ffff; + uint16_t a1 = (a & 0xffff0000) >> 16; + uint16_t c0, c1, d, tmp; + uint32_t c; + uint16_t a0inv, a1inv; + + if (a0 == 0) { + a1inv = base_gf->inverse.w16(base_gf, a1); + c0 = base_gf->multiply.w16(base_gf, a1inv, GF_S_GF_16_2); + c1 = a1inv; + } else if (a1 == 0) { + c0 = base_gf->inverse.w16(base_gf, a0); + c1 = 0; + } else { + a1inv = base_gf->inverse.w16(base_gf, a1); + a0inv = base_gf->inverse.w16(base_gf, a0); + + d = base_gf->multiply.w16(base_gf, a1, a0inv); + + tmp = (base_gf->multiply.w16(base_gf, a1, a0inv) ^ base_gf->multiply.w16(base_gf, a0, a1inv) ^ GF_S_GF_16_2); + tmp = base_gf->inverse.w16(base_gf, tmp); + + d = base_gf->multiply.w16(base_gf, d, tmp); + + c0 = base_gf->multiply.w16(base_gf, (d^1), a0inv); + c1 = base_gf->multiply.w16(base_gf, d, a1inv); + } + + c = c0 | (c1 << 16); + + return c; +} + +static +gf_val_32_t +gf_w32_composite_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_val_32_t binv; + + binv = gf_w32_composite_inverse(gf, b); + + return gf_w32_composite_multiply(gf, a, binv); +} + +static +void +gf_w32_composite_multiply_region_table(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + int i=0; + struct gf_w16_logtable_data * ltd; + uint16_t b0 = val & 0x0000ffff; + uint16_t b1 = (val & 0xffff0000) >> 16; + uint32_t *s32 = (uint32_t *) src; + uint32_t *d32 = (uint32_t *) dest; + uint16_t a0, a1, a1b1; + int num_syms = bytes >> 2; + int sym_divisible = bytes % 4; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; 
+ bzero(dest, bytes); + return; + } + + ltd = (struct gf_w16_logtable_data *) h->private; + + if (xor) { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]]; + + d32[i] ^= ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) | + ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^ + ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16)); + + } + } else { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]]; + + d32[i] = ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) | + ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^ + ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16)); + } + } +} + +static +void +gf_w32_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + int i=0; + struct gf_w16_logtable_data * ltd; + uint16_t b0 = val & 0x0000ffff; + uint16_t b1 = (val & 0xffff0000) >> 16; + uint32_t *s32 = (uint32_t *) src; + uint32_t *d32 = (uint32_t *) dest; + uint16_t a0, a1, a1b1; + int num_syms = bytes >> 2; + int sym_divisible = bytes % 4; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + ltd = (struct gf_w16_logtable_data *) h->private; + + if (xor) { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 
= (s32[i] & 0xffff0000) >> 16; + a1b1 = base_gf->multiply.w16(base_gf, a1, b1); + + d32[i] ^= ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); + + } + } else { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = base_gf->multiply.w16(base_gf, a1, b1); + + d32[i] = ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); + } + } +} + + + +static +void +gf_w32_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + gf_val_16_t val0 = val & 0x0000ffff; + gf_val_16_t val1 = (val & 0xffff0000) >> 16; + int sub_reg_size = bytes / 2; + + if (bytes % 2 != 0) gf_alignment_error("gf_w32_composite_multiply_region_alt", 1); + if (sub_reg_size % 2 != 0) gf_alignment_error("gf_w32_composite_multiply_region_alt", 1); + + if (!xor) { + memset(dest, 0, bytes); + } + + base_gf->multiply_region.w16(base_gf, src, dest, val0, sub_reg_size, xor); + base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest, val1, sub_reg_size, 1); + base_gf->multiply_region.w16(base_gf, src, dest+sub_reg_size, val1, sub_reg_size, xor); + base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest+sub_reg_size, val0, sub_reg_size, 1); + base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest+sub_reg_size, base_gf->multiply.w16(base_gf, GF_S_GF_16_2, val1), sub_reg_size, 1); +} + +static +int gf_w32_composite_init(gf_t *gf) +{ + struct gf_w16_logtable_data *ltd; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + gf_val_32_t a, b; + uint64_t prim_poly = ((gf_internal_t *) 
base_gf->scratch)->prim_poly; + int i; + + ltd = (struct gf_w16_logtable_data *) h->private; + + ltd->log_tbl[0] = 0; + + bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl)); + + ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_BASE_FIELD_SIZE * 2]); + + b = 1; + for (i = 0; i < GF_BASE_FIELD_GROUP_SIZE; i++) { + ltd->log_tbl[b] = (gf_val_16_t)i; + ltd->antilog_tbl[i] = (gf_val_16_t)b; + ltd->antilog_tbl[i+GF_BASE_FIELD_GROUP_SIZE] = (gf_val_16_t)b; + b <<= 1; + if (b & GF_BASE_FIELD_SIZE) { + b = b ^ prim_poly; + } + } + ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ + ltd->inv_tbl[1] = 1; + for (i = 2; i < GF_BASE_FIELD_SIZE; i++) { + ltd->inv_tbl[i] = ltd->antilog_tbl[GF_BASE_FIELD_GROUP_SIZE-ltd->log_tbl[i]]; + } + + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w32 = gf_w32_composite_multiply_region_alt; + } else { + if (h->region_type & GF_REGION_SINGLE_TABLE) { + gf->multiply_region.w32 = gf_w32_composite_multiply_region_table; + } else { + gf->multiply_region.w32 = gf_w32_composite_multiply_region; + } + } + + gf->multiply.w32 = gf_w32_composite_multiply; + gf->divide.w32 = gf_w32_composite_divide; + gf->inverse.w32 = gf_w32_composite_inverse; + + return 1; +} + +int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + int ss; + + ss = (GF_REGION_SSE | GF_REGION_NOSSE); + switch(mult_type) + { + case GF_MULT_SPLIT_TABLE: + if (arg1 == 8 && arg2 == 8){ + if (region_type != GF_REGION_DEFAULT) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_split_8_8_data) + 64; + } + if ((arg1 == 2 && arg2 == 32) || (arg2 == 2 && arg1 == 32)) { + region_type &= (~GF_REGION_LAZY); + if ((region_type & ss) == ss) return -1; + if ((region_type | ss) != ss) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64; + } + if ((arg1 == 4 && arg2 == 32) || (arg2 == 4 && arg1 == 32)) { + region_type &= (~GF_REGION_LAZY); + if (region_type & 
GF_REGION_ALTMAP) { + region_type &= (~GF_REGION_ALTMAP); + if ((region_type & ss) == ss) return -1; + if ((region_type | ss) != ss) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_split_4_32_lazy_data) + 64; + } else return -1; + } + return -1; + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1; + return sizeof(gf_internal_t); + break; + case GF_MULT_COMPOSITE: + if (region_type & ~(GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1; + if ((region_type & (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) == (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) return -1; + if (arg1 == 2 && arg2 == 16 || arg2 == 2 && arg1 == 16) { + return sizeof(gf_internal_t) + sizeof(struct gf_w16_logtable_data) + 64; + } else { + return -1; + } + default: + return -1; + } +} + +int gf_w32_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x400007; + + gf->multiply.w32 = NULL; + gf->divide.w32 = NULL; + gf->inverse.w32 = NULL; + gf->multiply_region.w32 = NULL; + + switch(h->mult_type) { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break; + case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break; + case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break; + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w32 = gf_w32_divide_from_inverse; + gf->inverse.w32 = gf_w32_euclid; + } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w32 = gf_w32_divide_from_inverse; + gf->inverse.w32 = gf_w32_matrix; + } + + if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { + gf->divide.w32 = gf_w32_divide_from_inverse; + } + if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { + gf->inverse.w32 = gf_w32_inverse_from_divide; + } + return 1; +} diff --git a/junk-w32-single-time.c b/junk-w32-single-time.c new file mode 100644 index 
0000000..ab406b7 --- /dev/null +++ b/junk-w32-single-time.c @@ -0,0 +1,16 @@ +echo "SHIFT" `gf_time 32 M 0 10240 10240 SHIFT - - | tail -n 1` +echo "GROUP 2 4" `gf_time 32 M 0 10240 10240 GROUP 2 4 - - | tail -n 1` +echo "GROUP 3 4" `gf_time 32 M 0 10240 10240 GROUP 3 4 - - | tail -n 1` +echo "GROUP 4 4" `gf_time 32 M 0 10240 10240 GROUP 4 4 - - | tail -n 1` +echo "GROUP 2 8" `gf_time 32 M 0 10240 10240 GROUP 2 8 - - | tail -n 1` +echo "GROUP 3 8" `gf_time 32 M 0 10240 10240 GROUP 3 8 - - | tail -n 1` +echo "GROUP 4 8" `gf_time 32 M 0 10240 10240 GROUP 4 8 - - | tail -n 1` +echo "GROUP 2 2" `gf_time 32 M 0 10240 10240 GROUP 2 2 - - | tail -n 1` +echo "GROUP 3 3" `gf_time 32 M 0 10240 10240 GROUP 3 3 - - | tail -n 1` +echo "BYTWO_p" `gf_time 32 M 0 10240 10240 BYTWO_p - - | tail -n 1` +echo "BYTWO_b" `gf_time 32 M 0 10240 10240 BYTWO_b - - | tail -n 1` +echo "SPLIT 32 2" `gf_time 32 M 0 10240 10240 SPLIT 32 2 - - | tail -n 1` +echo "SPLIT 32 4" `gf_time 32 M 0 10240 10240 SPLIT 32 4 - - | tail -n 1` +echo "SPLIT 32 8" `gf_time 32 M 0 10240 10240 SPLIT 32 8 - - | tail -n 1` +echo "SPLIT 8 8" `gf_time 32 M 0 10240 10240 SPLIT 8 8 - - | tail -n 1` +echo "COMPOSITE 2 16 -" `gf_time 32 M 0 10240 10240 COMPOSITE 2 16 - - - | tail -n 1` diff --git a/junk-w4-out.txt b/junk-w4-out.txt new file mode 100644 index 0000000..60efcdc --- /dev/null +++ b/junk-w4-out.txt @@ -0,0 +1,60 @@ +Seed: 1345648646 +Buffer-Const,s!=d,xor=0: 1.005451 s 971.268 MB/s +Buffer-Const,s!=d,xor=1: 1.029715 s 948.382 MB/s +Buffer-Const,s==d,xor=0: 0.989556 s 986.869 MB/s +Buffer-Const,s==d,xor=1: 1.026105 s 951.718 MB/s +BYTWO_p +Seed: 1345648655 +Buffer-Const,s!=d,xor=0: 0.603574 s 1617.966 MB/s +Buffer-Const,s!=d,xor=1: 0.612757 s 1593.720 MB/s +Buffer-Const,s==d,xor=0: 0.599630 s 1628.609 MB/s +Buffer-Const,s==d,xor=1: 0.622749 s 1568.149 MB/s +BYTWO_p SSE +Seed: 1345648662 +Buffer-Const,s!=d,xor=0: 0.487348 s 2003.831 MB/s +Buffer-Const,s!=d,xor=1: 0.488745 s 1998.100 MB/s 
+Buffer-Const,s==d,xor=0: 0.470528 s 2075.463 MB/s +Buffer-Const,s==d,xor=1: 0.480067 s 2034.223 MB/s +BYTWO_b +Seed: 1345648669 +Buffer-Const,s!=d,xor=0: 0.359088 s 2719.564 MB/s +Buffer-Const,s!=d,xor=1: 0.365816 s 2669.543 MB/s +Buffer-Const,s==d,xor=0: 0.361701 s 2699.920 MB/s +Buffer-Const,s==d,xor=1: 0.354540 s 2754.449 MB/s +BYTWO_b SSE +Seed: 1345648689 +Buffer-Const,s!=d,xor=0: 2.036338 s 479.568 MB/s +Buffer-Const,s!=d,xor=1: 2.237701 s 436.413 MB/s +Buffer-Const,s==d,xor=0: 2.048971 s 476.611 MB/s +Buffer-Const,s==d,xor=1: 2.229312 s 438.056 MB/s +TABLE SINGLE +Seed: 1345648703 +Buffer-Const,s!=d,xor=0: 1.074082 s 909.207 MB/s +Buffer-Const,s!=d,xor=1: 1.083797 s 901.057 MB/s +Buffer-Const,s==d,xor=0: 1.077001 s 906.743 MB/s +Buffer-Const,s==d,xor=1: 1.079369 s 904.753 MB/s +TABLE DOUBLE +Seed: 1345648712 +Buffer-Const,s!=d,xor=0: 0.743830 s 1312.884 MB/s +Buffer-Const,s!=d,xor=1: 0.760719 s 1283.736 MB/s +Buffer-Const,s==d,xor=0: 0.708908 s 1377.559 MB/s +Buffer-Const,s==d,xor=1: 0.727896 s 1341.624 MB/s +TABLE QUAD +Seed: 1345648720 +Buffer-Const,s!=d,xor=0: 0.898810 s 1086.506 MB/s +Buffer-Const,s!=d,xor=1: 0.876269 s 1114.455 MB/s +Buffer-Const,s==d,xor=0: 0.872698 s 1119.015 MB/s +Buffer-Const,s==d,xor=1: 0.873175 s 1118.404 MB/s +TABLE QUAD,LAZY +Seed: 1345648729 +Buffer-Const,s!=d,xor=0: 0.143798 s 6791.205 MB/s +Buffer-Const,s!=d,xor=1: 0.151166 s 6460.201 MB/s +Buffer-Const,s==d,xor=0: 0.123824 s 7886.721 MB/s +Buffer-Const,s==d,xor=1: 0.123538 s 7904.940 MB/s +TABLE SINGLE,SSE +Seed: 1345648748 +Buffer-Const,s!=d,xor=0: 4.562493 s 214.041 MB/s +Buffer-Const,s!=d,xor=1: 5.116838 s 190.853 MB/s +Buffer-Const,s==d,xor=0: 4.533105 s 215.429 MB/s +Buffer-Const,s==d,xor=1: 5.053730 s 193.236 MB/s +LOG diff --git a/junk-w4-timing-out.txt b/junk-w4-timing-out.txt new file mode 100644 index 0000000..6daadd4 --- /dev/null +++ b/junk-w4-timing-out.txt @@ -0,0 +1,792 @@ +Seed: 1352748099 +Buffer-Const,s!=d,xor=0: 0.608121 s 210.484 MB/s 
+Buffer-Const,s!=d,xor=1: 0.692329 s 184.883 MB/s +1024 131072 4 LOG - - +Seed: 1352748102 +Buffer-Const,s!=d,xor=0: 0.699226 s 183.060 MB/s +Buffer-Const,s!=d,xor=1: 0.687310 s 186.233 MB/s +2048 65536 4 LOG - - +Seed: 1352748106 +Buffer-Const,s!=d,xor=0: 0.604397 s 211.781 MB/s +Buffer-Const,s!=d,xor=1: 0.682591 s 187.521 MB/s +4096 32768 4 LOG - - +Seed: 1352748109 +Buffer-Const,s!=d,xor=0: 0.602384 s 212.489 MB/s +Buffer-Const,s!=d,xor=1: 0.678849 s 188.555 MB/s +8192 16384 4 LOG - - +Seed: 1352748112 +Buffer-Const,s!=d,xor=0: 0.602103 s 212.588 MB/s +Buffer-Const,s!=d,xor=1: 0.688450 s 185.925 MB/s +16384 8192 4 LOG - - +Seed: 1352748115 +Buffer-Const,s!=d,xor=0: 0.598464 s 213.881 MB/s +Buffer-Const,s!=d,xor=1: 0.676076 s 189.328 MB/s +32768 4096 4 LOG - - +Seed: 1352748119 +Buffer-Const,s!=d,xor=0: 0.611499 s 209.322 MB/s +Buffer-Const,s!=d,xor=1: 0.693351 s 184.611 MB/s +65536 2048 4 LOG - - +Seed: 1352748122 +Buffer-Const,s!=d,xor=0: 0.609786 s 209.910 MB/s +Buffer-Const,s!=d,xor=1: 0.689794 s 185.563 MB/s +131072 1024 4 LOG - - +Seed: 1352748125 +Buffer-Const,s!=d,xor=0: 0.619027 s 206.776 MB/s +Buffer-Const,s!=d,xor=1: 0.703627 s 181.915 MB/s +262144 512 4 LOG - - +Seed: 1352748129 +Buffer-Const,s!=d,xor=0: 0.605785 s 211.296 MB/s +Buffer-Const,s!=d,xor=1: 0.696728 s 183.716 MB/s +524288 256 4 LOG - - +Seed: 1352748132 +Buffer-Const,s!=d,xor=0: 0.591555 s 216.379 MB/s +Buffer-Const,s!=d,xor=1: 0.666735 s 191.980 MB/s +1048576 128 4 LOG - - +Seed: 1352748135 +Buffer-Const,s!=d,xor=0: 0.623167 s 205.403 MB/s +Buffer-Const,s!=d,xor=1: 0.675010 s 189.627 MB/s +2097152 64 4 LOG - - +Seed: 1352748138 +Buffer-Const,s!=d,xor=0: 0.572467 s 223.594 MB/s +Buffer-Const,s!=d,xor=1: 0.733714 s 174.455 MB/s +4194304 32 4 LOG - - +Seed: 1352748142 +Buffer-Const,s!=d,xor=0: 0.617676 s 207.228 MB/s +Buffer-Const,s!=d,xor=1: 0.742744 s 172.334 MB/s +8388608 16 4 LOG - - +Seed: 1352748145 +Buffer-Const,s!=d,xor=0: 0.579833 s 220.753 MB/s +Buffer-Const,s!=d,xor=1: 0.736355 s 
173.829 MB/s +16777216 8 4 LOG - - +Seed: 1352748148 +Buffer-Const,s!=d,xor=0: 0.682980 s 187.414 MB/s +Buffer-Const,s!=d,xor=1: 0.738846 s 173.243 MB/s +33554432 4 4 LOG - - +Seed: 1352748152 +Buffer-Const,s!=d,xor=0: 0.692141 s 184.933 MB/s +Buffer-Const,s!=d,xor=1: 0.725968 s 176.316 MB/s +67108864 2 4 LOG - - +Seed: 1352748155 +Buffer-Const,s!=d,xor=0: 0.737346 s 173.596 MB/s +Buffer-Const,s!=d,xor=1: 0.725769 s 176.365 MB/s +134217728 1 4 LOG - - +Seed: 1352748159 +Buffer-Const,s!=d,xor=0: 0.252694 s 506.541 MB/s +Buffer-Const,s!=d,xor=1: 0.280102 s 456.976 MB/s +1024 131072 4 TABLE SINGLE - +Seed: 1352748160 +Buffer-Const,s!=d,xor=0: 0.246866 s 518.501 MB/s +Buffer-Const,s!=d,xor=1: 0.276830 s 462.377 MB/s +2048 65536 4 TABLE SINGLE - +Seed: 1352748162 +Buffer-Const,s!=d,xor=0: 0.246874 s 518.482 MB/s +Buffer-Const,s!=d,xor=1: 0.274016 s 467.125 MB/s +4096 32768 4 TABLE SINGLE - +Seed: 1352748164 +Buffer-Const,s!=d,xor=0: 0.247869 s 516.402 MB/s +Buffer-Const,s!=d,xor=1: 0.271679 s 471.144 MB/s +8192 16384 4 TABLE SINGLE - +Seed: 1352748166 +Buffer-Const,s!=d,xor=0: 0.244581 s 523.345 MB/s +Buffer-Const,s!=d,xor=1: 0.270779 s 472.710 MB/s +16384 8192 4 TABLE SINGLE - +Seed: 1352748167 +Buffer-Const,s!=d,xor=0: 0.256167 s 499.675 MB/s +Buffer-Const,s!=d,xor=1: 0.278188 s 460.121 MB/s +32768 4096 4 TABLE SINGLE - +Seed: 1352748169 +Buffer-Const,s!=d,xor=0: 0.248786 s 514.498 MB/s +Buffer-Const,s!=d,xor=1: 0.305109 s 419.522 MB/s +65536 2048 4 TABLE SINGLE - +Seed: 1352748171 +Buffer-Const,s!=d,xor=0: 0.249003 s 514.050 MB/s +Buffer-Const,s!=d,xor=1: 0.276043 s 463.696 MB/s +131072 1024 4 TABLE SINGLE - +Seed: 1352748173 +Buffer-Const,s!=d,xor=0: 0.249019 s 514.016 MB/s +Buffer-Const,s!=d,xor=1: 0.278464 s 459.665 MB/s +262144 512 4 TABLE SINGLE - +Seed: 1352748174 +Buffer-Const,s!=d,xor=0: 0.257905 s 496.308 MB/s +Buffer-Const,s!=d,xor=1: 0.266241 s 480.767 MB/s +524288 256 4 TABLE SINGLE - +Seed: 1352748176 +Buffer-Const,s!=d,xor=0: 0.254655 s 502.641 MB/s 
+Buffer-Const,s!=d,xor=1: 0.267730 s 478.093 MB/s +1048576 128 4 TABLE SINGLE - +Seed: 1352748178 +Buffer-Const,s!=d,xor=0: 0.264532 s 483.874 MB/s +Buffer-Const,s!=d,xor=1: 0.270533 s 473.140 MB/s +2097152 64 4 TABLE SINGLE - +Seed: 1352748180 +Buffer-Const,s!=d,xor=0: 0.249658 s 512.702 MB/s +Buffer-Const,s!=d,xor=1: 0.265106 s 482.826 MB/s +4194304 32 4 TABLE SINGLE - +Seed: 1352748181 +Buffer-Const,s!=d,xor=0: 0.244030 s 524.527 MB/s +Buffer-Const,s!=d,xor=1: 0.301052 s 425.176 MB/s +8388608 16 4 TABLE SINGLE - +Seed: 1352748183 +Buffer-Const,s!=d,xor=0: 0.263009 s 486.676 MB/s +Buffer-Const,s!=d,xor=1: 0.270075 s 473.943 MB/s +16777216 8 4 TABLE SINGLE - +Seed: 1352748185 +Buffer-Const,s!=d,xor=0: 0.318133 s 402.348 MB/s +Buffer-Const,s!=d,xor=1: 0.315726 s 405.415 MB/s +33554432 4 4 TABLE SINGLE - +Seed: 1352748187 +Buffer-Const,s!=d,xor=0: 0.329082 s 388.961 MB/s +Buffer-Const,s!=d,xor=1: 0.303774 s 421.366 MB/s +67108864 2 4 TABLE SINGLE - +Seed: 1352748189 +Buffer-Const,s!=d,xor=0: 0.373282 s 342.904 MB/s +Buffer-Const,s!=d,xor=1: 0.299255 s 427.729 MB/s +134217728 1 4 TABLE SINGLE - +Seed: 1352748191 +Buffer-Const,s!=d,xor=0: 0.026432 s 4842.652 MB/s +Buffer-Const,s!=d,xor=1: 0.028027 s 4566.976 MB/s +1024 131072 4 TABLE SINGLE,SSE - +Seed: 1352748192 +Buffer-Const,s!=d,xor=0: 0.020923 s 6117.629 MB/s +Buffer-Const,s!=d,xor=1: 0.021753 s 5884.226 MB/s +2048 65536 4 TABLE SINGLE,SSE - +Seed: 1352748193 +Buffer-Const,s!=d,xor=0: 0.017533 s 7300.592 MB/s +Buffer-Const,s!=d,xor=1: 0.018308 s 6991.599 MB/s +4096 32768 4 TABLE SINGLE,SSE - +Seed: 1352748193 +Buffer-Const,s!=d,xor=0: 0.016224 s 7889.591 MB/s +Buffer-Const,s!=d,xor=1: 0.016537 s 7740.353 MB/s +8192 16384 4 TABLE SINGLE,SSE - +Seed: 1352748194 +Buffer-Const,s!=d,xor=0: 0.015627 s 8191.000 MB/s +Buffer-Const,s!=d,xor=1: 0.016160 s 7921.020 MB/s +16384 8192 4 TABLE SINGLE,SSE - +Seed: 1352748195 +Buffer-Const,s!=d,xor=0: 0.015679 s 8163.599 MB/s +Buffer-Const,s!=d,xor=1: 0.016548 s 7735.000 MB/s 
+32768 4096 4 TABLE SINGLE,SSE - +Seed: 1352748196 +Buffer-Const,s!=d,xor=0: 0.016351 s 7828.046 MB/s +Buffer-Const,s!=d,xor=1: 0.017147 s 7464.939 MB/s +65536 2048 4 TABLE SINGLE,SSE - +Seed: 1352748196 +Buffer-Const,s!=d,xor=0: 0.015204 s 8418.863 MB/s +Buffer-Const,s!=d,xor=1: 0.016621 s 7701.049 MB/s +131072 1024 4 TABLE SINGLE,SSE - +Seed: 1352748197 +Buffer-Const,s!=d,xor=0: 0.019366 s 6609.594 MB/s +Buffer-Const,s!=d,xor=1: 0.020611 s 6210.405 MB/s +262144 512 4 TABLE SINGLE,SSE - +Seed: 1352748198 +Buffer-Const,s!=d,xor=0: 0.019287 s 6636.721 MB/s +Buffer-Const,s!=d,xor=1: 0.020470 s 6253.155 MB/s +524288 256 4 TABLE SINGLE,SSE - +Seed: 1352748199 +Buffer-Const,s!=d,xor=0: 0.019210 s 6663.244 MB/s +Buffer-Const,s!=d,xor=1: 0.021175 s 6044.754 MB/s +1048576 128 4 TABLE SINGLE,SSE - +Seed: 1352748199 +Buffer-Const,s!=d,xor=0: 0.035533 s 3602.314 MB/s +Buffer-Const,s!=d,xor=1: 0.032351 s 3956.628 MB/s +2097152 64 4 TABLE SINGLE,SSE - +Seed: 1352748200 +Buffer-Const,s!=d,xor=0: 0.048733 s 2626.557 MB/s +Buffer-Const,s!=d,xor=1: 0.044163 s 2898.370 MB/s +4194304 32 4 TABLE SINGLE,SSE - +Seed: 1352748201 +Buffer-Const,s!=d,xor=0: 0.051737 s 2474.071 MB/s +Buffer-Const,s!=d,xor=1: 0.048826 s 2621.555 MB/s +8388608 16 4 TABLE SINGLE,SSE - +Seed: 1352748202 +Buffer-Const,s!=d,xor=0: 0.056330 s 2272.306 MB/s +Buffer-Const,s!=d,xor=1: 0.029557 s 4330.617 MB/s +16777216 8 4 TABLE SINGLE,SSE - +Seed: 1352748203 +Buffer-Const,s!=d,xor=0: 0.066551 s 1923.338 MB/s +Buffer-Const,s!=d,xor=1: 0.037378 s 3424.489 MB/s +33554432 4 4 TABLE SINGLE,SSE - +Seed: 1352748203 +Buffer-Const,s!=d,xor=0: 0.082171 s 1557.728 MB/s +Buffer-Const,s!=d,xor=1: 0.048228 s 2654.058 MB/s +67108864 2 4 TABLE SINGLE,SSE - +Seed: 1352748204 +Buffer-Const,s!=d,xor=0: 0.125187 s 1022.469 MB/s +Buffer-Const,s!=d,xor=1: 0.047497 s 2694.905 MB/s +134217728 1 4 TABLE SINGLE,SSE - +Seed: 1352748205 +Buffer-Const,s!=d,xor=0: 0.151542 s 844.651 MB/s +Buffer-Const,s!=d,xor=1: 0.153138 s 835.847 MB/s +1024 
131072 4 TABLE DOUBLE - +Seed: 1352748207 +Buffer-Const,s!=d,xor=0: 0.146267 s 875.111 MB/s +Buffer-Const,s!=d,xor=1: 0.150025 s 853.189 MB/s +2048 65536 4 TABLE DOUBLE - +Seed: 1352748208 +Buffer-Const,s!=d,xor=0: 0.145038 s 882.529 MB/s +Buffer-Const,s!=d,xor=1: 0.146365 s 874.525 MB/s +4096 32768 4 TABLE DOUBLE - +Seed: 1352748209 +Buffer-Const,s!=d,xor=0: 0.142601 s 897.608 MB/s +Buffer-Const,s!=d,xor=1: 0.144650 s 884.893 MB/s +8192 16384 4 TABLE DOUBLE - +Seed: 1352748211 +Buffer-Const,s!=d,xor=0: 0.141861 s 902.293 MB/s +Buffer-Const,s!=d,xor=1: 0.142722 s 896.848 MB/s +16384 8192 4 TABLE DOUBLE - +Seed: 1352748212 +Buffer-Const,s!=d,xor=0: 0.140131 s 913.433 MB/s +Buffer-Const,s!=d,xor=1: 0.143035 s 894.888 MB/s +32768 4096 4 TABLE DOUBLE - +Seed: 1352748213 +Buffer-Const,s!=d,xor=0: 0.141368 s 905.436 MB/s +Buffer-Const,s!=d,xor=1: 0.142083 s 900.879 MB/s +65536 2048 4 TABLE DOUBLE - +Seed: 1352748214 +Buffer-Const,s!=d,xor=0: 0.144412 s 886.351 MB/s +Buffer-Const,s!=d,xor=1: 0.145837 s 877.693 MB/s +131072 1024 4 TABLE DOUBLE - +Seed: 1352748216 +Buffer-Const,s!=d,xor=0: 0.141466 s 904.810 MB/s +Buffer-Const,s!=d,xor=1: 0.146338 s 874.686 MB/s +262144 512 4 TABLE DOUBLE - +Seed: 1352748217 +Buffer-Const,s!=d,xor=0: 0.141775 s 902.837 MB/s +Buffer-Const,s!=d,xor=1: 0.143733 s 890.543 MB/s +524288 256 4 TABLE DOUBLE - +Seed: 1352748218 +Buffer-Const,s!=d,xor=0: 0.144309 s 886.984 MB/s +Buffer-Const,s!=d,xor=1: 0.145978 s 876.843 MB/s +1048576 128 4 TABLE DOUBLE - +Seed: 1352748219 +Buffer-Const,s!=d,xor=0: 0.145523 s 879.584 MB/s +Buffer-Const,s!=d,xor=1: 0.152104 s 841.530 MB/s +2097152 64 4 TABLE DOUBLE - +Seed: 1352748221 +Buffer-Const,s!=d,xor=0: 0.150421 s 850.944 MB/s +Buffer-Const,s!=d,xor=1: 0.154586 s 828.018 MB/s +4194304 32 4 TABLE DOUBLE - +Seed: 1352748222 +Buffer-Const,s!=d,xor=0: 0.151304 s 845.978 MB/s +Buffer-Const,s!=d,xor=1: 0.151530 s 844.720 MB/s +8388608 16 4 TABLE DOUBLE - +Seed: 1352748223 +Buffer-Const,s!=d,xor=0: 0.160126 s 799.369 
MB/s +Buffer-Const,s!=d,xor=1: 0.151316 s 845.910 MB/s +16777216 8 4 TABLE DOUBLE - +Seed: 1352748224 +Buffer-Const,s!=d,xor=0: 0.167688 s 763.323 MB/s +Buffer-Const,s!=d,xor=1: 0.152321 s 840.331 MB/s +33554432 4 4 TABLE DOUBLE - +Seed: 1352748226 +Buffer-Const,s!=d,xor=0: 0.194515 s 658.047 MB/s +Buffer-Const,s!=d,xor=1: 0.149023 s 858.929 MB/s +67108864 2 4 TABLE DOUBLE - +Seed: 1352748227 +Buffer-Const,s!=d,xor=0: 0.237898 s 538.046 MB/s +Buffer-Const,s!=d,xor=1: 0.148526 s 861.802 MB/s +134217728 1 4 TABLE DOUBLE - +Seed: 1352748229 +Buffer-Const,s!=d,xor=0: 0.151483 s 844.979 MB/s +Buffer-Const,s!=d,xor=1: 0.153012 s 836.535 MB/s +1024 131072 4 TABLE DOUBLE - +Seed: 1352748230 +Buffer-Const,s!=d,xor=0: 0.146577 s 873.259 MB/s +Buffer-Const,s!=d,xor=1: 0.146274 s 875.070 MB/s +2048 65536 4 TABLE DOUBLE - +Seed: 1352748231 +Buffer-Const,s!=d,xor=0: 0.145069 s 882.341 MB/s +Buffer-Const,s!=d,xor=1: 0.143911 s 889.436 MB/s +4096 32768 4 TABLE DOUBLE - +Seed: 1352748233 +Buffer-Const,s!=d,xor=0: 0.143011 s 895.035 MB/s +Buffer-Const,s!=d,xor=1: 0.142096 s 900.798 MB/s +8192 16384 4 TABLE DOUBLE - +Seed: 1352748234 +Buffer-Const,s!=d,xor=0: 0.142743 s 896.719 MB/s +Buffer-Const,s!=d,xor=1: 0.142004 s 901.383 MB/s +16384 8192 4 TABLE DOUBLE - +Seed: 1352748235 +Buffer-Const,s!=d,xor=0: 0.141290 s 905.940 MB/s +Buffer-Const,s!=d,xor=1: 0.142891 s 895.785 MB/s +32768 4096 4 TABLE DOUBLE - +Seed: 1352748236 +Buffer-Const,s!=d,xor=0: 0.141509 s 904.534 MB/s +Buffer-Const,s!=d,xor=1: 0.142357 s 899.150 MB/s +65536 2048 4 TABLE DOUBLE - +Seed: 1352748237 +Buffer-Const,s!=d,xor=0: 0.141353 s 905.532 MB/s +Buffer-Const,s!=d,xor=1: 0.147224 s 869.422 MB/s +131072 1024 4 TABLE DOUBLE - +Seed: 1352748239 +Buffer-Const,s!=d,xor=0: 0.142758 s 896.623 MB/s +Buffer-Const,s!=d,xor=1: 0.144537 s 885.585 MB/s +262144 512 4 TABLE DOUBLE - +Seed: 1352748240 +Buffer-Const,s!=d,xor=0: 0.141772 s 902.858 MB/s +Buffer-Const,s!=d,xor=1: 0.145832 s 877.723 MB/s +524288 256 4 TABLE DOUBLE - 
+Seed: 1352748241 +Buffer-Const,s!=d,xor=0: 0.142111 s 900.705 MB/s +Buffer-Const,s!=d,xor=1: 0.143957 s 889.155 MB/s +1048576 128 4 TABLE DOUBLE - +Seed: 1352748242 +Buffer-Const,s!=d,xor=0: 0.144863 s 883.596 MB/s +Buffer-Const,s!=d,xor=1: 0.148948 s 859.359 MB/s +2097152 64 4 TABLE DOUBLE - +Seed: 1352748244 +Buffer-Const,s!=d,xor=0: 0.150453 s 850.766 MB/s +Buffer-Const,s!=d,xor=1: 0.151897 s 842.677 MB/s +4194304 32 4 TABLE DOUBLE - +Seed: 1352748245 +Buffer-Const,s!=d,xor=0: 0.152495 s 839.371 MB/s +Buffer-Const,s!=d,xor=1: 0.153424 s 834.289 MB/s +8388608 16 4 TABLE DOUBLE - +Seed: 1352748246 +Buffer-Const,s!=d,xor=0: 0.159227 s 803.886 MB/s +Buffer-Const,s!=d,xor=1: 0.151101 s 847.118 MB/s +16777216 8 4 TABLE DOUBLE - +Seed: 1352748248 +Buffer-Const,s!=d,xor=0: 0.167903 s 762.344 MB/s +Buffer-Const,s!=d,xor=1: 0.152000 s 842.106 MB/s +33554432 4 4 TABLE DOUBLE - +Seed: 1352748249 +Buffer-Const,s!=d,xor=0: 0.193370 s 661.943 MB/s +Buffer-Const,s!=d,xor=1: 0.153193 s 835.547 MB/s +67108864 2 4 TABLE DOUBLE - +Seed: 1352748250 +Buffer-Const,s!=d,xor=0: 0.241834 s 529.288 MB/s +Buffer-Const,s!=d,xor=1: 0.150811 s 848.745 MB/s +134217728 1 4 TABLE DOUBLE - +Seed: 1352748252 +Buffer-Const,s!=d,xor=0: 0.158047 s 809.887 MB/s +Buffer-Const,s!=d,xor=1: 0.156660 s 817.057 MB/s +1024 131072 4 TABLE QUAD - +Seed: 1352748253 +Buffer-Const,s!=d,xor=0: 0.141239 s 906.264 MB/s +Buffer-Const,s!=d,xor=1: 0.146382 s 874.422 MB/s +2048 65536 4 TABLE QUAD - +Seed: 1352748254 +Buffer-Const,s!=d,xor=0: 0.134986 s 948.245 MB/s +Buffer-Const,s!=d,xor=1: 0.140656 s 910.023 MB/s +4096 32768 4 TABLE QUAD - +Seed: 1352748256 +Buffer-Const,s!=d,xor=0: 0.153383 s 834.514 MB/s +Buffer-Const,s!=d,xor=1: 0.128968 s 992.498 MB/s +8192 16384 4 TABLE QUAD - +Seed: 1352748257 +Buffer-Const,s!=d,xor=0: 0.120985 s 1057.984 MB/s +Buffer-Const,s!=d,xor=1: 0.121486 s 1053.618 MB/s +16384 8192 4 TABLE QUAD - +Seed: 1352748258 +Buffer-Const,s!=d,xor=0: 0.113212 s 1130.626 MB/s 
+Buffer-Const,s!=d,xor=1: 0.116994 s 1094.076 MB/s +32768 4096 4 TABLE QUAD - +Seed: 1352748259 +Buffer-Const,s!=d,xor=0: 0.106910 s 1197.266 MB/s +Buffer-Const,s!=d,xor=1: 0.109951 s 1164.152 MB/s +65536 2048 4 TABLE QUAD - +Seed: 1352748260 +Buffer-Const,s!=d,xor=0: 0.106585 s 1200.916 MB/s +Buffer-Const,s!=d,xor=1: 0.119656 s 1069.735 MB/s +131072 1024 4 TABLE QUAD - +Seed: 1352748261 +Buffer-Const,s!=d,xor=0: 0.108813 s 1176.332 MB/s +Buffer-Const,s!=d,xor=1: 0.109021 s 1174.081 MB/s +262144 512 4 TABLE QUAD - +Seed: 1352748263 +Buffer-Const,s!=d,xor=0: 0.103341 s 1238.614 MB/s +Buffer-Const,s!=d,xor=1: 0.108952 s 1174.826 MB/s +524288 256 4 TABLE QUAD - +Seed: 1352748264 +Buffer-Const,s!=d,xor=0: 0.105469 s 1213.627 MB/s +Buffer-Const,s!=d,xor=1: 0.110848 s 1154.735 MB/s +1048576 128 4 TABLE QUAD - +Seed: 1352748265 +Buffer-Const,s!=d,xor=0: 0.105542 s 1212.785 MB/s +Buffer-Const,s!=d,xor=1: 0.108646 s 1178.134 MB/s +2097152 64 4 TABLE QUAD - +Seed: 1352748266 +Buffer-Const,s!=d,xor=0: 0.106677 s 1199.889 MB/s +Buffer-Const,s!=d,xor=1: 0.112022 s 1142.631 MB/s +4194304 32 4 TABLE QUAD - +Seed: 1352748267 +Buffer-Const,s!=d,xor=0: 0.110966 s 1153.507 MB/s +Buffer-Const,s!=d,xor=1: 0.100766 s 1270.264 MB/s +8388608 16 4 TABLE QUAD - +Seed: 1352748268 +Buffer-Const,s!=d,xor=0: 0.108207 s 1182.915 MB/s +Buffer-Const,s!=d,xor=1: 0.113488 s 1127.871 MB/s +16777216 8 4 TABLE QUAD - +Seed: 1352748269 +Buffer-Const,s!=d,xor=0: 0.129142 s 991.157 MB/s +Buffer-Const,s!=d,xor=1: 0.110923 s 1153.953 MB/s +33554432 4 4 TABLE QUAD - +Seed: 1352748270 +Buffer-Const,s!=d,xor=0: 0.156426 s 818.279 MB/s +Buffer-Const,s!=d,xor=1: 0.110093 s 1162.652 MB/s +67108864 2 4 TABLE QUAD - +Seed: 1352748272 +Buffer-Const,s!=d,xor=0: 0.203508 s 628.967 MB/s +Buffer-Const,s!=d,xor=1: 0.111907 s 1143.807 MB/s +134217728 1 4 TABLE QUAD - +Seed: 1352748273 +Buffer-Const,s!=d,xor=0: 8.741033 s 14.644 MB/s +Buffer-Const,s!=d,xor=1: 8.972750 s 14.265 MB/s +1024 131072 4 TABLE QUAD,LAZY - +Seed: 
1352748309 +Buffer-Const,s!=d,xor=0: 4.387740 s 29.172 MB/s +Buffer-Const,s!=d,xor=1: 4.401799 s 29.079 MB/s +2048 65536 4 TABLE QUAD,LAZY - +Seed: 1352748327 +Buffer-Const,s!=d,xor=0: 2.255454 s 56.751 MB/s +Buffer-Const,s!=d,xor=1: 2.243299 s 57.059 MB/s +4096 32768 4 TABLE QUAD,LAZY - +Seed: 1352748337 +Buffer-Const,s!=d,xor=0: 1.166870 s 109.695 MB/s +Buffer-Const,s!=d,xor=1: 1.180004 s 108.474 MB/s +8192 16384 4 TABLE QUAD,LAZY - +Seed: 1352748342 +Buffer-Const,s!=d,xor=0: 0.661613 s 193.467 MB/s +Buffer-Const,s!=d,xor=1: 0.629827 s 203.230 MB/s +16384 8192 4 TABLE QUAD,LAZY - +Seed: 1352748345 +Buffer-Const,s!=d,xor=0: 0.364647 s 351.024 MB/s +Buffer-Const,s!=d,xor=1: 0.376395 s 340.069 MB/s +32768 4096 4 TABLE QUAD,LAZY - +Seed: 1352748348 +Buffer-Const,s!=d,xor=0: 0.226271 s 565.694 MB/s +Buffer-Const,s!=d,xor=1: 0.234560 s 545.704 MB/s +65536 2048 4 TABLE QUAD,LAZY - +Seed: 1352748349 +Buffer-Const,s!=d,xor=0: 0.160475 s 797.630 MB/s +Buffer-Const,s!=d,xor=1: 0.166329 s 769.561 MB/s +131072 1024 4 TABLE QUAD,LAZY - +Seed: 1352748351 +Buffer-Const,s!=d,xor=0: 0.130999 s 977.110 MB/s +Buffer-Const,s!=d,xor=1: 0.134676 s 950.431 MB/s +262144 512 4 TABLE QUAD,LAZY - +Seed: 1352748352 +Buffer-Const,s!=d,xor=0: 0.110626 s 1157.057 MB/s +Buffer-Const,s!=d,xor=1: 0.118067 s 1084.134 MB/s +524288 256 4 TABLE QUAD,LAZY - +Seed: 1352748353 +Buffer-Const,s!=d,xor=0: 0.105213 s 1216.581 MB/s +Buffer-Const,s!=d,xor=1: 0.109697 s 1166.854 MB/s +1048576 128 4 TABLE QUAD,LAZY - +Seed: 1352748354 +Buffer-Const,s!=d,xor=0: 0.107641 s 1189.138 MB/s +Buffer-Const,s!=d,xor=1: 0.108062 s 1184.502 MB/s +2097152 64 4 TABLE QUAD,LAZY - +Seed: 1352748355 +Buffer-Const,s!=d,xor=0: 0.103473 s 1237.035 MB/s +Buffer-Const,s!=d,xor=1: 0.098362 s 1301.310 MB/s +4194304 32 4 TABLE QUAD,LAZY - +Seed: 1352748356 +Buffer-Const,s!=d,xor=0: 0.107058 s 1195.616 MB/s +Buffer-Const,s!=d,xor=1: 0.097883 s 1307.687 MB/s +8388608 16 4 TABLE QUAD,LAZY - +Seed: 1352748357 +Buffer-Const,s!=d,xor=0: 
0.116388 s 1099.769 MB/s +Buffer-Const,s!=d,xor=1: 0.098690 s 1296.990 MB/s +16777216 8 4 TABLE QUAD,LAZY - +Seed: 1352748358 +Buffer-Const,s!=d,xor=0: 0.129120 s 991.325 MB/s +Buffer-Const,s!=d,xor=1: 0.109833 s 1165.403 MB/s +33554432 4 4 TABLE QUAD,LAZY - +Seed: 1352748360 +Buffer-Const,s!=d,xor=0: 0.157534 s 812.524 MB/s +Buffer-Const,s!=d,xor=1: 0.114721 s 1115.750 MB/s +67108864 2 4 TABLE QUAD,LAZY - +Seed: 1352748361 +Buffer-Const,s!=d,xor=0: 0.205053 s 624.229 MB/s +Buffer-Const,s!=d,xor=1: 0.110099 s 1162.589 MB/s +134217728 1 4 TABLE QUAD,LAZY - +Seed: 1352748362 +Buffer-Const,s!=d,xor=0: 0.142388 s 898.955 MB/s +Buffer-Const,s!=d,xor=1: 0.146045 s 876.440 MB/s +1024 131072 4 BYTWO_p - - +Seed: 1352748363 +Buffer-Const,s!=d,xor=0: 0.135040 s 947.867 MB/s +Buffer-Const,s!=d,xor=1: 0.140142 s 913.360 MB/s +2048 65536 4 BYTWO_p - - +Seed: 1352748365 +Buffer-Const,s!=d,xor=0: 0.131358 s 974.437 MB/s +Buffer-Const,s!=d,xor=1: 0.137115 s 933.525 MB/s +4096 32768 4 BYTWO_p - - +Seed: 1352748366 +Buffer-Const,s!=d,xor=0: 0.129772 s 986.347 MB/s +Buffer-Const,s!=d,xor=1: 0.135098 s 947.462 MB/s +8192 16384 4 BYTWO_p - - +Seed: 1352748367 +Buffer-Const,s!=d,xor=0: 0.128670 s 994.795 MB/s +Buffer-Const,s!=d,xor=1: 0.133591 s 958.145 MB/s +16384 8192 4 BYTWO_p - - +Seed: 1352748368 +Buffer-Const,s!=d,xor=0: 0.130064 s 984.129 MB/s +Buffer-Const,s!=d,xor=1: 0.135170 s 946.959 MB/s +32768 4096 4 BYTWO_p - - +Seed: 1352748369 +Buffer-Const,s!=d,xor=0: 0.129942 s 985.052 MB/s +Buffer-Const,s!=d,xor=1: 0.134780 s 949.695 MB/s +65536 2048 4 BYTWO_p - - +Seed: 1352748371 +Buffer-Const,s!=d,xor=0: 0.130649 s 979.725 MB/s +Buffer-Const,s!=d,xor=1: 0.134556 s 951.280 MB/s +131072 1024 4 BYTWO_p - - +Seed: 1352748372 +Buffer-Const,s!=d,xor=0: 0.129390 s 989.255 MB/s +Buffer-Const,s!=d,xor=1: 0.134418 s 952.257 MB/s +262144 512 4 BYTWO_p - - +Seed: 1352748373 +Buffer-Const,s!=d,xor=0: 0.130153 s 983.455 MB/s +Buffer-Const,s!=d,xor=1: 0.137027 s 934.126 MB/s +524288 256 4 BYTWO_p 
- - +Seed: 1352748374 +Buffer-Const,s!=d,xor=0: 0.128065 s 999.493 MB/s +Buffer-Const,s!=d,xor=1: 0.136548 s 937.402 MB/s +1048576 128 4 BYTWO_p - - +Seed: 1352748375 +Buffer-Const,s!=d,xor=0: 0.137841 s 928.608 MB/s +Buffer-Const,s!=d,xor=1: 0.149983 s 853.428 MB/s +2097152 64 4 BYTWO_p - - +Seed: 1352748377 +Buffer-Const,s!=d,xor=0: 0.143009 s 895.049 MB/s +Buffer-Const,s!=d,xor=1: 0.151799 s 843.218 MB/s +4194304 32 4 BYTWO_p - - +Seed: 1352748378 +Buffer-Const,s!=d,xor=0: 0.148001 s 864.859 MB/s +Buffer-Const,s!=d,xor=1: 0.150979 s 847.802 MB/s +8388608 16 4 BYTWO_p - - +Seed: 1352748379 +Buffer-Const,s!=d,xor=0: 0.153637 s 833.133 MB/s +Buffer-Const,s!=d,xor=1: 0.133152 s 961.307 MB/s +16777216 8 4 BYTWO_p - - +Seed: 1352748380 +Buffer-Const,s!=d,xor=0: 0.164125 s 779.894 MB/s +Buffer-Const,s!=d,xor=1: 0.150620 s 849.821 MB/s +33554432 4 4 BYTWO_p - - +Seed: 1352748382 +Buffer-Const,s!=d,xor=0: 0.188526 s 678.952 MB/s +Buffer-Const,s!=d,xor=1: 0.153114 s 835.979 MB/s +67108864 2 4 BYTWO_p - - +Seed: 1352748383 +Buffer-Const,s!=d,xor=0: 0.235626 s 543.234 MB/s +Buffer-Const,s!=d,xor=1: 0.158839 s 805.847 MB/s +134217728 1 4 BYTWO_p - - +Seed: 1352748385 +Buffer-Const,s!=d,xor=0: 0.076323 s 1677.087 MB/s +Buffer-Const,s!=d,xor=1: 0.077654 s 1648.345 MB/s +1024 131072 4 BYTWO_b - - +Seed: 1352748386 +Buffer-Const,s!=d,xor=0: 0.068027 s 1881.605 MB/s +Buffer-Const,s!=d,xor=1: 0.070778 s 1808.462 MB/s +2048 65536 4 BYTWO_b - - +Seed: 1352748387 +Buffer-Const,s!=d,xor=0: 0.065722 s 1947.591 MB/s +Buffer-Const,s!=d,xor=1: 0.068535 s 1867.669 MB/s +4096 32768 4 BYTWO_b - - +Seed: 1352748388 +Buffer-Const,s!=d,xor=0: 0.063732 s 2008.398 MB/s +Buffer-Const,s!=d,xor=1: 0.066054 s 1937.805 MB/s +8192 16384 4 BYTWO_b - - +Seed: 1352748389 +Buffer-Const,s!=d,xor=0: 0.062660 s 2042.779 MB/s +Buffer-Const,s!=d,xor=1: 0.065213 s 1962.793 MB/s +16384 8192 4 BYTWO_b - - +Seed: 1352748390 +Buffer-Const,s!=d,xor=0: 0.062758 s 2039.566 MB/s +Buffer-Const,s!=d,xor=1: 0.066957 s 
1911.668 MB/s +32768 4096 4 BYTWO_b - - +Seed: 1352748390 +Buffer-Const,s!=d,xor=0: 0.063058 s 2029.865 MB/s +Buffer-Const,s!=d,xor=1: 0.065829 s 1944.424 MB/s +65536 2048 4 BYTWO_b - - +Seed: 1352748391 +Buffer-Const,s!=d,xor=0: 0.065844 s 1943.994 MB/s +Buffer-Const,s!=d,xor=1: 0.065374 s 1957.968 MB/s +131072 1024 4 BYTWO_b - - +Seed: 1352748392 +Buffer-Const,s!=d,xor=0: 0.062168 s 2058.949 MB/s +Buffer-Const,s!=d,xor=1: 0.068710 s 1862.906 MB/s +262144 512 4 BYTWO_b - - +Seed: 1352748393 +Buffer-Const,s!=d,xor=0: 0.062623 s 2043.984 MB/s +Buffer-Const,s!=d,xor=1: 0.066550 s 1923.379 MB/s +524288 256 4 BYTWO_b - - +Seed: 1352748394 +Buffer-Const,s!=d,xor=0: 0.064571 s 1982.317 MB/s +Buffer-Const,s!=d,xor=1: 0.061325 s 2087.246 MB/s +1048576 128 4 BYTWO_b - - +Seed: 1352748395 +Buffer-Const,s!=d,xor=0: 0.070771 s 1808.657 MB/s +Buffer-Const,s!=d,xor=1: 0.072981 s 1753.878 MB/s +2097152 64 4 BYTWO_b - - +Seed: 1352748396 +Buffer-Const,s!=d,xor=0: 0.078018 s 1640.643 MB/s +Buffer-Const,s!=d,xor=1: 0.072307 s 1770.227 MB/s +4194304 32 4 BYTWO_b - - +Seed: 1352748397 +Buffer-Const,s!=d,xor=0: 0.079478 s 1610.508 MB/s +Buffer-Const,s!=d,xor=1: 0.073757 s 1735.424 MB/s +8388608 16 4 BYTWO_b - - +Seed: 1352748398 +Buffer-Const,s!=d,xor=0: 0.085826 s 1491.383 MB/s +Buffer-Const,s!=d,xor=1: 0.087615 s 1460.945 MB/s +16777216 8 4 BYTWO_b - - +Seed: 1352748399 +Buffer-Const,s!=d,xor=0: 0.081822 s 1564.373 MB/s +Buffer-Const,s!=d,xor=1: 0.083410 s 1534.583 MB/s +33554432 4 4 BYTWO_b - - +Seed: 1352748400 +Buffer-Const,s!=d,xor=0: 0.101873 s 1256.467 MB/s +Buffer-Const,s!=d,xor=1: 0.074412 s 1720.150 MB/s +67108864 2 4 BYTWO_b - - +Seed: 1352748401 +Buffer-Const,s!=d,xor=0: 0.188405 s 679.387 MB/s +Buffer-Const,s!=d,xor=1: 0.053904 s 2374.589 MB/s +134217728 1 4 BYTWO_b - - +Seed: 1352748403 +Buffer-Const,s!=d,xor=0: 0.092518 s 1383.520 MB/s +Buffer-Const,s!=d,xor=1: 0.097347 s 1314.877 MB/s +1024 131072 4 BYTWO_p SSE - +Seed: 1352748404 +Buffer-Const,s!=d,xor=0: 0.086226 s 
1484.463 MB/s +Buffer-Const,s!=d,xor=1: 0.092092 s 1389.910 MB/s +2048 65536 4 BYTWO_p SSE - +Seed: 1352748405 +Buffer-Const,s!=d,xor=0: 0.082721 s 1547.370 MB/s +Buffer-Const,s!=d,xor=1: 0.088092 s 1453.025 MB/s +4096 32768 4 BYTWO_p SSE - +Seed: 1352748406 +Buffer-Const,s!=d,xor=0: 0.081612 s 1568.395 MB/s +Buffer-Const,s!=d,xor=1: 0.086144 s 1485.885 MB/s +8192 16384 4 BYTWO_p SSE - +Seed: 1352748407 +Buffer-Const,s!=d,xor=0: 0.080819 s 1583.783 MB/s +Buffer-Const,s!=d,xor=1: 0.085448 s 1497.982 MB/s +16384 8192 4 BYTWO_p SSE - +Seed: 1352748408 +Buffer-Const,s!=d,xor=0: 0.080971 s 1580.804 MB/s +Buffer-Const,s!=d,xor=1: 0.086504 s 1479.709 MB/s +32768 4096 4 BYTWO_p SSE - +Seed: 1352748409 +Buffer-Const,s!=d,xor=0: 0.080746 s 1585.214 MB/s +Buffer-Const,s!=d,xor=1: 0.085679 s 1493.943 MB/s +65536 2048 4 BYTWO_p SSE - +Seed: 1352748410 +Buffer-Const,s!=d,xor=0: 0.081038 s 1579.511 MB/s +Buffer-Const,s!=d,xor=1: 0.086381 s 1481.804 MB/s +131072 1024 4 BYTWO_p SSE - +Seed: 1352748411 +Buffer-Const,s!=d,xor=0: 0.079807 s 1603.873 MB/s +Buffer-Const,s!=d,xor=1: 0.085420 s 1498.484 MB/s +262144 512 4 BYTWO_p SSE - +Seed: 1352748412 +Buffer-Const,s!=d,xor=0: 0.080044 s 1599.115 MB/s +Buffer-Const,s!=d,xor=1: 0.083843 s 1526.654 MB/s +524288 256 4 BYTWO_p SSE - +Seed: 1352748413 +Buffer-Const,s!=d,xor=0: 0.082954 s 1543.016 MB/s +Buffer-Const,s!=d,xor=1: 0.086807 s 1474.535 MB/s +1048576 128 4 BYTWO_p SSE - +Seed: 1352748414 +Buffer-Const,s!=d,xor=0: 0.090553 s 1413.536 MB/s +Buffer-Const,s!=d,xor=1: 0.092115 s 1389.565 MB/s +2097152 64 4 BYTWO_p SSE - +Seed: 1352748415 +Buffer-Const,s!=d,xor=0: 0.087072 s 1470.054 MB/s +Buffer-Const,s!=d,xor=1: 0.093465 s 1369.492 MB/s +4194304 32 4 BYTWO_p SSE - +Seed: 1352748416 +Buffer-Const,s!=d,xor=0: 0.097724 s 1309.812 MB/s +Buffer-Const,s!=d,xor=1: 0.090922 s 1407.795 MB/s +8388608 16 4 BYTWO_p SSE - +Seed: 1352748417 +Buffer-Const,s!=d,xor=0: 0.104649 s 1223.136 MB/s +Buffer-Const,s!=d,xor=1: 0.084963 s 1506.532 MB/s 
+16777216 8 4 BYTWO_p SSE - +Seed: 1352748418 +Buffer-Const,s!=d,xor=0: 0.112079 s 1142.050 MB/s +Buffer-Const,s!=d,xor=1: 0.096727 s 1323.313 MB/s +33554432 4 4 BYTWO_p SSE - +Seed: 1352748419 +Buffer-Const,s!=d,xor=0: 0.136256 s 939.408 MB/s +Buffer-Const,s!=d,xor=1: 0.103244 s 1239.781 MB/s +67108864 2 4 BYTWO_p SSE - +Seed: 1352748420 +Buffer-Const,s!=d,xor=0: 0.181231 s 706.281 MB/s +Buffer-Const,s!=d,xor=1: 0.092887 s 1378.016 MB/s +134217728 1 4 BYTWO_p SSE - +Seed: 1352748422 +Buffer-Const,s!=d,xor=0: 0.107760 s 1187.825 MB/s +Buffer-Const,s!=d,xor=1: 0.065748 s 1946.828 MB/s +1024 131072 4 BYTWO_b SSE - +Seed: 1352748423 +Buffer-Const,s!=d,xor=0: 0.104705 s 1222.484 MB/s +Buffer-Const,s!=d,xor=1: 0.058541 s 2186.508 MB/s +2048 65536 4 BYTWO_b SSE - +Seed: 1352748424 +Buffer-Const,s!=d,xor=0: 0.098082 s 1305.026 MB/s +Buffer-Const,s!=d,xor=1: 0.053539 s 2390.768 MB/s +4096 32768 4 BYTWO_b SSE - +Seed: 1352748425 +Buffer-Const,s!=d,xor=0: 0.094147 s 1359.576 MB/s +Buffer-Const,s!=d,xor=1: 0.051867 s 2467.839 MB/s +8192 16384 4 BYTWO_b SSE - +Seed: 1352748426 +Buffer-Const,s!=d,xor=0: 0.092755 s 1379.975 MB/s +Buffer-Const,s!=d,xor=1: 0.049600 s 2580.651 MB/s +16384 8192 4 BYTWO_b SSE - +Seed: 1352748427 +Buffer-Const,s!=d,xor=0: 0.093161 s 1373.971 MB/s +Buffer-Const,s!=d,xor=1: 0.048734 s 2626.480 MB/s +32768 4096 4 BYTWO_b SSE - +Seed: 1352748428 +Buffer-Const,s!=d,xor=0: 0.092071 s 1390.227 MB/s +Buffer-Const,s!=d,xor=1: 0.048645 s 2631.282 MB/s +65536 2048 4 BYTWO_b SSE - +Seed: 1352748429 +Buffer-Const,s!=d,xor=0: 0.093282 s 1372.191 MB/s +Buffer-Const,s!=d,xor=1: 0.047374 s 2701.903 MB/s +131072 1024 4 BYTWO_b SSE - +Seed: 1352748430 +Buffer-Const,s!=d,xor=0: 0.094085 s 1360.479 MB/s +Buffer-Const,s!=d,xor=1: 0.050752 s 2522.072 MB/s +262144 512 4 BYTWO_b SSE - +Seed: 1352748431 +Buffer-Const,s!=d,xor=0: 0.099099 s 1291.639 MB/s +Buffer-Const,s!=d,xor=1: 0.046550 s 2749.729 MB/s +524288 256 4 BYTWO_b SSE - +Seed: 1352748431 +Buffer-Const,s!=d,xor=0: 
0.093943 s 1362.530 MB/s +Buffer-Const,s!=d,xor=1: 0.050178 s 2550.940 MB/s +1048576 128 4 BYTWO_b SSE - +Seed: 1352748432 +Buffer-Const,s!=d,xor=0: 0.121096 s 1057.011 MB/s +Buffer-Const,s!=d,xor=1: 0.055513 s 2305.770 MB/s +2097152 64 4 BYTWO_b SSE - +Seed: 1352748433 +Buffer-Const,s!=d,xor=0: 0.109734 s 1166.456 MB/s +Buffer-Const,s!=d,xor=1: 0.057743 s 2216.716 MB/s +4194304 32 4 BYTWO_b SSE - +Seed: 1352748434 +Buffer-Const,s!=d,xor=0: 0.117161 s 1092.513 MB/s +Buffer-Const,s!=d,xor=1: 0.057568 s 2223.464 MB/s +8388608 16 4 BYTWO_b SSE - +Seed: 1352748436 +Buffer-Const,s!=d,xor=0: 0.102332 s 1250.832 MB/s +Buffer-Const,s!=d,xor=1: 0.061185 s 2092.004 MB/s +16777216 8 4 BYTWO_b SSE - +Seed: 1352748437 +Buffer-Const,s!=d,xor=0: 0.173641 s 737.153 MB/s +Buffer-Const,s!=d,xor=1: 0.054822 s 2334.830 MB/s +33554432 4 4 BYTWO_b SSE - +Seed: 1352748438 +Buffer-Const,s!=d,xor=0: 0.130181 s 983.246 MB/s +Buffer-Const,s!=d,xor=1: 0.051398 s 2490.367 MB/s +67108864 2 4 BYTWO_b SSE - +Seed: 1352748439 +Buffer-Const,s!=d,xor=0: 0.150805 s 848.778 MB/s +Buffer-Const,s!=d,xor=1: 0.000005 s 2330.524 MB/s +134217728 1 4 BYTWO_b SSE - diff --git a/junk-w4-timing-tests.sh b/junk-w4-timing-tests.sh new file mode 100644 index 0000000..8ed8c08 --- /dev/null +++ b/junk-w4-timing-tests.sh @@ -0,0 +1,11 @@ +sh tmp-time-test.sh 4 LOG - - +sh tmp-time-test.sh 4 TABLE SINGLE - +sh tmp-time-test.sh 4 TABLE SINGLE,SSE - +sh tmp-time-test.sh 4 TABLE DOUBLE - +sh tmp-time-test.sh 4 TABLE DOUBLE - +sh tmp-time-test.sh 4 TABLE QUAD - +sh tmp-time-test.sh 4 TABLE QUAD,LAZY - +sh tmp-time-test.sh 4 BYTWO_p - - +sh tmp-time-test.sh 4 BYTWO_b - - +sh tmp-time-test.sh 4 BYTWO_p SSE - +sh tmp-time-test.sh 4 BYTWO_b SSE - diff --git a/junk-w4-timing.jgr b/junk-w4-timing.jgr new file mode 100644 index 0000000..9123257 --- /dev/null +++ b/junk-w4-timing.jgr @@ -0,0 +1,11 @@ +newgraph +xaxis size 4 min 0 no_auto_hash_labels + hash_labels hjl vjc rotate -90 fontsize 11 + +shell : junk-pick-best-output < 
junk-w4-timing-out.txt | sort -nr | sed 's/.............//' | awk '{ print "hash_label at ", ++l, ":", $0 }' + +yaxis size 1 min 0 label : MB/s + +newcurve marktype xbar cfill 1 1 0 marksize 1 pts +shell : junk-pick-best-output < junk-w4-timing-out.txt | sort -nr | awk '{ print $1 }' | cat -n + diff --git a/junk-w4.jgr b/junk-w4.jgr new file mode 100644 index 0000000..e4c4a82 --- /dev/null +++ b/junk-w4.jgr @@ -0,0 +1,6 @@ +newgraph +xaxis size 4 min 0 no_auto_hash_labels + hash_labels hjl vjc rotate -90 fontsize 11 +yaxis size 1 min 0 label : MB/s + +shell : awk -f junk-proc.awk < junk-w4-out.txt diff --git a/junk-w8-timing-out.txt b/junk-w8-timing-out.txt new file mode 100644 index 0000000..cf542be --- /dev/null +++ b/junk-w8-timing-out.txt @@ -0,0 +1,936 @@ +Seed: 1352746852 +Buffer-Const,s!=d,xor=0: 0.205907 s 621.640 MB/s +Buffer-Const,s!=d,xor=1: 0.252565 s 506.800 MB/s +1024 131072 8 LOG - - +Seed: 1352746854 +Buffer-Const,s!=d,xor=0: 0.206410 s 620.126 MB/s +Buffer-Const,s!=d,xor=1: 0.251469 s 509.008 MB/s +2048 65536 8 LOG - - +Seed: 1352746856 +Buffer-Const,s!=d,xor=0: 0.209941 s 609.695 MB/s +Buffer-Const,s!=d,xor=1: 0.255838 s 500.316 MB/s +4096 32768 8 LOG - - +Seed: 1352746857 +Buffer-Const,s!=d,xor=0: 0.206109 s 621.030 MB/s +Buffer-Const,s!=d,xor=1: 0.262056 s 488.445 MB/s +8192 16384 8 LOG - - +Seed: 1352746859 +Buffer-Const,s!=d,xor=0: 0.201892 s 634.001 MB/s +Buffer-Const,s!=d,xor=1: 0.250816 s 510.335 MB/s +16384 8192 8 LOG - - +Seed: 1352746860 +Buffer-Const,s!=d,xor=0: 0.201995 s 633.679 MB/s +Buffer-Const,s!=d,xor=1: 0.254832 s 502.292 MB/s +32768 4096 8 LOG - - +Seed: 1352746862 +Buffer-Const,s!=d,xor=0: 0.203099 s 630.236 MB/s +Buffer-Const,s!=d,xor=1: 0.255779 s 500.431 MB/s +65536 2048 8 LOG - - +Seed: 1352746864 +Buffer-Const,s!=d,xor=0: 0.200691 s 637.796 MB/s +Buffer-Const,s!=d,xor=1: 0.256675 s 498.685 MB/s +131072 1024 8 LOG - - +Seed: 1352746865 +Buffer-Const,s!=d,xor=0: 0.201240 s 636.057 MB/s +Buffer-Const,s!=d,xor=1: 0.255231 s 
501.506 MB/s +262144 512 8 LOG - - +Seed: 1352746867 +Buffer-Const,s!=d,xor=0: 0.202006 s 633.645 MB/s +Buffer-Const,s!=d,xor=1: 0.251845 s 508.250 MB/s +524288 256 8 LOG - - +Seed: 1352746868 +Buffer-Const,s!=d,xor=0: 0.203552 s 628.830 MB/s +Buffer-Const,s!=d,xor=1: 0.255775 s 500.440 MB/s +1048576 128 8 LOG - - +Seed: 1352746870 +Buffer-Const,s!=d,xor=0: 0.206480 s 619.915 MB/s +Buffer-Const,s!=d,xor=1: 0.256771 s 498.498 MB/s +2097152 64 8 LOG - - +Seed: 1352746872 +Buffer-Const,s!=d,xor=0: 0.210690 s 607.528 MB/s +Buffer-Const,s!=d,xor=1: 0.260851 s 490.701 MB/s +4194304 32 8 LOG - - +Seed: 1352746873 +Buffer-Const,s!=d,xor=0: 0.212292 s 602.944 MB/s +Buffer-Const,s!=d,xor=1: 0.263464 s 485.834 MB/s +8388608 16 8 LOG - - +Seed: 1352746875 +Buffer-Const,s!=d,xor=0: 0.217703 s 587.957 MB/s +Buffer-Const,s!=d,xor=1: 0.260255 s 491.826 MB/s +16777216 8 8 LOG - - +Seed: 1352746876 +Buffer-Const,s!=d,xor=0: 0.229996 s 556.531 MB/s +Buffer-Const,s!=d,xor=1: 0.268077 s 477.475 MB/s +33554432 4 8 LOG - - +Seed: 1352746878 +Buffer-Const,s!=d,xor=0: 0.255076 s 501.811 MB/s +Buffer-Const,s!=d,xor=1: 0.268757 s 476.266 MB/s +67108864 2 8 LOG - - +Seed: 1352746880 +Buffer-Const,s!=d,xor=0: 0.299095 s 427.958 MB/s +Buffer-Const,s!=d,xor=1: 0.271954 s 470.668 MB/s +134217728 1 8 LOG - - +Seed: 1352746882 +Buffer-Const,s!=d,xor=0: 0.198089 s 646.175 MB/s +Buffer-Const,s!=d,xor=1: 0.199934 s 640.212 MB/s +1024 131072 8 LOG_ZERO - - +Seed: 1352746883 +Buffer-Const,s!=d,xor=0: 0.191693 s 667.733 MB/s +Buffer-Const,s!=d,xor=1: 0.195976 s 653.142 MB/s +2048 65536 8 LOG_ZERO - - +Seed: 1352746885 +Buffer-Const,s!=d,xor=0: 0.190896 s 670.524 MB/s +Buffer-Const,s!=d,xor=1: 0.194985 s 656.459 MB/s +4096 32768 8 LOG_ZERO - - +Seed: 1352746886 +Buffer-Const,s!=d,xor=0: 0.190779 s 670.933 MB/s +Buffer-Const,s!=d,xor=1: 0.195833 s 653.617 MB/s +8192 16384 8 LOG_ZERO - - +Seed: 1352746887 +Buffer-Const,s!=d,xor=0: 0.188468 s 679.159 MB/s +Buffer-Const,s!=d,xor=1: 0.192885 s 663.608 MB/s 
+16384 8192 8 LOG_ZERO - - +Seed: 1352746889 +Buffer-Const,s!=d,xor=0: 0.187547 s 682.497 MB/s +Buffer-Const,s!=d,xor=1: 0.193131 s 662.763 MB/s +32768 4096 8 LOG_ZERO - - +Seed: 1352746890 +Buffer-Const,s!=d,xor=0: 0.185810 s 688.875 MB/s +Buffer-Const,s!=d,xor=1: 0.192531 s 664.829 MB/s +65536 2048 8 LOG_ZERO - - +Seed: 1352746892 +Buffer-Const,s!=d,xor=0: 0.186486 s 686.379 MB/s +Buffer-Const,s!=d,xor=1: 0.192416 s 665.226 MB/s +131072 1024 8 LOG_ZERO - - +Seed: 1352746893 +Buffer-Const,s!=d,xor=0: 0.187854 s 681.379 MB/s +Buffer-Const,s!=d,xor=1: 0.193211 s 662.488 MB/s +262144 512 8 LOG_ZERO - - +Seed: 1352746895 +Buffer-Const,s!=d,xor=0: 0.186622 s 685.880 MB/s +Buffer-Const,s!=d,xor=1: 0.193951 s 659.961 MB/s +524288 256 8 LOG_ZERO - - +Seed: 1352746896 +Buffer-Const,s!=d,xor=0: 0.193502 s 661.492 MB/s +Buffer-Const,s!=d,xor=1: 0.194600 s 657.760 MB/s +1048576 128 8 LOG_ZERO - - +Seed: 1352746897 +Buffer-Const,s!=d,xor=0: 0.191789 s 667.400 MB/s +Buffer-Const,s!=d,xor=1: 0.206557 s 619.683 MB/s +2097152 64 8 LOG_ZERO - - +Seed: 1352746899 +Buffer-Const,s!=d,xor=0: 0.216762 s 590.509 MB/s +Buffer-Const,s!=d,xor=1: 0.220943 s 579.334 MB/s +4194304 32 8 LOG_ZERO - - +Seed: 1352746901 +Buffer-Const,s!=d,xor=0: 0.212998 s 600.944 MB/s +Buffer-Const,s!=d,xor=1: 0.229660 s 557.346 MB/s +8388608 16 8 LOG_ZERO - - +Seed: 1352746902 +Buffer-Const,s!=d,xor=0: 0.225217 s 568.340 MB/s +Buffer-Const,s!=d,xor=1: 0.208174 s 614.871 MB/s +16777216 8 8 LOG_ZERO - - +Seed: 1352746904 +Buffer-Const,s!=d,xor=0: 0.215686 s 593.456 MB/s +Buffer-Const,s!=d,xor=1: 0.204155 s 626.975 MB/s +33554432 4 8 LOG_ZERO - - +Seed: 1352746905 +Buffer-Const,s!=d,xor=0: 0.250863 s 510.239 MB/s +Buffer-Const,s!=d,xor=1: 0.200680 s 637.832 MB/s +67108864 2 8 LOG_ZERO - - +Seed: 1352746907 +Buffer-Const,s!=d,xor=0: 0.285895 s 447.717 MB/s +Buffer-Const,s!=d,xor=1: 0.201105 s 636.484 MB/s +134217728 1 8 LOG_ZERO - - +Seed: 1352746909 +Buffer-Const,s!=d,xor=0: 0.154129 s 830.473 MB/s 
+Buffer-Const,s!=d,xor=1: 0.200737 s 637.650 MB/s +1024 131072 8 TABLE - - +Seed: 1352746910 +Buffer-Const,s!=d,xor=0: 0.150785 s 848.888 MB/s +Buffer-Const,s!=d,xor=1: 0.199187 s 642.614 MB/s +2048 65536 8 TABLE - - +Seed: 1352746911 +Buffer-Const,s!=d,xor=0: 0.149158 s 858.153 MB/s +Buffer-Const,s!=d,xor=1: 0.196224 s 652.316 MB/s +4096 32768 8 TABLE - - +Seed: 1352746913 +Buffer-Const,s!=d,xor=0: 0.147988 s 864.936 MB/s +Buffer-Const,s!=d,xor=1: 0.195025 s 656.325 MB/s +8192 16384 8 TABLE - - +Seed: 1352746914 +Buffer-Const,s!=d,xor=0: 0.146994 s 870.786 MB/s +Buffer-Const,s!=d,xor=1: 0.193489 s 661.536 MB/s +16384 8192 8 TABLE - - +Seed: 1352746915 +Buffer-Const,s!=d,xor=0: 0.151192 s 846.606 MB/s +Buffer-Const,s!=d,xor=1: 0.196197 s 652.405 MB/s +32768 4096 8 TABLE - - +Seed: 1352746917 +Buffer-Const,s!=d,xor=0: 0.149436 s 856.553 MB/s +Buffer-Const,s!=d,xor=1: 0.194907 s 656.724 MB/s +65536 2048 8 TABLE - - +Seed: 1352746918 +Buffer-Const,s!=d,xor=0: 0.150252 s 851.900 MB/s +Buffer-Const,s!=d,xor=1: 0.196657 s 650.878 MB/s +131072 1024 8 TABLE - - +Seed: 1352746920 +Buffer-Const,s!=d,xor=0: 0.152423 s 839.767 MB/s +Buffer-Const,s!=d,xor=1: 0.196896 s 650.090 MB/s +262144 512 8 TABLE - - +Seed: 1352746921 +Buffer-Const,s!=d,xor=0: 0.149577 s 855.748 MB/s +Buffer-Const,s!=d,xor=1: 0.196668 s 650.843 MB/s +524288 256 8 TABLE - - +Seed: 1352746922 +Buffer-Const,s!=d,xor=0: 0.151604 s 844.307 MB/s +Buffer-Const,s!=d,xor=1: 0.198012 s 646.425 MB/s +1048576 128 8 TABLE - - +Seed: 1352746924 +Buffer-Const,s!=d,xor=0: 0.155570 s 822.779 MB/s +Buffer-Const,s!=d,xor=1: 0.195111 s 656.036 MB/s +2097152 64 8 TABLE - - +Seed: 1352746925 +Buffer-Const,s!=d,xor=0: 0.159052 s 804.766 MB/s +Buffer-Const,s!=d,xor=1: 0.204684 s 625.353 MB/s +4194304 32 8 TABLE - - +Seed: 1352746926 +Buffer-Const,s!=d,xor=0: 0.163852 s 781.193 MB/s +Buffer-Const,s!=d,xor=1: 0.204403 s 626.215 MB/s +8388608 16 8 TABLE - - +Seed: 1352746928 +Buffer-Const,s!=d,xor=0: 0.174190 s 734.832 MB/s 
+Buffer-Const,s!=d,xor=1: 0.202681 s 631.535 MB/s +16777216 8 8 TABLE - - +Seed: 1352746929 +Buffer-Const,s!=d,xor=0: 0.184380 s 694.218 MB/s +Buffer-Const,s!=d,xor=1: 0.204282 s 626.585 MB/s +33554432 4 8 TABLE - - +Seed: 1352746931 +Buffer-Const,s!=d,xor=0: 0.204508 s 625.892 MB/s +Buffer-Const,s!=d,xor=1: 0.207667 s 616.371 MB/s +67108864 2 8 TABLE - - +Seed: 1352746932 +Buffer-Const,s!=d,xor=0: 0.252662 s 506.606 MB/s +Buffer-Const,s!=d,xor=1: 0.208596 s 613.626 MB/s +134217728 1 8 TABLE - - +Seed: 1352746934 +Buffer-Const,s!=d,xor=0: 0.870799 s 146.991 MB/s +Buffer-Const,s!=d,xor=1: 0.888333 s 144.090 MB/s +1024 131072 8 TABLE DOUBLE - +Seed: 1352746938 +Buffer-Const,s!=d,xor=0: 0.808797 s 158.260 MB/s +Buffer-Const,s!=d,xor=1: 0.812444 s 157.549 MB/s +2048 65536 8 TABLE DOUBLE - +Seed: 1352746942 +Buffer-Const,s!=d,xor=0: 0.724551 s 176.661 MB/s +Buffer-Const,s!=d,xor=1: 0.733140 s 174.591 MB/s +4096 32768 8 TABLE DOUBLE - +Seed: 1352746946 +Buffer-Const,s!=d,xor=0: 0.622008 s 205.785 MB/s +Buffer-Const,s!=d,xor=1: 0.636914 s 200.969 MB/s +8192 16384 8 TABLE DOUBLE - +Seed: 1352746949 +Buffer-Const,s!=d,xor=0: 0.454528 s 281.611 MB/s +Buffer-Const,s!=d,xor=1: 0.467266 s 273.934 MB/s +16384 8192 8 TABLE DOUBLE - +Seed: 1352746952 +Buffer-Const,s!=d,xor=0: 0.285370 s 448.541 MB/s +Buffer-Const,s!=d,xor=1: 0.292051 s 438.279 MB/s +32768 4096 8 TABLE DOUBLE - +Seed: 1352746954 +Buffer-Const,s!=d,xor=0: 0.193707 s 660.791 MB/s +Buffer-Const,s!=d,xor=1: 0.202114 s 633.307 MB/s +65536 2048 8 TABLE DOUBLE - +Seed: 1352746955 +Buffer-Const,s!=d,xor=0: 0.147023 s 870.614 MB/s +Buffer-Const,s!=d,xor=1: 0.151774 s 843.360 MB/s +131072 1024 8 TABLE DOUBLE - +Seed: 1352746957 +Buffer-Const,s!=d,xor=0: 0.127245 s 1005.930 MB/s +Buffer-Const,s!=d,xor=1: 0.130981 s 977.243 MB/s +262144 512 8 TABLE DOUBLE - +Seed: 1352746958 +Buffer-Const,s!=d,xor=0: 0.112772 s 1135.034 MB/s +Buffer-Const,s!=d,xor=1: 0.117758 s 1086.972 MB/s +524288 256 8 TABLE DOUBLE - +Seed: 1352746959 
+Buffer-Const,s!=d,xor=0: 0.106724 s 1199.355 MB/s +Buffer-Const,s!=d,xor=1: 0.110677 s 1156.521 MB/s +1048576 128 8 TABLE DOUBLE - +Seed: 1352746960 +Buffer-Const,s!=d,xor=0: 0.109126 s 1172.960 MB/s +Buffer-Const,s!=d,xor=1: 0.115353 s 1109.641 MB/s +2097152 64 8 TABLE DOUBLE - +Seed: 1352746962 +Buffer-Const,s!=d,xor=0: 0.111492 s 1148.063 MB/s +Buffer-Const,s!=d,xor=1: 0.114936 s 1113.660 MB/s +4194304 32 8 TABLE DOUBLE - +Seed: 1352746963 +Buffer-Const,s!=d,xor=0: 0.114727 s 1115.694 MB/s +Buffer-Const,s!=d,xor=1: 0.112702 s 1135.740 MB/s +8388608 16 8 TABLE DOUBLE - +Seed: 1352746964 +Buffer-Const,s!=d,xor=0: 0.122290 s 1046.691 MB/s +Buffer-Const,s!=d,xor=1: 0.112557 s 1137.205 MB/s +16777216 8 8 TABLE DOUBLE - +Seed: 1352746965 +Buffer-Const,s!=d,xor=0: 0.130774 s 978.789 MB/s +Buffer-Const,s!=d,xor=1: 0.115443 s 1108.772 MB/s +33554432 4 8 TABLE DOUBLE - +Seed: 1352746966 +Buffer-Const,s!=d,xor=0: 0.152678 s 838.367 MB/s +Buffer-Const,s!=d,xor=1: 0.112051 s 1142.337 MB/s +67108864 2 8 TABLE DOUBLE - +Seed: 1352746968 +Buffer-Const,s!=d,xor=0: 0.199972 s 640.090 MB/s +Buffer-Const,s!=d,xor=1: 0.111309 s 1149.951 MB/s +134217728 1 8 TABLE DOUBLE - +Seed: 1352746969 +Buffer-Const,s!=d,xor=0: 12.353054 s 10.362 MB/s +Buffer-Const,s!=d,xor=1: 12.311798 s 10.397 MB/s +1024 131072 8 TABLE DOUBLE,LAZY - +Seed: 1352747019 +Buffer-Const,s!=d,xor=0: 6.245450 s 20.495 MB/s +Buffer-Const,s!=d,xor=1: 6.251623 s 20.475 MB/s +2048 65536 8 TABLE DOUBLE,LAZY - +Seed: 1352747045 +Buffer-Const,s!=d,xor=0: 3.157618 s 40.537 MB/s +Buffer-Const,s!=d,xor=1: 3.147050 s 40.673 MB/s +4096 32768 8 TABLE DOUBLE,LAZY - +Seed: 1352747058 +Buffer-Const,s!=d,xor=0: 1.631175 s 78.471 MB/s +Buffer-Const,s!=d,xor=1: 1.657020 s 77.247 MB/s +8192 16384 8 TABLE DOUBLE,LAZY - +Seed: 1352747065 +Buffer-Const,s!=d,xor=0: 0.860207 s 148.801 MB/s +Buffer-Const,s!=d,xor=1: 0.874988 s 146.288 MB/s +16384 8192 8 TABLE DOUBLE,LAZY - +Seed: 1352747069 +Buffer-Const,s!=d,xor=0: 0.478988 s 267.230 MB/s 
+Buffer-Const,s!=d,xor=1: 0.485077 s 263.876 MB/s +32768 4096 8 TABLE DOUBLE,LAZY - +Seed: 1352747072 +Buffer-Const,s!=d,xor=0: 0.291041 s 439.800 MB/s +Buffer-Const,s!=d,xor=1: 0.294611 s 434.472 MB/s +65536 2048 8 TABLE DOUBLE,LAZY - +Seed: 1352747074 +Buffer-Const,s!=d,xor=0: 0.195826 s 653.643 MB/s +Buffer-Const,s!=d,xor=1: 0.201743 s 634.472 MB/s +131072 1024 8 TABLE DOUBLE,LAZY - +Seed: 1352747075 +Buffer-Const,s!=d,xor=0: 0.148775 s 860.359 MB/s +Buffer-Const,s!=d,xor=1: 0.153898 s 831.717 MB/s +262144 512 8 TABLE DOUBLE,LAZY - +Seed: 1352747077 +Buffer-Const,s!=d,xor=0: 0.128037 s 999.707 MB/s +Buffer-Const,s!=d,xor=1: 0.130179 s 983.260 MB/s +524288 256 8 TABLE DOUBLE,LAZY - +Seed: 1352747078 +Buffer-Const,s!=d,xor=0: 0.112728 s 1135.473 MB/s +Buffer-Const,s!=d,xor=1: 0.119275 s 1073.152 MB/s +1048576 128 8 TABLE DOUBLE,LAZY - +Seed: 1352747079 +Buffer-Const,s!=d,xor=0: 0.113098 s 1131.763 MB/s +Buffer-Const,s!=d,xor=1: 0.117425 s 1090.056 MB/s +2097152 64 8 TABLE DOUBLE,LAZY - +Seed: 1352747080 +Buffer-Const,s!=d,xor=0: 0.113271 s 1130.033 MB/s +Buffer-Const,s!=d,xor=1: 0.116355 s 1100.082 MB/s +4194304 32 8 TABLE DOUBLE,LAZY - +Seed: 1352747081 +Buffer-Const,s!=d,xor=0: 0.109173 s 1172.448 MB/s +Buffer-Const,s!=d,xor=1: 0.114466 s 1118.239 MB/s +8388608 16 8 TABLE DOUBLE,LAZY - +Seed: 1352747082 +Buffer-Const,s!=d,xor=0: 0.120238 s 1064.555 MB/s +Buffer-Const,s!=d,xor=1: 0.113906 s 1123.737 MB/s +16777216 8 8 TABLE DOUBLE,LAZY - +Seed: 1352747084 +Buffer-Const,s!=d,xor=0: 0.127838 s 1001.266 MB/s +Buffer-Const,s!=d,xor=1: 0.112099 s 1141.846 MB/s +33554432 4 8 TABLE DOUBLE,LAZY - +Seed: 1352747085 +Buffer-Const,s!=d,xor=0: 0.154731 s 827.243 MB/s +Buffer-Const,s!=d,xor=1: 0.111025 s 1152.893 MB/s +67108864 2 8 TABLE DOUBLE,LAZY - +Seed: 1352747086 +Buffer-Const,s!=d,xor=0: 0.202618 s 631.730 MB/s +Buffer-Const,s!=d,xor=1: 0.110840 s 1154.819 MB/s +134217728 1 8 TABLE DOUBLE,LAZY - +Seed: 1352747087 +Buffer-Const,s!=d,xor=0: 0.400666 s 319.468 MB/s 
+Buffer-Const,s!=d,xor=1: 0.408545 s 313.307 MB/s +1024 131072 8 BYTWO_p - - +Seed: 1352747090 +Buffer-Const,s!=d,xor=0: 0.393822 s 325.020 MB/s +Buffer-Const,s!=d,xor=1: 0.400213 s 319.829 MB/s +2048 65536 8 BYTWO_p - - +Seed: 1352747092 +Buffer-Const,s!=d,xor=0: 0.388415 s 329.545 MB/s +Buffer-Const,s!=d,xor=1: 0.396545 s 322.788 MB/s +4096 32768 8 BYTWO_p - - +Seed: 1352747094 +Buffer-Const,s!=d,xor=0: 0.389005 s 329.044 MB/s +Buffer-Const,s!=d,xor=1: 0.395450 s 323.682 MB/s +8192 16384 8 BYTWO_p - - +Seed: 1352747096 +Buffer-Const,s!=d,xor=0: 0.385698 s 331.866 MB/s +Buffer-Const,s!=d,xor=1: 0.395319 s 323.789 MB/s +16384 8192 8 BYTWO_p - - +Seed: 1352747099 +Buffer-Const,s!=d,xor=0: 0.385273 s 332.232 MB/s +Buffer-Const,s!=d,xor=1: 0.396203 s 323.067 MB/s +32768 4096 8 BYTWO_p - - +Seed: 1352747101 +Buffer-Const,s!=d,xor=0: 0.387427 s 330.385 MB/s +Buffer-Const,s!=d,xor=1: 0.394610 s 324.371 MB/s +65536 2048 8 BYTWO_p - - +Seed: 1352747103 +Buffer-Const,s!=d,xor=0: 0.389866 s 328.318 MB/s +Buffer-Const,s!=d,xor=1: 0.398012 s 321.598 MB/s +131072 1024 8 BYTWO_p - - +Seed: 1352747105 +Buffer-Const,s!=d,xor=0: 0.389453 s 328.666 MB/s +Buffer-Const,s!=d,xor=1: 0.397982 s 321.622 MB/s +262144 512 8 BYTWO_p - - +Seed: 1352747108 +Buffer-Const,s!=d,xor=0: 0.388304 s 329.638 MB/s +Buffer-Const,s!=d,xor=1: 0.399512 s 320.391 MB/s +524288 256 8 BYTWO_p - - +Seed: 1352747110 +Buffer-Const,s!=d,xor=0: 0.390699 s 327.618 MB/s +Buffer-Const,s!=d,xor=1: 0.407622 s 314.016 MB/s +1048576 128 8 BYTWO_p - - +Seed: 1352747112 +Buffer-Const,s!=d,xor=0: 0.398830 s 320.939 MB/s +Buffer-Const,s!=d,xor=1: 0.401909 s 318.480 MB/s +2097152 64 8 BYTWO_p - - +Seed: 1352747114 +Buffer-Const,s!=d,xor=0: 0.402605 s 317.930 MB/s +Buffer-Const,s!=d,xor=1: 0.410941 s 311.480 MB/s +4194304 32 8 BYTWO_p - - +Seed: 1352747117 +Buffer-Const,s!=d,xor=0: 0.404638 s 316.332 MB/s +Buffer-Const,s!=d,xor=1: 0.406369 s 314.984 MB/s +8388608 16 8 BYTWO_p - - +Seed: 1352747119 +Buffer-Const,s!=d,xor=0: 
0.412950 s 309.965 MB/s +Buffer-Const,s!=d,xor=1: 0.411819 s 310.816 MB/s +16777216 8 8 BYTWO_p - - +Seed: 1352747121 +Buffer-Const,s!=d,xor=0: 0.417898 s 306.295 MB/s +Buffer-Const,s!=d,xor=1: 0.412159 s 310.560 MB/s +33554432 4 8 BYTWO_p - - +Seed: 1352747124 +Buffer-Const,s!=d,xor=0: 0.444945 s 287.676 MB/s +Buffer-Const,s!=d,xor=1: 0.404381 s 316.533 MB/s +67108864 2 8 BYTWO_p - - +Seed: 1352747126 +Buffer-Const,s!=d,xor=0: 0.494330 s 258.936 MB/s +Buffer-Const,s!=d,xor=1: 0.412325 s 310.435 MB/s +134217728 1 8 BYTWO_p - - +Seed: 1352747129 +Buffer-Const,s!=d,xor=0: 0.306549 s 417.552 MB/s +Buffer-Const,s!=d,xor=1: 0.309033 s 414.195 MB/s +1024 131072 8 BYTWO_b - - +Seed: 1352747131 +Buffer-Const,s!=d,xor=0: 0.297702 s 429.961 MB/s +Buffer-Const,s!=d,xor=1: 0.297253 s 430.609 MB/s +2048 65536 8 BYTWO_b - - +Seed: 1352747132 +Buffer-Const,s!=d,xor=0: 0.293193 s 436.572 MB/s +Buffer-Const,s!=d,xor=1: 0.293018 s 436.833 MB/s +4096 32768 8 BYTWO_b - - +Seed: 1352747134 +Buffer-Const,s!=d,xor=0: 0.294984 s 433.922 MB/s +Buffer-Const,s!=d,xor=1: 0.290863 s 440.070 MB/s +8192 16384 8 BYTWO_b - - +Seed: 1352747136 +Buffer-Const,s!=d,xor=0: 0.288896 s 443.067 MB/s +Buffer-Const,s!=d,xor=1: 0.288462 s 443.732 MB/s +16384 8192 8 BYTWO_b - - +Seed: 1352747138 +Buffer-Const,s!=d,xor=0: 0.290112 s 441.208 MB/s +Buffer-Const,s!=d,xor=1: 0.288533 s 443.623 MB/s +32768 4096 8 BYTWO_b - - +Seed: 1352747140 +Buffer-Const,s!=d,xor=0: 0.288124 s 444.253 MB/s +Buffer-Const,s!=d,xor=1: 0.286360 s 446.989 MB/s +65536 2048 8 BYTWO_b - - +Seed: 1352747142 +Buffer-Const,s!=d,xor=0: 0.292166 s 438.106 MB/s +Buffer-Const,s!=d,xor=1: 0.288037 s 444.388 MB/s +131072 1024 8 BYTWO_b - - +Seed: 1352747143 +Buffer-Const,s!=d,xor=0: 0.295804 s 432.719 MB/s +Buffer-Const,s!=d,xor=1: 0.292226 s 438.017 MB/s +262144 512 8 BYTWO_b - - +Seed: 1352747145 +Buffer-Const,s!=d,xor=0: 0.284928 s 449.236 MB/s +Buffer-Const,s!=d,xor=1: 0.286746 s 446.388 MB/s +524288 256 8 BYTWO_b - - +Seed: 1352747147 
+Buffer-Const,s!=d,xor=0: 0.295747 s 432.803 MB/s +Buffer-Const,s!=d,xor=1: 0.291578 s 438.990 MB/s +1048576 128 8 BYTWO_b - - +Seed: 1352747149 +Buffer-Const,s!=d,xor=0: 0.300418 s 426.073 MB/s +Buffer-Const,s!=d,xor=1: 0.283470 s 451.547 MB/s +2097152 64 8 BYTWO_b - - +Seed: 1352747151 +Buffer-Const,s!=d,xor=0: 0.310105 s 412.764 MB/s +Buffer-Const,s!=d,xor=1: 0.306506 s 417.610 MB/s +4194304 32 8 BYTWO_b - - +Seed: 1352747153 +Buffer-Const,s!=d,xor=0: 0.303049 s 422.373 MB/s +Buffer-Const,s!=d,xor=1: 0.294477 s 434.669 MB/s +8388608 16 8 BYTWO_b - - +Seed: 1352747155 +Buffer-Const,s!=d,xor=0: 0.318920 s 401.354 MB/s +Buffer-Const,s!=d,xor=1: 0.292649 s 437.384 MB/s +16777216 8 8 BYTWO_b - - +Seed: 1352747157 +Buffer-Const,s!=d,xor=0: 0.369239 s 346.659 MB/s +Buffer-Const,s!=d,xor=1: 0.299009 s 428.081 MB/s +33554432 4 8 BYTWO_b - - +Seed: 1352747159 +Buffer-Const,s!=d,xor=0: 0.370332 s 345.636 MB/s +Buffer-Const,s!=d,xor=1: 0.292907 s 436.999 MB/s +67108864 2 8 BYTWO_b - - +Seed: 1352747161 +Buffer-Const,s!=d,xor=0: 0.437750 s 292.404 MB/s +Buffer-Const,s!=d,xor=1: 0.303224 s 422.130 MB/s +134217728 1 8 BYTWO_b - - +Seed: 1352747163 +Buffer-Const,s!=d,xor=0: 0.199102 s 642.888 MB/s +Buffer-Const,s!=d,xor=1: 0.198709 s 644.159 MB/s +1024 131072 8 BYTWO_p SSE - +Seed: 1352747164 +Buffer-Const,s!=d,xor=0: 0.188358 s 679.558 MB/s +Buffer-Const,s!=d,xor=1: 0.190699 s 671.215 MB/s +2048 65536 8 BYTWO_p SSE - +Seed: 1352747166 +Buffer-Const,s!=d,xor=0: 0.184177 s 694.985 MB/s +Buffer-Const,s!=d,xor=1: 0.186848 s 685.049 MB/s +4096 32768 8 BYTWO_p SSE - +Seed: 1352747167 +Buffer-Const,s!=d,xor=0: 0.189242 s 676.384 MB/s +Buffer-Const,s!=d,xor=1: 0.186107 s 687.776 MB/s +8192 16384 8 BYTWO_p SSE - +Seed: 1352747169 +Buffer-Const,s!=d,xor=0: 0.179632 s 712.566 MB/s +Buffer-Const,s!=d,xor=1: 0.182739 s 700.454 MB/s +16384 8192 8 BYTWO_p SSE - +Seed: 1352747170 +Buffer-Const,s!=d,xor=0: 0.199486 s 641.648 MB/s +Buffer-Const,s!=d,xor=1: 0.187585 s 682.357 MB/s +32768 4096 8 
BYTWO_p SSE - +Seed: 1352747172 +Buffer-Const,s!=d,xor=0: 0.181719 s 704.385 MB/s +Buffer-Const,s!=d,xor=1: 0.183744 s 696.620 MB/s +65536 2048 8 BYTWO_p SSE - +Seed: 1352747173 +Buffer-Const,s!=d,xor=0: 0.179243 s 714.114 MB/s +Buffer-Const,s!=d,xor=1: 0.181455 s 705.409 MB/s +131072 1024 8 BYTWO_p SSE - +Seed: 1352747174 +Buffer-Const,s!=d,xor=0: 0.178887 s 715.536 MB/s +Buffer-Const,s!=d,xor=1: 0.180799 s 707.969 MB/s +262144 512 8 BYTWO_p SSE - +Seed: 1352747176 +Buffer-Const,s!=d,xor=0: 0.180232 s 710.196 MB/s +Buffer-Const,s!=d,xor=1: 0.180657 s 708.523 MB/s +524288 256 8 BYTWO_p SSE - +Seed: 1352747177 +Buffer-Const,s!=d,xor=0: 0.180044 s 710.938 MB/s +Buffer-Const,s!=d,xor=1: 0.183542 s 697.386 MB/s +1048576 128 8 BYTWO_p SSE - +Seed: 1352747179 +Buffer-Const,s!=d,xor=0: 0.188030 s 680.743 MB/s +Buffer-Const,s!=d,xor=1: 0.189776 s 674.480 MB/s +2097152 64 8 BYTWO_p SSE - +Seed: 1352747180 +Buffer-Const,s!=d,xor=0: 0.188869 s 677.718 MB/s +Buffer-Const,s!=d,xor=1: 0.199248 s 642.415 MB/s +4194304 32 8 BYTWO_p SSE - +Seed: 1352747181 +Buffer-Const,s!=d,xor=0: 0.191749 s 667.538 MB/s +Buffer-Const,s!=d,xor=1: 0.188193 s 680.153 MB/s +8388608 16 8 BYTWO_p SSE - +Seed: 1352747183 +Buffer-Const,s!=d,xor=0: 0.200427 s 638.638 MB/s +Buffer-Const,s!=d,xor=1: 0.189489 s 675.501 MB/s +16777216 8 8 BYTWO_p SSE - +Seed: 1352747184 +Buffer-Const,s!=d,xor=0: 0.206467 s 619.954 MB/s +Buffer-Const,s!=d,xor=1: 0.195798 s 653.735 MB/s +33554432 4 8 BYTWO_p SSE - +Seed: 1352747186 +Buffer-Const,s!=d,xor=0: 0.226630 s 564.797 MB/s +Buffer-Const,s!=d,xor=1: 0.189382 s 675.883 MB/s +67108864 2 8 BYTWO_p SSE - +Seed: 1352747187 +Buffer-Const,s!=d,xor=0: 0.279772 s 457.515 MB/s +Buffer-Const,s!=d,xor=1: 0.196061 s 652.858 MB/s +134217728 1 8 BYTWO_p SSE - +Seed: 1352747189 +Buffer-Const,s!=d,xor=0: 0.148536 s 861.741 MB/s +Buffer-Const,s!=d,xor=1: 0.276922 s 462.224 MB/s +1024 131072 8 BYTWO_b SSE - +Seed: 1352747191 +Buffer-Const,s!=d,xor=0: 0.137811 s 928.805 MB/s 
+Buffer-Const,s!=d,xor=1: 0.268928 s 475.964 MB/s +2048 65536 8 BYTWO_b SSE - +Seed: 1352747192 +Buffer-Const,s!=d,xor=0: 0.132821 s 963.706 MB/s +Buffer-Const,s!=d,xor=1: 0.265851 s 481.474 MB/s +4096 32768 8 BYTWO_b SSE - +Seed: 1352747194 +Buffer-Const,s!=d,xor=0: 0.131842 s 970.862 MB/s +Buffer-Const,s!=d,xor=1: 0.263387 s 485.977 MB/s +8192 16384 8 BYTWO_b SSE - +Seed: 1352747195 +Buffer-Const,s!=d,xor=0: 0.131891 s 970.495 MB/s +Buffer-Const,s!=d,xor=1: 0.260863 s 490.680 MB/s +16384 8192 8 BYTWO_b SSE - +Seed: 1352747197 +Buffer-Const,s!=d,xor=0: 0.128815 s 993.670 MB/s +Buffer-Const,s!=d,xor=1: 0.260589 s 491.196 MB/s +32768 4096 8 BYTWO_b SSE - +Seed: 1352747198 +Buffer-Const,s!=d,xor=0: 0.127239 s 1005.979 MB/s +Buffer-Const,s!=d,xor=1: 0.261076 s 490.278 MB/s +65536 2048 8 BYTWO_b SSE - +Seed: 1352747200 +Buffer-Const,s!=d,xor=0: 0.127946 s 1000.421 MB/s +Buffer-Const,s!=d,xor=1: 0.266347 s 480.576 MB/s +131072 1024 8 BYTWO_b SSE - +Seed: 1352747201 +Buffer-Const,s!=d,xor=0: 0.129641 s 987.340 MB/s +Buffer-Const,s!=d,xor=1: 0.261065 s 490.299 MB/s +262144 512 8 BYTWO_b SSE - +Seed: 1352747202 +Buffer-Const,s!=d,xor=0: 0.131109 s 976.285 MB/s +Buffer-Const,s!=d,xor=1: 0.259368 s 493.507 MB/s +524288 256 8 BYTWO_b SSE - +Seed: 1352747204 +Buffer-Const,s!=d,xor=0: 0.130358 s 981.911 MB/s +Buffer-Const,s!=d,xor=1: 0.268218 s 477.224 MB/s +1048576 128 8 BYTWO_b SSE - +Seed: 1352747205 +Buffer-Const,s!=d,xor=0: 0.135308 s 945.990 MB/s +Buffer-Const,s!=d,xor=1: 0.282554 s 453.011 MB/s +2097152 64 8 BYTWO_b SSE - +Seed: 1352747207 +Buffer-Const,s!=d,xor=0: 0.141210 s 906.454 MB/s +Buffer-Const,s!=d,xor=1: 0.284272 s 450.272 MB/s +4194304 32 8 BYTWO_b SSE - +Seed: 1352747208 +Buffer-Const,s!=d,xor=0: 0.150900 s 848.245 MB/s +Buffer-Const,s!=d,xor=1: 0.291628 s 438.916 MB/s +8388608 16 8 BYTWO_b SSE - +Seed: 1352747210 +Buffer-Const,s!=d,xor=0: 0.147792 s 866.084 MB/s +Buffer-Const,s!=d,xor=1: 0.278963 s 458.842 MB/s +16777216 8 8 BYTWO_b SSE - +Seed: 1352747211 
+Buffer-Const,s!=d,xor=0: 0.154891 s 826.390 MB/s +Buffer-Const,s!=d,xor=1: 0.176620 s 724.721 MB/s +33554432 4 8 BYTWO_b SSE - +Seed: 1352747213 +Buffer-Const,s!=d,xor=0: 0.193885 s 660.186 MB/s +Buffer-Const,s!=d,xor=1: 0.268795 s 476.199 MB/s +67108864 2 8 BYTWO_b SSE - +Seed: 1352747214 +Buffer-Const,s!=d,xor=0: 0.204667 s 625.407 MB/s +Buffer-Const,s!=d,xor=1: 0.269170 s 475.536 MB/s +134217728 1 8 BYTWO_b SSE - +Seed: 1352747216 +Buffer-Const,s!=d,xor=0: 1.940300 s 65.969 MB/s +Buffer-Const,s!=d,xor=1: 2.143284 s 59.721 MB/s +1024 131072 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747225 +Buffer-Const,s!=d,xor=0: 1.923481 s 66.546 MB/s +Buffer-Const,s!=d,xor=1: 2.147470 s 59.605 MB/s +2048 65536 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747234 +Buffer-Const,s!=d,xor=0: 1.916270 s 66.796 MB/s +Buffer-Const,s!=d,xor=1: 2.139770 s 59.820 MB/s +4096 32768 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747243 +Buffer-Const,s!=d,xor=0: 1.938715 s 66.023 MB/s +Buffer-Const,s!=d,xor=1: 2.137380 s 59.886 MB/s +8192 16384 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747252 +Buffer-Const,s!=d,xor=0: 1.922527 s 66.579 MB/s +Buffer-Const,s!=d,xor=1: 2.148529 s 59.576 MB/s +16384 8192 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747261 +Buffer-Const,s!=d,xor=0: 1.929218 s 66.348 MB/s +Buffer-Const,s!=d,xor=1: 2.138858 s 59.845 MB/s +32768 4096 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747270 +Buffer-Const,s!=d,xor=0: 1.921590 s 66.612 MB/s +Buffer-Const,s!=d,xor=1: 2.137566 s 59.881 MB/s +65536 2048 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747278 +Buffer-Const,s!=d,xor=0: 1.932345 s 66.241 MB/s +Buffer-Const,s!=d,xor=1: 2.130586 s 60.077 MB/s +131072 1024 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747287 +Buffer-Const,s!=d,xor=0: 1.944353 s 65.832 MB/s +Buffer-Const,s!=d,xor=1: 2.126287 s 60.199 MB/s +262144 512 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747296 +Buffer-Const,s!=d,xor=0: 1.921692 s 66.608 MB/s 
+Buffer-Const,s!=d,xor=1: 2.128691 s 60.131 MB/s +524288 256 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747305 +Buffer-Const,s!=d,xor=0: 1.883663 s 67.953 MB/s +Buffer-Const,s!=d,xor=1: 2.149924 s 59.537 MB/s +1048576 128 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747314 +Buffer-Const,s!=d,xor=0: 1.957364 s 65.394 MB/s +Buffer-Const,s!=d,xor=1: 2.167789 s 59.046 MB/s +2097152 64 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747323 +Buffer-Const,s!=d,xor=0: 1.958212 s 65.366 MB/s +Buffer-Const,s!=d,xor=1: 2.159558 s 59.271 MB/s +4194304 32 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747332 +Buffer-Const,s!=d,xor=0: 1.958506 s 65.356 MB/s +Buffer-Const,s!=d,xor=1: 2.019473 s 63.383 MB/s +8388608 16 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747341 +Buffer-Const,s!=d,xor=0: 1.949758 s 65.649 MB/s +Buffer-Const,s!=d,xor=1: 2.165875 s 59.099 MB/s +16777216 8 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747349 +Buffer-Const,s!=d,xor=0: 1.964626 s 65.152 MB/s +Buffer-Const,s!=d,xor=1: 2.151822 s 59.484 MB/s +33554432 4 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747358 +Buffer-Const,s!=d,xor=0: 2.045733 s 62.569 MB/s +Buffer-Const,s!=d,xor=1: 2.177383 s 58.786 MB/s +67108864 2 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747367 +Buffer-Const,s!=d,xor=0: 2.055240 s 62.280 MB/s +Buffer-Const,s!=d,xor=1: 2.190975 s 58.421 MB/s +134217728 1 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +Seed: 1352747377 +Buffer-Const,s!=d,xor=0: 0.080290 s 1594.215 MB/s +Buffer-Const,s!=d,xor=1: 0.082083 s 1559.402 MB/s +1024 131072 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747378 +Buffer-Const,s!=d,xor=0: 0.059030 s 2168.378 MB/s +Buffer-Const,s!=d,xor=1: 0.064752 s 1976.763 MB/s +2048 65536 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747379 +Buffer-Const,s!=d,xor=0: 0.050239 s 2547.829 MB/s +Buffer-Const,s!=d,xor=1: 0.050503 s 2534.526 MB/s +4096 32768 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747379 
+Buffer-Const,s!=d,xor=0: 0.044825 s 2855.560 MB/s +Buffer-Const,s!=d,xor=1: 0.045130 s 2836.220 MB/s +8192 16384 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747380 +Buffer-Const,s!=d,xor=0: 0.042018 s 3046.301 MB/s +Buffer-Const,s!=d,xor=1: 0.042297 s 3026.210 MB/s +16384 8192 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747381 +Buffer-Const,s!=d,xor=0: 0.040955 s 3125.413 MB/s +Buffer-Const,s!=d,xor=1: 0.041454 s 3087.754 MB/s +32768 4096 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747382 +Buffer-Const,s!=d,xor=0: 0.040984 s 3123.195 MB/s +Buffer-Const,s!=d,xor=1: 0.041577 s 3078.635 MB/s +65536 2048 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747383 +Buffer-Const,s!=d,xor=0: 0.041093 s 3114.859 MB/s +Buffer-Const,s!=d,xor=1: 0.042611 s 3003.911 MB/s +131072 1024 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747384 +Buffer-Const,s!=d,xor=0: 0.047338 s 2703.972 MB/s +Buffer-Const,s!=d,xor=1: 0.049673 s 2576.836 MB/s +262144 512 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747385 +Buffer-Const,s!=d,xor=0: 0.049656 s 2577.739 MB/s +Buffer-Const,s!=d,xor=1: 0.050634 s 2527.950 MB/s +524288 256 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747386 +Buffer-Const,s!=d,xor=0: 0.049906 s 2564.833 MB/s +Buffer-Const,s!=d,xor=1: 0.051381 s 2491.188 MB/s +1048576 128 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747386 +Buffer-Const,s!=d,xor=0: 0.075184 s 1702.487 MB/s +Buffer-Const,s!=d,xor=1: 0.070414 s 1817.825 MB/s +2097152 64 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747387 +Buffer-Const,s!=d,xor=0: 0.108748 s 1177.034 MB/s +Buffer-Const,s!=d,xor=1: 0.111286 s 1150.190 MB/s +4194304 32 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747388 +Buffer-Const,s!=d,xor=0: 0.117474 s 1089.600 MB/s +Buffer-Const,s!=d,xor=1: 0.114860 s 1114.400 MB/s +8388608 16 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747389 +Buffer-Const,s!=d,xor=0: 0.126348 s 1013.075 MB/s 
+Buffer-Const,s!=d,xor=1: 0.109330 s 1170.768 MB/s +16777216 8 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747391 +Buffer-Const,s!=d,xor=0: 0.123002 s 1040.635 MB/s +Buffer-Const,s!=d,xor=1: 0.110046 s 1163.148 MB/s +33554432 4 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747392 +Buffer-Const,s!=d,xor=0: 0.159381 s 803.107 MB/s +Buffer-Const,s!=d,xor=1: 0.120685 s 1060.611 MB/s +67108864 2 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747393 +Buffer-Const,s!=d,xor=0: 0.196446 s 651.578 MB/s +Buffer-Const,s!=d,xor=1: 0.121685 s 1051.896 MB/s +134217728 1 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - +Seed: 1352747520 +Buffer-Const,s!=d,xor=0: 0.244784 s 522.910 MB/s +Buffer-Const,s!=d,xor=1: 0.259940 s 492.421 MB/s +1024 131072 8 SPLIT 8 4 NOSSE - +Seed: 1352747522 +Buffer-Const,s!=d,xor=0: 0.243595 s 525.463 MB/s +Buffer-Const,s!=d,xor=1: 0.253145 s 505.640 MB/s +2048 65536 8 SPLIT 8 4 NOSSE - +Seed: 1352747523 +Buffer-Const,s!=d,xor=0: 0.240463 s 532.307 MB/s +Buffer-Const,s!=d,xor=1: 0.251567 s 508.811 MB/s +4096 32768 8 SPLIT 8 4 NOSSE - +Seed: 1352747525 +Buffer-Const,s!=d,xor=0: 0.240079 s 533.157 MB/s +Buffer-Const,s!=d,xor=1: 0.255671 s 500.643 MB/s +8192 16384 8 SPLIT 8 4 NOSSE - +Seed: 1352747527 +Buffer-Const,s!=d,xor=0: 0.242857 s 527.059 MB/s +Buffer-Const,s!=d,xor=1: 0.251837 s 508.264 MB/s +16384 8192 8 SPLIT 8 4 NOSSE - +Seed: 1352747528 +Buffer-Const,s!=d,xor=0: 0.240757 s 531.657 MB/s +Buffer-Const,s!=d,xor=1: 0.253888 s 504.160 MB/s +32768 4096 8 SPLIT 8 4 NOSSE - +Seed: 1352747530 +Buffer-Const,s!=d,xor=0: 0.240586 s 532.034 MB/s +Buffer-Const,s!=d,xor=1: 0.256642 s 498.749 MB/s +65536 2048 8 SPLIT 8 4 NOSSE - +Seed: 1352747532 +Buffer-Const,s!=d,xor=0: 0.238570 s 536.529 MB/s +Buffer-Const,s!=d,xor=1: 0.254111 s 503.717 MB/s +131072 1024 8 SPLIT 8 4 NOSSE - +Seed: 1352747533 +Buffer-Const,s!=d,xor=0: 0.237666 s 538.572 MB/s +Buffer-Const,s!=d,xor=1: 0.254334 s 503.275 MB/s +262144 512 8 SPLIT 8 4 NOSSE - +Seed: 
1352747535 +Buffer-Const,s!=d,xor=0: 0.244512 s 523.491 MB/s +Buffer-Const,s!=d,xor=1: 0.255911 s 500.174 MB/s +524288 256 8 SPLIT 8 4 NOSSE - +Seed: 1352747537 +Buffer-Const,s!=d,xor=0: 0.242439 s 527.968 MB/s +Buffer-Const,s!=d,xor=1: 0.255622 s 500.740 MB/s +1048576 128 8 SPLIT 8 4 NOSSE - +Seed: 1352747538 +Buffer-Const,s!=d,xor=0: 0.248633 s 514.815 MB/s +Buffer-Const,s!=d,xor=1: 0.257451 s 497.181 MB/s +2097152 64 8 SPLIT 8 4 NOSSE - +Seed: 1352747540 +Buffer-Const,s!=d,xor=0: 0.241531 s 529.952 MB/s +Buffer-Const,s!=d,xor=1: 0.264452 s 484.020 MB/s +4194304 32 8 SPLIT 8 4 NOSSE - +Seed: 1352747542 +Buffer-Const,s!=d,xor=0: 0.255533 s 500.914 MB/s +Buffer-Const,s!=d,xor=1: 0.248849 s 514.368 MB/s +8388608 16 8 SPLIT 8 4 NOSSE - +Seed: 1352747543 +Buffer-Const,s!=d,xor=0: 0.259687 s 492.902 MB/s +Buffer-Const,s!=d,xor=1: 0.264417 s 484.084 MB/s +16777216 8 8 SPLIT 8 4 NOSSE - +Seed: 1352747545 +Buffer-Const,s!=d,xor=0: 0.267928 s 477.740 MB/s +Buffer-Const,s!=d,xor=1: 0.269417 s 475.100 MB/s +33554432 4 8 SPLIT 8 4 NOSSE - +Seed: 1352747547 +Buffer-Const,s!=d,xor=0: 0.295526 s 433.126 MB/s +Buffer-Const,s!=d,xor=1: 0.270747 s 472.766 MB/s +67108864 2 8 SPLIT 8 4 NOSSE - +Seed: 1352747549 +Buffer-Const,s!=d,xor=0: 0.342706 s 373.498 MB/s +Buffer-Const,s!=d,xor=1: 0.266642 s 480.045 MB/s +134217728 1 8 SPLIT 8 4 NOSSE - +Seed: 1352747551 +Buffer-Const,s!=d,xor=0: 0.027748 s 4612.927 MB/s +Buffer-Const,s!=d,xor=1: 0.028090 s 4556.704 MB/s +1024 131072 8 SPLIT 8 4 SSE - +Seed: 1352747552 +Buffer-Const,s!=d,xor=0: 0.023128 s 5534.409 MB/s +Buffer-Const,s!=d,xor=1: 0.023134 s 5533.040 MB/s +2048 65536 8 SPLIT 8 4 SSE - +Seed: 1352747552 +Buffer-Const,s!=d,xor=0: 0.019114 s 6696.740 MB/s +Buffer-Const,s!=d,xor=1: 0.019763 s 6476.596 MB/s +4096 32768 8 SPLIT 8 4 SSE - +Seed: 1352747553 +Buffer-Const,s!=d,xor=0: 0.017541 s 7297.119 MB/s +Buffer-Const,s!=d,xor=1: 0.018266 s 7007.661 MB/s +8192 16384 8 SPLIT 8 4 SSE - +Seed: 1352747554 +Buffer-Const,s!=d,xor=0: 0.017010 
s 7524.892 MB/s +Buffer-Const,s!=d,xor=1: 0.017399 s 7356.613 MB/s +16384 8192 8 SPLIT 8 4 SSE - +Seed: 1352747555 +Buffer-Const,s!=d,xor=0: 0.016979 s 7538.522 MB/s +Buffer-Const,s!=d,xor=1: 0.017508 s 7311.130 MB/s +32768 4096 8 SPLIT 8 4 SSE - +Seed: 1352747555 +Buffer-Const,s!=d,xor=0: 0.016780 s 7628.283 MB/s +Buffer-Const,s!=d,xor=1: 0.017439 s 7340.018 MB/s +65536 2048 8 SPLIT 8 4 SSE - +Seed: 1352747556 +Buffer-Const,s!=d,xor=0: 0.017527 s 7302.876 MB/s +Buffer-Const,s!=d,xor=1: 0.018656 s 6861.145 MB/s +131072 1024 8 SPLIT 8 4 SSE - +Seed: 1352747557 +Buffer-Const,s!=d,xor=0: 0.020679 s 6189.855 MB/s +Buffer-Const,s!=d,xor=1: 0.022183 s 5770.138 MB/s +262144 512 8 SPLIT 8 4 SSE - +Seed: 1352747558 +Buffer-Const,s!=d,xor=0: 0.020437 s 6263.296 MB/s +Buffer-Const,s!=d,xor=1: 0.021715 s 5894.434 MB/s +524288 256 8 SPLIT 8 4 SSE - +Seed: 1352747558 +Buffer-Const,s!=d,xor=0: 0.020800 s 6153.883 MB/s +Buffer-Const,s!=d,xor=1: 0.021934 s 5835.617 MB/s +1048576 128 8 SPLIT 8 4 SSE - +Seed: 1352747559 +Buffer-Const,s!=d,xor=0: 0.035634 s 3592.095 MB/s +Buffer-Const,s!=d,xor=1: 0.036323 s 3523.977 MB/s +2097152 64 8 SPLIT 8 4 SSE - +Seed: 1352747560 +Buffer-Const,s!=d,xor=0: 0.050565 s 2531.419 MB/s +Buffer-Const,s!=d,xor=1: 0.048358 s 2646.914 MB/s +4194304 32 8 SPLIT 8 4 SSE - +Seed: 1352747561 +Buffer-Const,s!=d,xor=0: 0.053646 s 2386.008 MB/s +Buffer-Const,s!=d,xor=1: 0.047063 s 2719.766 MB/s +8388608 16 8 SPLIT 8 4 SSE - +Seed: 1352747562 +Buffer-Const,s!=d,xor=0: 0.055658 s 2299.775 MB/s +Buffer-Const,s!=d,xor=1: 0.047532 s 2692.918 MB/s +16777216 8 8 SPLIT 8 4 SSE - +Seed: 1352747563 +Buffer-Const,s!=d,xor=0: 0.064355 s 1988.963 MB/s +Buffer-Const,s!=d,xor=1: 0.047547 s 2692.067 MB/s +33554432 4 8 SPLIT 8 4 SSE - +Seed: 1352747563 +Buffer-Const,s!=d,xor=0: 0.084876 s 1508.086 MB/s +Buffer-Const,s!=d,xor=1: 0.048017 s 2665.721 MB/s +67108864 2 8 SPLIT 8 4 SSE - +Seed: 1352747564 +Buffer-Const,s!=d,xor=0: 0.121661 s 1052.104 MB/s +Buffer-Const,s!=d,xor=1: 
0.047558 s 2691.447 MB/s +134217728 1 8 SPLIT 8 4 SSE - diff --git a/junk-w8-timing-tests.sh b/junk-w8-timing-tests.sh new file mode 100644 index 0000000..6b78dab --- /dev/null +++ b/junk-w8-timing-tests.sh @@ -0,0 +1,13 @@ +sh tmp-time-test.sh 8 LOG - - +sh tmp-time-test.sh 8 LOG_ZERO - - +sh tmp-time-test.sh 8 TABLE - - +sh tmp-time-test.sh 8 TABLE DOUBLE - +sh tmp-time-test.sh 8 TABLE DOUBLE,LAZY - +sh tmp-time-test.sh 8 BYTWO_p - - +sh tmp-time-test.sh 8 BYTWO_b - - +sh tmp-time-test.sh 8 BYTWO_p SSE - +sh tmp-time-test.sh 8 BYTWO_b SSE - +sh tmp-time-test.sh 8 SPLIT 8 4 NOSSE - +sh tmp-time-test.sh 8 SPLIT 8 4 SSE - +sh tmp-time-test.sh 8 COMPOSITE 2 4 TABLE SINGLE,SSE - - - +sh tmp-time-test.sh 8 COMPOSITE 2 4 TABLE SINGLE,SSE - ALTMAP - diff --git a/junk-w8-timing.jgr b/junk-w8-timing.jgr new file mode 100644 index 0000000..0245111 --- /dev/null +++ b/junk-w8-timing.jgr @@ -0,0 +1,11 @@ +newgraph +xaxis size 4 min 0 no_auto_hash_labels + hash_labels hjl vjc rotate -90 fontsize 11 + +shell : junk-pick-best-output < junk-w8-timing-out.txt | sort -nr | sed 's/.............//' | awk '{ print "hash_label at ", ++l, ":", $0 }' + +yaxis size 1 min 0 label : MB/s + +newcurve marktype xbar cfill 1 1 0 marksize 1 pts +shell : junk-pick-best-output < junk-w8-timing-out.txt | sort -nr | awk '{ print $1 }' | cat -n + diff --git a/junk.c b/junk.c new file mode 100644 index 0000000..739a514 --- /dev/null +++ b/junk.c @@ -0,0 +1,17 @@ +#include + +main() +{ + int size, iterations; + double ds, di, elapsed; + + elapsed = 0.614553; + size = 8192; + iterations = 655360; + + ds = size; + di = iterations; + + printf("%10.3lf\n", ((double) (size*iterations)) / (1024 * 1024 * elapsed)); + printf("%10.3lf\n", ds * di / 1024.0 / 1024.0 / elapsed); +} diff --git a/junk.ps b/junk.ps new file mode 100644 index 0000000..f8e80c7 --- /dev/null +++ b/junk.ps @@ -0,0 +1,199 @@ +%!PS-Adobe-2.0 EPSF-1.2 +%%Page: 1 1 +%%BoundingBox: -40 -93 292 73 +%%EndComments +180.000000 406.000000 
translate +1 setlinecap 1 setlinejoin +0.700 setlinewidth +0.00 setgray + +/Jrnd { exch cvi exch cvi dup 3 1 roll idiv mul } def +/JDEdict 8 dict def +JDEdict /mtrx matrix put +/JDE { + JDEdict begin + /yrad exch def + /xrad exch def + /savematrix mtrx currentmatrix def + xrad yrad scale + 0 0 1 0 360 arc + savematrix setmatrix + end +} def +/JSTR { + gsave 1 eq { gsave 1 setgray fill grestore } if + exch neg exch neg translate + clip + rotate + 4 dict begin + pathbbox /&top exch def + /&right exch def + /&bottom exch def + &right sub /&width exch def + newpath + currentlinewidth mul round dup + &bottom exch Jrnd exch &top + 4 -1 roll currentlinewidth mul setlinewidth + { &right exch moveto &width 0 rlineto stroke } for + end + grestore + newpath +} bind def + gsave /Times-Roman findfont 9.000000 scalefont setfont +0.000000 0.000000 translate +0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 288.000000 0.000000 lineto stroke +newpath 0.000000 0.000000 moveto 0.000000 -5.000000 lineto stroke +newpath 28.799999 0.000000 moveto 28.799999 -2.000000 lineto stroke +newpath 57.599998 0.000000 moveto 57.599998 -5.000000 lineto stroke +newpath 86.399994 0.000000 moveto 86.399994 -2.000000 lineto stroke +newpath 115.199997 0.000000 moveto 115.199997 -5.000000 lineto stroke +newpath 144.000000 0.000000 moveto 144.000000 -2.000000 lineto stroke +newpath 172.799988 0.000000 moveto 172.799988 -5.000000 lineto stroke +newpath 201.599991 0.000000 moveto 201.599991 -2.000000 lineto stroke +newpath 230.399994 0.000000 moveto 230.399994 -5.000000 lineto stroke +newpath 259.199982 0.000000 moveto 259.199982 -2.000000 lineto stroke +newpath 288.000000 0.000000 moveto 288.000000 -5.000000 lineto stroke +/Times-Roman findfont 11.000000 scalefont setfont +gsave 28.799999 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE SINGLE,SSE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 57.599998 -8.000000 translate -90.000000 rotate +0 -3.300000 translate 
(BYTWO_b SSE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 86.399994 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (BYTWO_b) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 115.199997 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (BYTWO_p SSE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 144.000000 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE QUAD,LAZY) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 172.799988 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE QUAD) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 201.599991 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (BYTWO_p) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 230.399994 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE DOUBLE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 259.199982 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (TABLE SINGLE) dup stringwidth pop pop 0 0 moveto +show +grestore +gsave 288.000000 -8.000000 translate -90.000000 rotate +0 -3.300000 translate (LOG) dup stringwidth pop pop 0 0 moveto +show +grestore + grestore +0.700000 setlinewidth gsave newpath 0.000000 0.000000 moveto 0.000000 72.000000 lineto stroke +newpath 0.000000 0.000000 moveto -5.000000 0.000000 lineto stroke +newpath 0.000000 8.552223 moveto -2.000000 8.552223 lineto stroke +newpath 0.000000 17.104446 moveto -5.000000 17.104446 lineto stroke +newpath 0.000000 25.656670 moveto -2.000000 25.656670 lineto stroke +newpath 0.000000 34.208893 moveto -5.000000 34.208893 lineto stroke +newpath 0.000000 42.761116 moveto -2.000000 42.761116 lineto stroke +newpath 0.000000 51.313339 moveto -5.000000 51.313339 lineto stroke +newpath 0.000000 59.865562 moveto -2.000000 59.865562 lineto stroke +newpath 0.000000 68.417786 moveto -5.000000 68.417786 lineto stroke +/Times-Roman findfont 9.000000 scalefont setfont +gsave -8.000000 
0.000000 translate 0.000000 rotate +0 -2.700000 translate (0) dup stringwidth pop neg 0 moveto +show +grestore +gsave -8.000000 17.104446 translate 0.000000 rotate +0 -2.700000 translate (2000) dup stringwidth pop neg 0 moveto +show +grestore +gsave -8.000000 34.208893 translate 0.000000 rotate +0 -2.700000 translate (4000) dup stringwidth pop neg 0 moveto +show +grestore +gsave -8.000000 51.313339 translate 0.000000 rotate +0 -2.700000 translate (6000) dup stringwidth pop neg 0 moveto +show +grestore +gsave -8.000000 68.417786 translate 0.000000 rotate +0 -2.700000 translate (8000) dup stringwidth pop neg 0 moveto +show +grestore +/Times-Bold findfont 10.000000 scalefont setfont +gsave -33.279999 36.000000 translate 90.000000 rotate +0 0.000000 translate (MB/s) dup stringwidth pop 2 div neg 0 moveto +show +grestore + grestore + gsave + gsave gsave 28.799999 72.000000 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -72.000000 lineto + 14.400000 -72.000000 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 57.599998 23.516296 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -23.516296 lineto + 14.400000 -23.516296 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 86.399994 20.308016 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -20.308016 lineto + 14.400000 -20.308016 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 115.199997 13.716681 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -13.716681 lineto + 14.400000 -13.716681 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 144.000000 11.183632 translate 0.000000 rotate + newpath 14.400000 0.000000 
moveto -14.400000 0.000000 lineto + -14.400000 -11.183632 lineto + 14.400000 -11.183632 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 172.799988 10.863582 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -10.863582 lineto + 14.400000 -10.863582 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 201.599991 8.547887 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -8.547887 lineto + 14.400000 -8.547887 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 230.399994 7.811883 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -7.811883 lineto + 14.400000 -7.811883 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 259.199982 4.485872 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -4.485872 lineto + 14.400000 -4.485872 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore gsave 288.000000 1.912226 translate 0.000000 rotate + newpath 14.400000 0.000000 moveto -14.400000 0.000000 lineto + -14.400000 -1.912226 lineto + 14.400000 -1.912226 lineto +closepath gsave 1.000000 1.000000 0.000000 setrgbcolor fill grestore +stroke + grestore grestore + grestore +-0.000000 -0.000000 translate + grestore showpage diff --git a/junk.sh b/junk.sh new file mode 100644 index 0000000..8e62234 --- /dev/null +++ b/junk.sh @@ -0,0 +1,14 @@ +gf_time 4 R -1 1024000 1000 - ; echo '-' +gf_time 4 R -1 1024000 1000 SHIFT - - ; echo 'SHIFT - -' +gf_time 4 R -1 1024000 1000 BYTWO_p - - ; echo 'BYTWO_p - -' +gf_time 4 R -1 1024000 1000 BYTWO_p SSE - ; echo 'BYTWO_p SSE -' +gf_time 4 R -1 1024000 1000 BYTWO_b - - ; echo 'BYTWO_b - -' +gf_time 4 R -1 
1024000 1000 BYTWO_b SSE - ; echo 'BYTWO_b SSE -' +gf_time 4 R -1 1024000 1000 TABLE - - ; echo 'TABLE - -' +gf_time 4 R -1 1024000 1000 TABLE SINGLE - ; echo 'TABLE SINGLE -' +gf_time 4 R -1 1024000 1000 TABLE DOUBLE - ; echo 'TABLE DOUBLE -' +gf_time 4 R -1 1024000 1000 TABLE QUAD - ; echo 'TABLE QUAD -' +gf_time 4 R -1 1024000 1000 TABLE QUAD,LAZY - ; echo 'TABLE QUAD,LAZY -' +gf_time 4 R -1 1024000 1000 TABLE SINGLE,SSE - ; echo 'TABLE SINGLE,SSE -' +gf_time 4 R -1 1024000 1000 TABLE SINGLE,NOSSE - ; echo 'TABLE SINGLE,NOSSE -' +gf_time 4 R -1 1024000 1000 LOG - - ; echo 'LOG - -' diff --git a/junk.txt b/junk.txt new file mode 100644 index 0000000..ea18270 --- /dev/null +++ b/junk.txt @@ -0,0 +1,110 @@ +static +void +gf_w16_bytwo_b_sse_region_2_noxor(gf_region_data *rd, struct gf_w16_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi16(btd->prim_poly&0xffff); + m1 = _mm_set1_epi16((btd->mask1)&0xffff); + m2 = _mm_set1_epi16((btd->mask2)&0xffff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + _mm_store_si128((__m128i *)d8, va); + d8 += 16; + s8 += 16; + } +#endif +} + +static +void +gf_w16_bytwo_b_sse_region_2_xor(gf_region_data *rd, struct gf_w16_bytwo_data *btd) +{ +#ifdef INTEL_SSE4 + int i; + uint8_t *d8, *s8, tb; + __m128i pp, m1, m2, t1, t2, va, vb; + + s8 = (uint8_t *) rd->s_start; + d8 = (uint8_t *) rd->d_start; + + pp = _mm_set1_epi16(btd->prim_poly&0xffff); + m1 = _mm_set1_epi16((btd->mask1)&0xffff); + m2 = _mm_set1_epi16((btd->mask2)&0xffff); + + while (d8 < (uint8_t *) rd->d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + SSE_AB2(pp, m1, m2, va, t1, t2); + vb = _mm_load_si128 ((__m128i *)(d8)); + vb = _mm_xor_si128(vb, va); + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } +#endif +} + + +static +void 
+gf_w16_bytwo_b_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + int itb; + uint8_t *d8, *s8; + __m128i pp, m1, m2, t1, t2, va, vb; + struct gf_w16_bytwo_data *btd; + gf_region_data rd; + + if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } + if (val == 1) { gf_multby_one(gf, src, dest, bytes, xor); return; } + + gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); + gf_do_initial_region_alignment(&rd); + + btd = (struct gf_w16_bytwo_data *) ((gf_internal_t *) (gf->scratch))->private; + + if (val == 2) { + if (xor) { + gf_w16_bytwo_b_sse_region_2_xor(&rd, btd); + } else { + gf_w16_bytwo_b_sse_region_2_noxor(&rd, btd); + } + gf_do_final_region_alignment(&rd); + return; + } + + s8 = (uint8_t *) rd.s_start; + d8 = (uint8_t *) rd.d_start; + + pp = _mm_set1_epi16(btd->prim_poly&0xffff); + m1 = _mm_set1_epi16((btd->mask1)&0xffff); + m2 = _mm_set1_epi16((btd->mask2)&0xffff); + + while (d8 < (uint8_t *) rd.d_top) { + va = _mm_load_si128 ((__m128i *)(s8)); + vb = (!xor) ? 
_mm_setzero_si128() : _mm_load_si128 ((__m128i *)(d8)); + itb = val; + while (1) { + if (itb & 1) vb = _mm_xor_si128(vb, va); + itb >>= 1; + if (itb == 0) break; + SSE_AB2(pp, m1, m2, va, t1, t2); + } + _mm_store_si128((__m128i *)d8, vb); + d8 += 16; + s8 += 16; + } + + gf_do_final_region_alignment(&rd); +#endif +} + diff --git a/junk_gf_unit.c b/junk_gf_unit.c new file mode 100644 index 0000000..3ae45a3 --- /dev/null +++ b/junk_gf_unit.c @@ -0,0 +1,957 @@ +/* + * gf_unit.c + * + * Performs unit testing for gf arithmetic + */ + +#include +#include +#include +#include +#include +#include + +#include "gf.h" +#include "gf_int.h" +#include "gf_method.h" +#include "gf_rand.h" + +#define REGION_SIZE (65536) + +static +uint8_t get_alt_map_2w8(int offset, uint8_t *buf, int region_size) +{ + uint8_t symbol = 0; + int bit_off = offset % 2; + + if (bit_off == 0) { + symbol = buf[offset / 2] & 0x0f | ((buf[(offset / 2)+region_size] & 0x0f) << 4); + } else { + symbol = ((buf[offset / 2] & 0xf0) >> 4) | (buf[(offset / 2)+region_size] & 0xf0); + } + + return symbol; +} + +static +uint16_t get_alt_map_2w16(int offset, uint8_t *buf, int region_size) +{ + uint16_t symbol = 0; + + symbol = buf[offset] | (buf[offset+region_size] << 8); + + return symbol; +} + +static +uint32_t get_alt_map_2w32(int offset, uint8_t *buf, int region_size) +{ + uint32_t symbol = 0; + uint16_t buf_a = buf[offset] | (buf[offset + 1] << 8); + uint16_t buf_b = buf[offset + region_size] | (buf[offset + region_size + 1] << 8); + + symbol = buf_a | (buf_b << 16); + + return symbol; +} + +static +void test_alt_map() +{ + uint8_t* buf = (uint8_t*)malloc(sizeof(uint8_t)*REGION_SIZE); + int i=0; + uint8_t c=1, next_c; + + for (i=0; i < REGION_SIZE/2;i++) { + if (c == 255) c = 1; + buf[i] = c; + buf[i+(REGION_SIZE/2)] = c; + c++; + } + + + c = 1; + for (i=0; i < REGION_SIZE;i++) { + uint8_t sym_w8 = get_alt_map_2w8(i, buf, REGION_SIZE/2); + uint8_t c_val = ((i % 2) == 0) ? 
(c & 0x0f) : ((c & 0xf0) >> 4); + uint8_t exp_sym_w8 = c_val | c_val << 4; + + if (exp_sym_w8 != sym_w8) { + fprintf(stderr, "Alt mapping failure (w=8,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w8, sym_w8); + exit(1); + } + + if ((i % 2) == 1) { + c++; + } + if (c == 255) { + c = 1; + } + } + + c = 1; + + for (i=0; i < REGION_SIZE/2;i++) { + uint16_t sym_w16 = get_alt_map_2w16(i, buf, REGION_SIZE/2); + uint16_t exp_sym_w16 = c | c << 8; + + if (exp_sym_w16 != sym_w16) { + fprintf(stderr, "Alt mapping failure (w=16,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w16, sym_w16); + exit(1); + } + + c++; + if (c == 255) { + c = 1; + } + } + + c = 1; + next_c = 2; + + for (i=0; i < REGION_SIZE/4;i++) { + uint32_t sym_w32 = get_alt_map_2w32(i, buf, REGION_SIZE/2); + uint32_t exp_sym_w32 = c | (next_c << 8) | c << 16 | (next_c << 24); + + if (exp_sym_w32 != sym_w32) { + fprintf(stderr, "Alt mapping failure (w=32,c=%d,i=%d): %u != %u\n", c, i, exp_sym_w32, sym_w32); + exit(1); + } + c++; + next_c++; + if (c == 255) { + c = 1; + next_c = 2; + } else if (c == 254) { + next_c = 1; + } + } + +} + +void fill_random_region(void *reg, int size) +{ + uint32_t *r; + int i; + + r = (uint32_t *) reg; + for (i = 0; i < size/sizeof(uint32_t); i++) { + r[i] = MOA_Random_32(); + } +} + +void problem(char *s) +{ + fprintf(stderr, "Unit test failed.\n"); + fprintf(stderr, "%s\n", s); + exit(1); +} + +void usage(char *s) +{ + fprintf(stderr, "usage: gf_unit w tests seed [method] - does unit testing in GF(2^w)\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Legal w are: 4, 8, 16, 32, 64 and 128\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Tests may be any combination of:\n"); + fprintf(stderr, " A: All\n"); + fprintf(stderr, " S: Single operations (multiplication/division)\n"); + fprintf(stderr, " R: Region operations\n"); + fprintf(stderr, " V: Verbose Output\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Use -1 for time(0) as a seed.\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "For method 
specification, type gf_methods\n"); + fprintf(stderr, "\n"); + if (s != NULL) fprintf(stderr, "%s\n", s); + exit(1); +} + +int main(int argc, char **argv) +{ + int w, i, j, verbose, single, region, xor, off, size, sindex, eindex, tested, top; + uint32_t a, b, c, d, ai, da, bi, mask; + uint64_t a64, b64, c64, d64; + uint64_t a128[2], b128[2], c128[2], d128[2], e128[2]; + gf_t gf, gf_def; + uint8_t *r8b, *r8c, *r8d; + uint16_t *r16b, *r16c, *r16d; + uint32_t *r32b, *r32c, *r32d; + uint64_t *r64b, *r64c, *r64d; + uint64_t *r128b, *r128c, *r128d; + time_t t0; + gf_internal_t *h; + + if (argc < 4) usage(NULL); + if (sscanf(argv[1], "%d", &w) == 0) usage("Bad w\n"); + if (sscanf(argv[3], "%ld", &t0) == 0) usage("Bad seed\n"); + if (t0 == -1) t0 = time(0); + MOA_Seed(t0); + + if (w > 32 && w != 64 && w != 128) usage("Bad w"); + + if (create_gf_from_argv(&gf, w, argc, argv, 4) == 0) usage("Bad Method"); + + for (i = 0; i < strlen(argv[2]); i++) { + if (strchr("ASRV", argv[2][i]) == NULL) usage("Bad test\n"); + } + + h = (gf_internal_t *) gf.scratch; + if (w <= 32) { + mask = 0; + for (i = 0; i < w; i++) mask |= (1 << i); + } + + verbose = (strchr(argv[2], 'V') != NULL); + single = (strchr(argv[2], 'S') != NULL || strchr(argv[2], 'A') != NULL); + region = (strchr(argv[2], 'R') != NULL || strchr(argv[2], 'A') != NULL); + + if (((h->region_type & GF_REGION_ALTMAP) != 0) && (h->mult_type == GF_MULT_COMPOSITE)) { + test_alt_map(); + } + + if (!gf_init_easy(&gf_def, w, GF_MULT_DEFAULT)) problem("No default for this value of w"); + + if (verbose) printf("Seed: %ld\n", t0); + + if (single) { + + if (w <= 32) { + if (gf.multiply.w32 == NULL) problem("No multiplication operation defined."); + if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); } + if (w <= 10) { + top = (1 << w)*(1 << w); + } else { + top = 1000000; + } + for (i = 0; i < top; i++) { + if (w <= 10) { + a = i % (1 << w); + b = i >> w; + } else if (i < 10) { + a = 0; + b = 
MOA_Random_W(w, 1); + } else if (i < 20) { + b = 0; + a = MOA_Random_W(w, 1); + } else if (i < 30) { + a = 1; + b = MOA_Random_W(w, 1); + } else if (i < 40) { + b = 1; + a = MOA_Random_W(w, 1); + } else { + a = MOA_Random_W(w, 1); + b = MOA_Random_W(w, 1); + } + + c = gf.multiply.w32(&gf, a, b); + tested = 0; + + /* If this is not composite, then first test against the default: */ + + if (h->mult_type != GF_MULT_COMPOSITE) { + tested = 1; + d = gf_def.multiply.w32(&gf_def, a, b); + + if (c != d) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" The default returned %x\n", d); + exit(1); + } + } + + /* Now, we also need to double-check, in case the default is wanky, and when + we're performing composite operations. Start with 0 and 1: */ + + if (a == 0 || b == 0 || a == 1 || b == 1) { + tested = 1; + if (((a == 0 || b == 0) && c != 0) || + (a == 1 && c != b) || + (b == 1 && c != a)) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x, which is clearly wrong.\n", a, b, c); + exit(1); + } + + /* If division or inverses are defined, let's test all combinations to make sure + that the operations are consistent with each other. 
*/ + + } else { + if ((c & mask) != c) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x, which is too big.\n", a, b, c); + exit(1); + } + + } + if (gf.inverse.w32 != NULL && (a != 0 || b != 0)) { + tested = 1; + if (a != 0) { + ai = gf.inverse.w32(&gf, a); + + if (gf.multiply.w32(&gf, c, ai) != b) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n", + a, ai, c, ai, gf.multiply.w32(&gf, c, ai)); + exit(1); + } + } + if (b != 0) { + bi = gf.inverse.w32(&gf, b); + if (gf.multiply.w32(&gf, c, bi) != a) { + printf("Error in single multiplication (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" The inverse of %x is %x, and gf_multiply.w32() of %x and %x equals %x\n", + b, bi, c, bi, gf.multiply.w32(&gf, c, bi)); + exit(1); + } + } + } + if (gf.divide.w32 != NULL && (a != 0 || b != 0)) { + tested = 1; + + if (a != 0) { + ai = gf.divide.w32(&gf, c, a); + + if (ai != b) { + printf("Error in single multiplication/division (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" gf.divide.w32() of %x and %x returned %x\n", c, a, ai); + exit(1); + } + } + if (b != 0) { + bi = gf.divide.w32(&gf, c, b); + + if (bi != a) { + printf("Error in single multiplication/division (all numbers in hex):\n\n"); + printf(" gf.multiply.w32() of %x and %x returned %x\n", a, b, c); + printf(" gf.divide.w32() of %x and %x returned %x\n", c, b, bi); + exit(1); + } + } + } + + if (!tested) problem("There is no way to test multiplication.\n"); + } + + } else if (w == 64) { + if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); } + if (gf.multiply.w64 == NULL) problem("No multiplication operation defined."); + for 
(i = 0; i < 1000; i++) { + for (j = 0; j < 1000; j++) { + a64 = MOA_Random_64(); + b64 = MOA_Random_64(); + c64 = gf.multiply.w64(&gf, a64, b64); + if ((a64 == 0 || b64 == 0) && c64 != 0) problem("Single Multiplication by zero Failed"); + if (a64 != 0 && b64 != 0) { + d64 = (gf.divide.w64 == NULL) ? gf_def.divide.w64(&gf_def, c64, b64) : gf.divide.w64(&gf, c64, b64); + if (d64 != a64) { + printf("0x%llx * 0x%llx =? 0x%llx (check-a: 0x%llx)\n", a64, b64, c64, d64); + problem("Single multiplication/division failed"); + } + } + } + } + if (gf.inverse.w64 == NULL) { + printf("No inverse defined for this method.\n"); + } else { + if (verbose) { printf("Testing Inversions.\n"); fflush(stdout); } + for (i = 0; i < 1000; i++) { + do { a64 = MOA_Random_64(); } while (a64 == 0); + b64 = gf.inverse.w64(&gf, a64); + if (gf.multiply.w64(&gf, a64, b64) != 1) problem("Inversion failed.\n"); + } + } + } else if (w == 128) { + if (verbose) { printf("Testing single multiplications/divisions.\n"); fflush(stdout); } + if (gf.multiply.w128 == NULL) problem("No multiplication operation defined."); + for (i = 0; i < 500; i++) { + for (j = 0; j < 500; j++) { + MOA_Random_128(a128); + MOA_Random_128(b128); + gf.multiply.w128(&gf, a128, b128, c128); + if ((GF_W128_IS_ZERO(a128) && GF_W128_IS_ZERO(b128)) && !(GF_W128_IS_ZERO(c128))) problem("Single Multiplication by zero Failed"); + if (!GF_W128_IS_ZERO(a128) && !GF_W128_IS_ZERO(b128)) { + gf.divide.w128 == NULL ? gf_def.divide.w128(&gf_def, c128, b128, d128) : gf.divide.w128(&gf, c128, b128, d128); + if (!GF_W128_EQUAL(a128, d128)) { + printf("0x%llx 0x%llx * 0x%llx 0x%llx =? 
0x%llx 0x%llx (check-a: 0x%llx 0x%llx)\n", a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]); + problem("Single multiplication/division failed"); + } + } + } + } + if (gf.inverse.w128 == NULL) { + printf("No inverse defined for this method.\n"); + } else { + if (verbose) { printf("Testing Inversions.\n"); fflush(stdout); } + for (i = 0; i < 1000; i++) { + do { MOA_Random_128(a128); } while (GF_W128_IS_ZERO(a128)); + gf.inverse.w128(&gf, a128, b128); + gf.multiply.w128(&gf, a128, b128, c128); + if (!(c128[0] == 0 && c128[1] == 1)) problem("Inversion failed.\n"); + } + } + + } else { + problem("Value of w not implemented yet"); + } + } + + if (region) { + + if (w == 4) { + if (gf.multiply_region.w32 == NULL) { + printf("No multiply_region.\n"); + } else { + r8b = (uint8_t *) malloc(REGION_SIZE); + r8c = (uint8_t *) malloc(REGION_SIZE); + r8d = (uint8_t *) malloc(REGION_SIZE); + fill_random_region(r8b, REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (a = 0; a < 16; a++) { + fill_random_region(r8c, REGION_SIZE); + memcpy(r8d, r8c, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint8_t); + gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor); + for (i = sindex; i < eindex; i++) { + b = (r8b[i] >> 4); + c = (r8c[i] >> 4); + d = (r8d[i] >> 4); + if (!xor && gf.multiply.w32(&gf, a, b) != c) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); + printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) ); + printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=0"); + } + if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { + printf("i=%d. 
Address 0x%lx\n", i, (unsigned long) (r8b+i)); + printf(" %d %d %d %d\n", a, b, c, d); + printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + b = (r8b[i] & 0xf); + c = (r8c[i] & 0xf); + d = (r8d[i] & 0xf); + if (!xor && gf.multiply.w32(&gf, a, b) != c) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); + printf(" %d * %d = %d, but should equal %d\n", a, b, c, gf.multiply.w32(&gf, a, b) ); + printf("i=%d. 0x%x 0x%x 0x%x 0x%x\n", i, a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=0"); + } + if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r8b+i)); + printf(" (%d * %d ^ %d) should equal %d - equals %d\n", + a, b, d, (gf.multiply.w32(&gf, a, b) ^ d), c); + printf(" %d %d %d %d\n", a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (a = 0; a < 16; a++) { + fill_random_region(r8b, REGION_SIZE); + memcpy(r8d, r8b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint8_t); + gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor); + for (i = sindex; i < eindex; i++) { + b = (r8b[i] >> 4); + d = (r8d[i] >> 4); + if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); + if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { + printf("i=%d. %d %d %d\n", i, a, b, d); + printf("i=%d. 
%d %d %d\n", i, a, r8b[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + b = (r8b[i] & 0xf); + d = (r8d[i] & 0xf); + if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); + if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { + printf("%d %d %d\n", a, b, d); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r8b); + free(r8c); + free(r8d); + } + } else if (w == 8) { + if (gf.multiply_region.w32 == NULL) { + printf("No multiply_region.\n"); + } else { + r8b = (uint8_t *) malloc(REGION_SIZE); + r8c = (uint8_t *) malloc(REGION_SIZE); + r8d = (uint8_t *) malloc(REGION_SIZE); + fill_random_region(r8b, REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (a = 0; a < 256; a++) { + fill_random_region(r8c, REGION_SIZE); + memcpy(r8d, r8c, REGION_SIZE); + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + sindex = 0; + eindex = REGION_SIZE; + } else { + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); + } + size = (eindex-sindex)*sizeof(uint8_t); + gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8c+sindex), a, size, xor); + for (i = sindex; i < eindex; i++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + b = get_alt_map_2w8(i, (uint8_t*)r8b, REGION_SIZE / 2); + c = get_alt_map_2w8(i, (uint8_t*)r8c, REGION_SIZE / 2); + d = get_alt_map_2w8(i, (uint8_t*)r8d, REGION_SIZE / 2); + } else { + b = r8b[i]; + c = r8c[i]; + d = r8d[i]; + } + if (!xor && gf.multiply.w32(&gf, a, b) != c) { + printf("i=%d. %d %d %d %d\n", i, a, b, c, d); + printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); + printf("%llx. 
Sindex: %d\n", r8b+i, sindex); + problem("Failed buffer-constant, xor=0"); + } + if (xor && (gf.multiply.w32(&gf, a, b) ^ d) != c) { + printf("i=%d. %d %d %d %d\n", i, a, b, c, d); + printf("i=%d. %d %d %d %d\n", i, a, r8b[i], r8c[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + continue; + } + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (a = 0; a < 256; a++) { + fill_random_region(r8b, REGION_SIZE); + memcpy(r8d, r8b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint8_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint8_t); + gf.multiply_region.w32(&gf, (void *) (r8b+sindex), (void *) (r8b+sindex), a, size, xor); + for (i = sindex; i < eindex; i++) { + b = r8b[i]; + d = r8d[i]; + if (!xor && gf.multiply.w32(&gf, a, d) != b) problem("Failed buffer-constant, xor=0"); + if (xor && (gf.multiply.w32(&gf, a, d) ^ d) != b) { + printf("i=%d. %d %d %d\n", i, a, b, d); + printf("i=%d. 
%d %d %d\n", i, a, r8b[i], r8d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r8b); + free(r8c); + free(r8d); + } + } else if (w == 16) { + if (gf.multiply_region.w32 == NULL) { + printf("No multiply_region.\n"); + } else { + r16b = (uint16_t *) malloc(REGION_SIZE); + r16c = (uint16_t *) malloc(REGION_SIZE); + r16d = (uint16_t *) malloc(REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + fill_random_region(r16b, REGION_SIZE); + a = MOA_Random_W(w, 0); + fill_random_region(r16c, REGION_SIZE); + memcpy(r16d, r16c, REGION_SIZE); + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + sindex = 0; + eindex = REGION_SIZE / sizeof(uint16_t); + } else { + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1); + } + size = (eindex-sindex)*sizeof(uint16_t); + gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16c+sindex), a, size, xor); + ai = gf.inverse.w32(&gf, a); + if (!xor) { + gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), ai, size, xor); + } else { + gf.multiply_region.w32(&gf, (void *) (r16c+sindex), (void *) (r16d+sindex), 1, size, xor); + gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), ai, size, xor); + } + + for (i = sindex; i < eindex; i++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + b = get_alt_map_2w16(i, (uint8_t*)r16b, size / 2); + c = get_alt_map_2w16(i, (uint8_t*)r16c, size / 2); + d = get_alt_map_2w16(i, (uint8_t*)r16d, size / 2); + } else { + b = r16b[i]; + c = r16c[i]; + d = r16d[i]; + } + if (!xor && d != b) { + printf("i=%d. 
Address 0x%lx\n", i, (unsigned long) (r16b+i)); + printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d); + printf("%d is the inverse of %d\n", ai, a); + problem("Failed buffer-constant, xor=0"); + } + if (xor && b != 0) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); + printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); + printf(" b should equal 0, but it doesn't. Probe into it.\n"); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + continue; + } + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a = MOA_Random_W(w, 0); + fill_random_region(r16b, REGION_SIZE); + memcpy(r16d, r16b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint16_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint16_t); + gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), a, size, xor); + ai = gf.inverse.w32(&gf, a); + if (!xor) { + gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, xor); + } else { + gf.multiply_region.w32(&gf, (void *) (r16d+sindex), (void *) (r16b+sindex), 1, size, xor); + gf.multiply_region.w32(&gf, (void *) (r16b+sindex), (void *) (r16b+sindex), ai, size, 0); + } + + for (i = sindex; i < eindex; i++) { + b = r16b[i]; + c = r16c[i]; + d = r16d[i]; + if (!xor && (d != b)) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r16b+i)); + printf("We did d=b; b = ba; b = b(a^-1).\n"); + printf("So, b should equal d, but it doesn't. Look into it.\n"); + printf("b = %d. d = %d. a = %d\n", b, d, a); + problem("Failed buffer-constant, xor=0"); + } + if (xor && d != b) { + printf("i=%d. 
Address 0x%lx\n", i, (unsigned long) (r16b+i)); + printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n"); + printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); + printf("So, b should equal d, but it doesn't. Look into it.\n"); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r16b); + free(r16c); + free(r16d); + } + } else if (w == 32) { + if (gf.multiply_region.w32 == NULL) { + printf("No multiply_region.\n"); + } else { + r32b = (uint32_t *) malloc(REGION_SIZE); + r32c = (uint32_t *) malloc(REGION_SIZE); + r32d = (uint32_t *) malloc(REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a = MOA_Random_32(); + fill_random_region(r32b, REGION_SIZE); + fill_random_region(r32c, REGION_SIZE); + memcpy(r32d, r32c, REGION_SIZE); + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + sindex = 0; + eindex = REGION_SIZE / sizeof(uint32_t); + } else { + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1); + } + size = (eindex-sindex)*sizeof(uint32_t); + gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32c+sindex), a, size, xor); + ai = gf.inverse.w32(&gf, a); + if (!xor) { + gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), ai, size, xor); + } else { + gf.multiply_region.w32(&gf, (void *) (r32c+sindex), (void *) (r32d+sindex), 1, size, xor); + gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), ai, size, xor); + } + for (i = sindex; i < eindex; i++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + b = get_alt_map_2w32(i, (uint8_t*)r32b, size / 2); + c = get_alt_map_2w32(i, (uint8_t*)r32c, size / 2); + d = get_alt_map_2w32(i, 
(uint8_t*)r32d, size / 2); + i++; + } else { + b = r32b[i]; + c = r32c[i]; + d = r32d[i]; + } + if (!xor && d != b) { + printf("i=%d. Addresses: b: 0x%lx\n", i, (unsigned long) (r32b+i)); + printf("We have %d * %d = %d, and %d * %d = %d.\n", a, b, c, c, ai, d); + printf("%d is the inverse of %d\n", ai, a); + problem("Failed buffer-constant, xor=0"); + } + if (xor && b != 0) { + printf("i=%d. Addresses: b: 0x%lx c: 0x%lx d: 0x%lx\n", i, + (unsigned long) (r32b+i), (unsigned long) (r32c+i), (unsigned long) (r32d+i)); + printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); + printf(" b should equal 0, but it doesn't. Probe into it.\n"); + printf("a: %8x b: %8x c: %8x, d: %8x\n", a, b, c, d); + problem("Failed buffer-constant, xor=1"); + } + + } + } + } + for (xor = 0; xor < 2; xor++) { + if ((((gf_internal_t*)gf.scratch)->region_type & GF_REGION_ALTMAP) != 0 && + (((gf_internal_t*)gf.scratch)->mult_type == GF_MULT_COMPOSITE)) { + continue; + } + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a = MOA_Random_32(); + fill_random_region(r32b, REGION_SIZE); + memcpy(r32d, r32b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint32_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint32_t); + gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), a, size, xor); + ai = gf.inverse.w32(&gf, a); + if (!xor) { + gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, xor); + } else { + gf.multiply_region.w32(&gf, (void *) (r32d+sindex), (void *) (r32b+sindex), 1, size, xor); + gf.multiply_region.w32(&gf, (void *) (r32b+sindex), (void *) (r32b+sindex), ai, size, 0); + } + + for (i = sindex; i < eindex; i++) { + b = r32b[i]; + c = r32c[i]; + d = r32d[i]; + if (!xor && (d != b)) { + printf("i=%d. 
Address 0x%lx\n", i, (unsigned long) (r32b+i)); + printf("We did d=b; b = ba; b = b(a^-1).\n"); + printf("So, b should equal d, but it doesn't. Look into it.\n"); + printf("b = %d. d = %d. a = %d\n", b, d, a); + problem("Failed buffer-constant, xor=0"); + } + if (xor && d != b) { + printf("i=%d. Address 0x%lx\n", i, (unsigned long) (r32b+i)); + printf("We did d=b; b = b + ba; b += d; b = b(a^-1);\n"); + printf("We did d=c; c ^= ba; d ^= c; b ^= (a^-1)d;\n"); + printf("So, b should equal d, but it doesn't. Look into it.\n"); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r32b); + free(r32c); + free(r32d); + } + } else if (w == 64) { + if (gf.multiply_region.w64 == NULL) { + printf("No multiply_region.\n"); + } else { + r64b = (uint64_t *) malloc(REGION_SIZE); + r64c = (uint64_t *) malloc(REGION_SIZE); + r64d = (uint64_t *) malloc(REGION_SIZE); + fill_random_region(r64b, REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a64 = MOA_Random_64(); + fill_random_region(r64c, REGION_SIZE); + memcpy(r64d, r64c, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint64_t); + gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64c+sindex), a64, size, xor); + for (i = sindex; i < eindex; i++) { + b64 = r64b[i]; + c64 = r64c[i]; + d64 = r64d[i]; + if (!xor && gf.multiply.w64(&gf, a64, b64) != c64) { + printf("i=%d. 0x%llx 0x%llx 0x%llx should be 0x%llx\n", i, a64, b64, c64, + gf.multiply.w64(&gf, a64, b64)); + printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i]); + problem("Failed buffer-constant, xor=0"); + } + if (xor && (gf.multiply.w64(&gf, a64, b64) ^ d64) != c64) { + printf("i=%d. 0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, b64, c64, d64); + printf("i=%d. 
0x%llx 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64c[i], r64d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + a64 = MOA_Random_64(); + fill_random_region(r64b, REGION_SIZE); + memcpy(r64d, r64b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/sizeof(uint64_t)-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint64_t); + gf.multiply_region.w64(&gf, (void *) (r64b+sindex), (void *) (r64b+sindex), a64, size, xor); + for (i = sindex; i < eindex; i++) { + b64 = r64b[i]; + d64 = r64d[i]; + if (!xor && gf.multiply.w64(&gf, a64, d64) != b64) problem("Failed buffer-constant, xor=0"); + if (xor && (gf.multiply.w64(&gf, a64, d64) ^ d64) != b64) { + printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, b64, d64); + printf("i=%d. 0x%llx 0x%llx 0x%llx\n", i, a64, r64b[i], r64d[i]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r64b); + free(r64c); + free(r64d); + } + } else if (w == 128) { + if (gf.multiply_region.w128 == NULL) { + printf("No multiply_region.\n"); + } else { + r128b = (uint64_t *) malloc(REGION_SIZE); + r128c = (uint64_t *) malloc(REGION_SIZE); + r128d = (uint64_t *) malloc(REGION_SIZE); + fill_random_region(r128b, REGION_SIZE); + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src != dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + MOA_Random_128(a128); + fill_random_region(r128c, REGION_SIZE); + memcpy(r128d, r128c, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1); + size = (eindex-sindex)*sizeof(uint64_t)*2; + gf.multiply_region.w128(&gf, (void *) (r128b+sindex*2), (void *) (r128c+sindex*2), a128, size, xor); + for (i = sindex; i < eindex; i++) { + b128[0] = r128b[2*i]; + b128[1] = r128b[2*i+1]; + c128[0] = 
r128c[2*i]; + c128[1] = r128c[2*i+1]; + d128[0] = r128d[2*i]; + d128[1] = r128d[2*i+1]; + gf.multiply.w128(&gf, a128, b128, e128); + if (xor) { + e128[0] ^= d128[0]; + e128[1] ^= d128[1]; + } + if (!xor && !GF_W128_EQUAL(c128, e128)) { + printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx should be 0x%llx%llx\n", + i, a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], e128[0], e128[1]); + problem("Failed buffer-constant, xor=0"); + } + if (xor && !GF_W128_EQUAL(e128, c128)) { + printf("i=%d. 0x%llx%llx 0x%llx%llx 0x%llx%llx 0x%llx%llx\n", i, + a128[0], a128[1], b128[0], b128[1], c128[0], c128[1], d128[0], d128[1]); + problem("Failed buffer-constant, xor=1"); + } + } + } + } + for (xor = 0; xor < 2; xor++) { + if (verbose) { + printf("Testing buffer-constant, src == dest, xor = %d\n", xor); + fflush(stdout); + } + for (j = 0; j < 1000; j++) { + MOA_Random_128(a128); + fill_random_region(r128b, REGION_SIZE); + memcpy(r128d, r128b, REGION_SIZE); + sindex = MOA_Random_W(3, 1); + sindex = 0; + eindex = REGION_SIZE/(2*sizeof(uint64_t))-MOA_Random_W(3, 1); + eindex = REGION_SIZE/(2*sizeof(uint64_t)); + size = (eindex-sindex)*sizeof(uint64_t)*2; + gf.multiply_region.w128(&gf, (void *) (r128b+sindex), (void *) (r128b+sindex), a128, size, xor); + for (i = sindex; i < eindex; i++) { + b128[0] = r128b[2*i]; + b128[1] = r128b[2*i + 1]; + d128[0] = r128d[2*i]; + d128[1] = r128d[2*i + 1]; + gf.multiply.w128(&gf, a128, d128, e128); + if (xor) { + e128[0] ^= d128[0]; + e128[1] ^= d128[1]; + } + if (!xor && !GF_W128_EQUAL(b128, e128)) problem("Failed buffer-constant, xor=0"); + if (xor && !GF_W128_EQUAL(b128, e128)) { + problem("Failed buffer-constant, xor=1"); + } + } + } + } + free(r128b); + free(r128c); + free(r128d); + } + } + } + exit(0); +} diff --git a/tests.txt b/tests.txt new file mode 100644 index 0000000..e69de29 diff --git a/tmp-10-out.txt b/tmp-10-out.txt new file mode 100644 index 0000000..e69de29 diff --git a/tmp-time-test.sh b/tmp-time-test.sh new file mode 100644 
index 0000000..b1c7823 --- /dev/null +++ b/tmp-time-test.sh @@ -0,0 +1,14 @@ +if [ $# -lt 4 ]; then + echo 'usage: sh tmp-test.sh w gf_specs (e.g. LOG - -)' >&2 + exit 1 +fi + +w=$1 +shift +i=1024 +while [ $i -le 134217728 ]; do + iter=`echo $i | awk '{ print (134217728/$1)*1 }'` + gf_time $w R -1 $i $iter $* | head -n 3 + echo $i $iter $w $* + i=`echo $i | awk '{ print $1*2 }'` +done diff --git a/tmp.c b/tmp.c new file mode 100644 index 0000000..a6deaab --- /dev/null +++ b/tmp.c @@ -0,0 +1,1583 @@ +/* + * gf_w32.c + * + * Routines for 32-bit Galois fields + */ + +#define MM_PRINT32(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); } + +#define MM_PRINT8(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 1) printf("%s%02x", (ii%4==0) ? " " : " ", blah[15-ii]); printf("\n"); } + +#include "gf_int.h" +#include +#include + +#define GF_FIELD_WIDTH (32) +#define GF_FIRST_BIT (1 << 31) + +#define GF_BASE_FIELD_WIDTH (16) +#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) +#define GF_BASE_FIELD_GROUP_SIZE GF_BASE_FIELD_SIZE-1 +#define GF_S_GF_16_2 (40188) +#define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? 
(((p) << 1) ^ h->prim_poly) : (p) << 1); + + +struct gf_w16_logtable_data { + int log_tbl[GF_BASE_FIELD_SIZE]; + gf_val_16_t _antilog_tbl[GF_BASE_FIELD_SIZE * 4]; + gf_val_16_t *antilog_tbl; + gf_val_16_t inv_tbl[GF_BASE_FIELD_SIZE]; +}; + +struct gf_split_2_32_lazy_data { + gf_val_32_t last_value; + gf_val_32_t tables[16][4]; +}; + +struct gf_split_8_8_data { + gf_val_32_t tables[7][256][256]; +}; + +struct gf_split_4_32_lazy_data { + gf_val_32_t last_value; + gf_val_32_t tables[8][16]; +}; + +static +inline +gf_val_32_t gf_w32_inverse_from_divide (gf_t *gf, gf_val_32_t a) +{ + return gf->divide.w32(gf, 1, a); +} + +static +inline +gf_val_32_t gf_w32_divide_from_inverse (gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + b = gf->inverse.w32(gf, b); + return gf->multiply.w32(gf, a, b); +} + +static +void +gf_w32_multiply_region_from_single(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int +xor) +{ + int i; + gf_val_32_t *s32; + gf_val_32_t *d32; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + + if (xor) { + for (i = 0; i < bytes/sizeof(gf_val_32_t); i++) { + d32[i] ^= gf->multiply.w32(gf, val, s32[i]); + } + } else { + for (i = 0; i < bytes/sizeof(gf_val_32_t); i++) { + d32[i] = gf->multiply.w32(gf, val, s32[i]); + } + } +} + +/* Extended Euclidean algorithm on the polynomial pair (prim_poly, b). + e_* hold successive remainders, d_* the bit index tracked for each + remainder's leading term, and y_* the running coefficient that is + returned once the remainder reaches 1. Returns -1 when b is 0, and 0 + when a remainder vanishes before reaching 1 (no inverse can be formed). +*/ + +static +inline +gf_val_32_t gf_w32_euclid (gf_t *gf, gf_val_32_t b) +{ + gf_val_32_t e_i, e_im1, e_ip1; + gf_val_32_t d_i, d_im1, d_ip1; + gf_val_32_t y_i, y_im1, y_ip1; + gf_val_32_t c_i; + + if (b == 0) return -1; + e_im1 = ((gf_internal_t *) (gf->scratch))->prim_poly; + e_i = b; + d_im1 = 32; + for (d_i = d_im1-1; ((1 << d_i) & e_i) == 0; d_i--) ; + y_i = 1; + y_im1 = 0; + + while (e_i != 1) { + + e_ip1 = e_im1; + d_ip1 = d_im1; + c_i = 0; + + while (d_ip1 >= d_i) { + c_i ^= (1 << (d_ip1 - d_i)); + e_ip1 ^= (e_i << (d_ip1 - d_i)); + d_ip1--; + /* A zero remainder has no set bit for the scan below to stop on. */ + if (e_ip1 == 0) return 0; + while ((e_ip1 & (1 << d_ip1)) == 0) d_ip1--; + } + + y_ip1 = y_im1 ^ gf->multiply.w32(gf, c_i, y_i); + y_im1 = y_i; + y_i = y_ip1; + + e_im1 = e_i; + d_im1 = d_i; + e_i =
e_ip1; + d_i = d_ip1; + } + + return y_i; +} + +static +inline +gf_val_32_t gf_w32_matrix (gf_t *gf, gf_val_32_t b) +{ + return gf_bitmatrix_inverse(b, 32, ((gf_internal_t *) (gf->scratch))->prim_poly); +} + +/* JSP: GF_MULT_SHIFT: The world's dumbest multiplication algorithm. I only + include it for completeness. It does have the feature that it requires no + extra memory. +*/ + +static +inline +gf_val_32_t +gf_w32_shift_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) +{ + uint64_t product, i, pp, a, b, one; + gf_internal_t *h; + + a = a32; + b = b32; + h = (gf_internal_t *) gf->scratch; + one = 1; + pp = h->prim_poly | (one << 32); + + product = 0; + + for (i = 0; i < GF_FIELD_WIDTH; i++) { + if (a & (one << i)) product ^= (b << i); + } + for (i = (GF_FIELD_WIDTH*2-1); i >= GF_FIELD_WIDTH; i--) { + if (product & (one << i)) product ^= (pp << (i-GF_FIELD_WIDTH)); + } + return product; +} + +static +int gf_w32_shift_init(gf_t *gf) +{ + gf->multiply.w32 = gf_w32_shift_multiply; + gf->inverse.w32 = gf_w32_euclid; + gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + return 1; +} + +static +inline +gf_val_32_t +gf_w32_split_8_8_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32) +{ + uint32_t product, i, j, mask, tb; + gf_internal_t *h; + struct gf_split_8_8_data *d8; + + h = (gf_internal_t *) gf->scratch; + d8 = (struct gf_split_8_8_data *) h->private; + product = 0; + mask = 0xff; + + for (i = 0; i < 4; i++) { + tb = b32; + for (j = 0; j < 4; j++) { + product ^= d8->tables[i+j][a32&mask][tb&mask]; + tb >>= 8; + } + a32 >>= 8; + } + return product; +} + +static +inline +void +gf_w32_split_8_8_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + uint32_t product, mask, tb, tv, tp; + gf_internal_t *h; + struct gf_split_8_8_data *d8; + uint32_t *p00, *p01, *p02, *p03; + uint32_t *p10, *p11, *p12, *p13; + uint32_t *p20, *p21, *p22, *p23; + uint32_t *p30, *p31, *p32, *p33; + uint32_t *s32, *d32, *top; + unsigned 
long uls, uld; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_8_8_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_8_8_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + tv = val; + h = (gf_internal_t *) gf->scratch; + d8 = (struct gf_split_8_8_data *) h->private; + mask = 0xff; + + p00 = &(d8->tables[0][val&mask][0]); + p01 = &(d8->tables[1][val&mask][0]); + p02 = &(d8->tables[2][val&mask][0]); + p03 = &(d8->tables[3][val&mask][0]); + val >>= 8; + p10 = &(d8->tables[1][val&mask][0]); + p11 = &(d8->tables[2][val&mask][0]); + p12 = &(d8->tables[3][val&mask][0]); + p13 = &(d8->tables[4][val&mask][0]); + val >>= 8; + p20 = &(d8->tables[2][val&mask][0]); + p21 = &(d8->tables[3][val&mask][0]); + p22 = &(d8->tables[4][val&mask][0]); + p23 = &(d8->tables[5][val&mask][0]); + val >>= 8; + p30 = &(d8->tables[3][val&mask][0]); + p31 = &(d8->tables[4][val&mask][0]); + p32 = &(d8->tables[5][val&mask][0]); + p33 = &(d8->tables[6][val&mask][0]); + + s32 = (uint32_t *) src; + d32 = (uint32_t *) dest; + top = (d32 + (bytes/4)); + + while (d32 < top) { + tb = *s32; + tp = *d32; + product = (xor) ? 
(*d32) : 0; + product ^= p00[tb&mask]; + product ^= p10[tb&mask]; + product ^= p20[tb&mask]; + product ^= p30[tb&mask]; + + tb >>= 8; + product ^= p01[tb&mask]; + product ^= p11[tb&mask]; + product ^= p21[tb&mask]; + product ^= p31[tb&mask]; + + tb >>= 8; + product ^= p02[tb&mask]; + product ^= p12[tb&mask]; + product ^= p22[tb&mask]; + product ^= p32[tb&mask]; + + tb >>= 8; + product ^= p03[tb&mask]; + product ^= p13[tb&mask]; + product ^= p23[tb&mask]; + product ^= p33[tb&mask]; + *d32 = product; + s32++; + d32++; + } +} + +static +void +gf_w32_split_2_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h; + struct gf_split_2_32_lazy_data *ld; + int i; + gf_val_32_t pp, v, v2, s, *s32, *d32, *top; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_2_32_lazy_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_2_32_lazy_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + ld = (struct gf_split_2_32_lazy_data *) h->private; + + if (ld->last_value != val) { + v = val; + for (i = 0; i < 16; i++) { + v2 = (v << 1); + if (v & GF_FIRST_BIT) v2 ^= pp; + ld->tables[i][0] = 0; + ld->tables[i][1] = v; + ld->tables[i][2] = v2; + ld->tables[i][3] = (v2 ^ v); + v = (v2 << 1); + if (v2 & GF_FIRST_BIT) v ^= pp; + } + } + ld->last_value = val; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + while (d32 != top) { + v = (xor) ? 
*d32 : 0; + s = *s32; + i = 0; + while (s != 0) { + v ^= ld->tables[i][s&3]; + s >>= 2; + i++; + } + *d32 = v; + d32++; + s32++; + } +} + +/* Region multiply by val: each 32-bit word of dest becomes val * src word, + or is XORed with that product when xor is nonzero. src must be 4-byte + aligned and share its offset within 16 bytes with dest, and bytes must be + a multiple of 4; otherwise gf_alignment_error() is called. The body is + only compiled when INTEL_SSE4 is defined. +*/ + +static +void +gf_w32_split_2_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + unsigned long uls, uld; + gf_internal_t *h; + int i, m, j, tindex; + gf_val_32_t pp, v, v2, s, *s32, *d32, *top; + __m128i vi, si, pi, shuffler, tables[16], adder, xi, mask1, mask2; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_2_32_lazy_sse_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_2_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + uls &= 0xf; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + /* Scalar multiplies for the leading words until src reaches a 16-byte boundary. */ + if (uls != 0) { + while (uls != 16) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + s32++; + d32++; + if (d32 == top) return; + uls += 4; + } + } + + /* Round top down to a 16-byte boundary; uld keeps the leftover byte count. */ + uld = (unsigned long) top; + top = (gf_val_32_t *) (uld - (uld & 0xf)); + uld &= 0xf; + + v = val; + for (i = 0; i < 16; i++) { + v2 = (v << 1); + if (v & GF_FIRST_BIT) v2 ^= pp; + tables[i] = _mm_set_epi32(v2 ^ v, v2, v, 0); + v = (v2 << 1); + if (v2 & GF_FIRST_BIT) v ^= pp; + } + + shuffler = _mm_set_epi8(0xc, 0xc, 0xc, 0xc, 8, 8, 8, 8, 4, 4, 4, 4, 0, 0, 0, 0); + adder = _mm_set_epi8(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0); + mask1 = _mm_set1_epi8(0x3); + mask2 = _mm_set1_epi8(0xc); + + while (d32 != top) { + pi = (xor) ?
_mm_load_si128 ((__m128i *) d32) : _mm_setzero_si128(); + vi = _mm_load_si128((__m128i *) s32); + + tindex = 0; + for (i = 0; i < 4; i++) { + si = _mm_shuffle_epi8(vi, shuffler); + + xi = _mm_and_si128(si, mask1); + xi = _mm_slli_epi16(xi, 2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + xi = _mm_and_si128(si, mask2); + xi = _mm_xor_si128(xi, adder); + pi = _mm_xor_si128(pi, _mm_shuffle_epi8(tables[tindex], xi)); + si = _mm_srli_epi16(si, 2); + tindex++; + + vi = _mm_srli_epi32(vi, 8); + } + _mm_store_si128((__m128i *) d32, pi); + d32 += 4; + s32 += 4; + } + + while (uld > 0) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + uld -= 4; + } + + +#endif +} + +static +void +gf_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h; + struct gf_split_4_32_lazy_data *ld; + int i, j, k; + gf_val_32_t pp, v, s, *s32, *d32, *top; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0x7) != (uld & 0x7))) gf_alignment_error("gf_w32_split_4_32_lazy_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_4_32_lazy_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + if (ld->last_value != val) { + v = val; + for 
(i = 0; i < 8; i++) { + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); + } + } + } + ld->last_value = val; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + while (d32 != top) { + v = (xor) ? *d32 : 0; + s = *s32; + i = 0; + while (s != 0) { + v ^= ld->tables[i][s&0xf]; + s >>= 4; + i++; + } + *d32 = v; + d32++; + s32++; + } +} + +/* Region multiply by val using 4-bit split tables and SSE shuffles. src + must be 4-byte aligned and share its offset within 16 bytes with dest, and + bytes must be a multiple of 4; otherwise gf_alignment_error() is called. + Words outside the 64-byte-multiple middle section go through + gf->multiply.w32. The body is only compiled when INTEL_SSE4 is defined. +*/ + +static +void +gf_w32_split_4_32_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + unsigned long uls, uld; + gf_internal_t *h; + int i, m, j, k, tindex; + gf_val_32_t pp, v, s, *s32, *d32, *top, *realtop; + __m128i si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3; + struct gf_split_4_32_lazy_data *ld; + uint8_t btable[16]; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_4_32_lazy_sse_altmap_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_4_32_lazy_sse_altmap_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + uls &= 0xf; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + /* Scalar multiplies for the leading words until src reaches a 16-byte boundary. */ + if (uls != 0) { + while (uls != 16) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + s32++; + d32++; + if (d32 == top) return; + uls += 4; + } + } + + uld = (unsigned long) top; + realtop = top; + + /* You need the size of this region to be a multiple of 64 bytes */ + bytes = (top - d32); + bytes -= (bytes & 0xf); + top = (d32 + bytes); + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + v = val; + for (i = 0; i < 8; i++)
{ + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); + } + for (j = 0; j < 4; j++) { + for (k = 0; k < 16; k++) { + btable[k] = (uint8_t) ld->tables[i][k]; + ld->tables[i][k] >>= 8; + } + tables[i][j] = _mm_loadu_si128((__m128i *) btable); + } + } + + mask1 = _mm_set1_epi8(0xf); + + if (xor) { + while (d32 != top) { + p0 = _mm_load_si128 ((__m128i *) d32); + p1 = _mm_load_si128 ((__m128i *) (d32+4)); + p2 = _mm_load_si128 ((__m128i *) (d32+8)); + p3 = _mm_load_si128 ((__m128i *) (d32+12)); + + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si)); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, 
_mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } else { + while (d32 != top) { + + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + + si = _mm_and_si128(v0, mask1); + p0 = _mm_shuffle_epi8(tables[0][0], si); + p1 = _mm_shuffle_epi8(tables[0][1], si); + p2 = _mm_shuffle_epi8(tables[0][2], si); + p3 = _mm_shuffle_epi8(tables[0][3], si); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = 
_mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[6][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[6][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[6][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[6][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + _mm_store_si128((__m128i *) 
d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } + + while (d32 < realtop) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + } + +#endif +} + + +static +void +gf_w32_split_4_32_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ +#ifdef INTEL_SSE4 + unsigned long uls, uld; + gf_internal_t *h; + int i, m, j, k, tindex; + gf_val_32_t pp, v, s, *s32, *d32, *top, *realtop; + __m128i vi, si, tables[8][4], p0, p1, p2, p3, mask1, v0, v1, v2, v3, mask8, mask16; + __m128i tv1, tv2, tv3, tv0; + struct gf_split_4_32_lazy_data *ld; + uint8_t btable[16]; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if (uls %4 != 0 || ((uls & 0xf) != (uld & 0xf))) gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region", 4); + if (bytes % 4 != 0) { + gf_alignment_error("gf_w32_split_4_32_lazy_sse_multiply_region: buffer size not divisible by symbol size = 4 bytes", 4); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + h = (gf_internal_t *) gf->scratch; + pp = h->prim_poly; + + uls &= 0xf; + + s32 = (gf_val_32_t *) src; + d32 = (gf_val_32_t *) dest; + top = d32 + (bytes/4); + + if (uls != 0) { + while (uls != 16) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + if (d32 == top) return; + uls += 4; + } + } + + uld = (unsigned long) top; + realtop = top; + + /* You need the size of this region to be a multiple of 64 bytes */ + bytes = (top - d32); + bytes -= (bytes & 0xf); + top = (d32 + bytes); + + ld = (struct gf_split_4_32_lazy_data *) h->private; + + v = val; + for (i = 0; i < 8; i++) { + ld->tables[i][0] = 0; + for (j = 1; j < 16; j <<= 1) { + for (k = 0; k < j; k++) { + ld->tables[i][k^j] = (v ^ 
ld->tables[i][k]); + } + v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1); + } + for (j = 0; j < 4; j++) { + for (k = 0; k < 16; k++) { + btable[k] = (uint8_t) ld->tables[i][k]; + ld->tables[i][k] >>= 8; + } + tables[i][j] = _mm_loadu_si128((__m128i *) btable); + } + } + + mask1 = _mm_set1_epi8(0xf); + mask8 = _mm_set1_epi16(0xff); + mask16 = _mm_set1_epi32(0xffff); + + if (xor) { + while (d32 != top) { + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + +/* printf("Val = %x\n", val); + MM_PRINT8("Old V0", v0); + MM_PRINT8("Old V1", v1); + MM_PRINT8("Old V2", v2); + MM_PRINT8("Old V3", v3); + printf("\n"); */ + + p0 = _mm_srli_epi16(v0, 8); + p1 = _mm_srli_epi16(v1, 8); + p2 = _mm_srli_epi16(v2, 8); + p3 = _mm_srli_epi16(v3, 8); + + tv0 = _mm_and_si128(v0, mask8); + tv1 = _mm_and_si128(v1, mask8); + tv2 = _mm_and_si128(v2, mask8); + tv3 = _mm_and_si128(v3, mask8); + + v0 = _mm_packus_epi16(p1, p0); + v1 = _mm_packus_epi16(tv1, tv0); + v2 = _mm_packus_epi16(p3, p2); + v3 = _mm_packus_epi16(tv3, tv2); + +/* MM_PRINT8("Middle V0", v0); + MM_PRINT8("Middle V1", v1); + MM_PRINT8("Middle V2", v2); + MM_PRINT8("Middle V3", v3); + printf("\n"); */ + + p0 = _mm_srli_epi16(v0, 8); + p1 = _mm_srli_epi16(v1, 8); + p2 = _mm_srli_epi16(v2, 8); + p3 = _mm_srli_epi16(v3, 8); + + tv0 = _mm_and_si128(v0, mask8); + tv1 = _mm_and_si128(v1, mask8); + tv2 = _mm_and_si128(v2, mask8); + tv3 = _mm_and_si128(v3, mask8); + + v0 = _mm_packus_epi16(p2, p0); + v1 = _mm_packus_epi16(p3, p1); + v2 = _mm_packus_epi16(tv2, tv0); + v3 = _mm_packus_epi16(tv3, tv1); + +/* MM_PRINT8("New V0", v0); + MM_PRINT8("New V1", v1); + MM_PRINT8("New V2", v2); + MM_PRINT8("New V3", v3); + printf("\n"); */ + + si = _mm_and_si128(v0, mask1); + p0 = _mm_shuffle_epi8(tables[6][0], si); + p1 = _mm_shuffle_epi8(tables[6][1], si); + p2 = _mm_shuffle_epi8(tables[6][2], 
si); + p3 = _mm_shuffle_epi8(tables[6][3], si); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, 
_mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + +/* MM_PRINT8("Old P0", p0); + MM_PRINT8("Old P1", p1); + MM_PRINT8("Old P2", p2); + MM_PRINT8("Old P3", p3); + printf("\n"); */ + + tv0 = _mm_unpackhi_epi8(p1, p3); + tv1 = _mm_unpackhi_epi8(p0, p2); + tv2 = _mm_unpacklo_epi8(p1, p3); + tv3 = _mm_unpacklo_epi8(p0, p2); + +/* MM_PRINT8("Middle P0", tv0); + MM_PRINT8("Middle P1", tv1); + MM_PRINT8("Middle P2", tv2); + MM_PRINT8("Middle P3", tv3); + printf("\n"); */ + + p0 = _mm_unpackhi_epi8(tv1, tv0); + p1 = _mm_unpacklo_epi8(tv1, tv0); + p2 = _mm_unpackhi_epi8(tv3, tv2); + p3 = _mm_unpacklo_epi8(tv3, tv2); + +/* MM_PRINT8("New P0", p0); + MM_PRINT8("New P1", p1); + MM_PRINT8("New P2", p2); + MM_PRINT8("New P3", p3); + printf("\n"); + exit(1); */ + + v0 = _mm_load_si128 ((__m128i *) d32); + v1 = _mm_load_si128 ((__m128i *) (d32+4)); + v2 = _mm_load_si128 ((__m128i *) (d32+8)); + v3 = _mm_load_si128 ((__m128i *) (d32+12)); + + p0 = _mm_xor_si128(p0, v0); + p1 = _mm_xor_si128(p1, v1); + p2 = _mm_xor_si128(p2, v2); + p3 = _mm_xor_si128(p3, v3); + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + _mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } else { + while (d32 != top) { + v0 = _mm_load_si128((__m128i *) s32); s32 += 4; + v1 = _mm_load_si128((__m128i *) s32); s32 += 4; + v2 = _mm_load_si128((__m128i *) s32); s32 += 4; + v3 = _mm_load_si128((__m128i *) s32); s32 += 4; + +/* printf("Val = %x\n", val); + MM_PRINT8("Old V0", v0); + MM_PRINT8("Old V1", v1); + MM_PRINT8("Old V2", v2); + MM_PRINT8("Old V3", v3); + printf("\n"); */ + + p0 = _mm_srli_epi16(v0, 8); + p1 = _mm_srli_epi16(v1, 8); + p2 = _mm_srli_epi16(v2, 8); + p3 = _mm_srli_epi16(v3, 8); + + tv0 = _mm_and_si128(v0, mask8); + tv1 = _mm_and_si128(v1, mask8); + tv2 = _mm_and_si128(v2, mask8); + tv3 = 
_mm_and_si128(v3, mask8); + + v0 = _mm_packus_epi16(p1, p0); + v1 = _mm_packus_epi16(tv1, tv0); + v2 = _mm_packus_epi16(p3, p2); + v3 = _mm_packus_epi16(tv3, tv2); + +/* MM_PRINT8("Middle V0", v0); + MM_PRINT8("Middle V1", v1); + MM_PRINT8("Middle V2", v2); + MM_PRINT8("Middle V3", v3); + printf("\n"); */ + + p0 = _mm_srli_epi16(v0, 8); + p1 = _mm_srli_epi16(v1, 8); + p2 = _mm_srli_epi16(v2, 8); + p3 = _mm_srli_epi16(v3, 8); + + tv0 = _mm_and_si128(v0, mask8); + tv1 = _mm_and_si128(v1, mask8); + tv2 = _mm_and_si128(v2, mask8); + tv3 = _mm_and_si128(v3, mask8); + + v0 = _mm_packus_epi16(p2, p0); + v1 = _mm_packus_epi16(p3, p1); + v2 = _mm_packus_epi16(tv2, tv0); + v3 = _mm_packus_epi16(tv3, tv1); + +/* MM_PRINT8("New V0", v0); + MM_PRINT8("New V1", v1); + MM_PRINT8("New V2", v2); + MM_PRINT8("New V3", v3); + printf("\n"); */ + + si = _mm_and_si128(v0, mask1); + p0 = _mm_shuffle_epi8(tables[6][0], si); + p1 = _mm_shuffle_epi8(tables[6][1], si); + p2 = _mm_shuffle_epi8(tables[6][2], si); + p3 = _mm_shuffle_epi8(tables[6][3], si); + + v0 = _mm_srli_epi32(v0, 4); + si = _mm_and_si128(v0, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[7][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[7][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[7][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[7][3], si)); + + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[4][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[4][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[4][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[4][3], si)); + + v1 = _mm_srli_epi32(v1, 4); + si = _mm_and_si128(v1, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[5][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[5][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[5][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[5][3], si)); + + si = _mm_and_si128(v2, mask1); + p0 = 
_mm_xor_si128(p0, _mm_shuffle_epi8(tables[2][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[2][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[2][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[2][3], si)); + + v2 = _mm_srli_epi32(v2, 4); + si = _mm_and_si128(v2, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[3][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[3][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[3][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[3][3], si)); + + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[0][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[0][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[0][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[0][3], si)); + + v3 = _mm_srli_epi32(v3, 4); + si = _mm_and_si128(v3, mask1); + p0 = _mm_xor_si128(p0, _mm_shuffle_epi8(tables[1][0], si)); + p1 = _mm_xor_si128(p1, _mm_shuffle_epi8(tables[1][1], si)); + p2 = _mm_xor_si128(p2, _mm_shuffle_epi8(tables[1][2], si)); + p3 = _mm_xor_si128(p3, _mm_shuffle_epi8(tables[1][3], si)); + +/* MM_PRINT8("Old P0", p0); + MM_PRINT8("Old P1", p1); + MM_PRINT8("Old P2", p2); + MM_PRINT8("Old P3", p3); + printf("\n"); */ + + tv0 = _mm_unpackhi_epi8(p1, p3); + tv1 = _mm_unpackhi_epi8(p0, p2); + tv2 = _mm_unpacklo_epi8(p1, p3); + tv3 = _mm_unpacklo_epi8(p0, p2); + +/* MM_PRINT8("Middle P0", tv0); + MM_PRINT8("Middle P1", tv1); + MM_PRINT8("Middle P2", tv2); + MM_PRINT8("Middle P3", tv3); + printf("\n"); */ + + p0 = _mm_unpackhi_epi8(tv1, tv0); + p1 = _mm_unpacklo_epi8(tv1, tv0); + p2 = _mm_unpackhi_epi8(tv3, tv2); + p3 = _mm_unpacklo_epi8(tv3, tv2); + +/* MM_PRINT8("New P0", p0); + MM_PRINT8("New P1", p1); + MM_PRINT8("New P2", p2); + MM_PRINT8("New P3", p3); + printf("\n"); + exit(1); */ + + _mm_store_si128((__m128i *) d32, p0); + _mm_store_si128((__m128i *) (d32+4), p1); + _mm_store_si128((__m128i *) (d32+8), p2); + 
_mm_store_si128((__m128i *) (d32+12), p3); + d32 += 16; + } + } + + while (d32 < realtop) { + if (xor) { + *d32 ^= gf->multiply.w32(gf, *s32, val); + } else { + *d32 = gf->multiply.w32(gf, *s32, val); + } + *s32++; + *d32++; + } + + +#endif +} + +static +int gf_w32_split_init(gf_t *gf) +{ + gf_internal_t *h; + struct gf_split_2_32_lazy_data *ld2; + struct gf_split_4_32_lazy_data *ld4; + struct gf_split_8_8_data *d8; + uint32_t p, basep; + int i, j, exp; + + h = (gf_internal_t *) gf->scratch; + + /* Defaults */ + gf->multiply_region.w32 = gf_w32_multiply_region_from_single; + gf->multiply.w32 = gf_w32_shift_multiply; + gf->inverse.w32 = gf_w32_euclid; + + if (h->arg1 == 8 && h->arg2 == 8) { + gf->multiply.w32 = gf_w32_split_8_8_multiply; + gf->multiply_region.w32 = gf_w32_split_8_8_multiply_region; + d8 = (struct gf_split_8_8_data *) h->private; + basep = 1; + for (exp = 0; exp < 7; exp++) { + for (j = 0; j < 256; j++) d8->tables[exp][0][j] = 0; + for (i = 0; i < 256; i++) d8->tables[exp][i][0] = 0; + d8->tables[exp][1][1] = basep; + for (i = 2; i < 256; i++) { + if (i&1) { + p = d8->tables[exp][i^1][1]; + d8->tables[exp][i][1] = p ^ basep; + } else { + p = d8->tables[exp][i>>1][1]; + d8->tables[exp][i][1] = GF_MULTBY_TWO(p); + } + } + for (i = 1; i < 256; i++) { + p = d8->tables[exp][i][1]; + for (j = 1; j < 256; j++) { + if (j&1) { + d8->tables[exp][i][j] = d8->tables[exp][i][j^1] ^ p; + } else { + d8->tables[exp][i][j] = GF_MULTBY_TWO(d8->tables[exp][i][j>>1]); + } + } + } + for (i = 0; i < 8; i++) basep = GF_MULTBY_TWO(basep); + } + } + if ((h->arg1 == 2 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 2)) { + ld2 = (struct gf_split_2_32_lazy_data *) h->private; + ld2->last_value = 0; + if (h->region_type & GF_REGION_SSE) { + gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region; + } else { + gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region; + } + } + if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4)) { + ld4 
= (struct gf_split_4_32_lazy_data *) h->private; + ld4->last_value = 0; + if (h->region_type & GF_REGION_SSE) { + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region; + } else { + gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_multiply_region; + } + } else { + gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region; + } + } + return 1; +} + +static +gf_val_32_t +gf_w32_composite_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint16_t b0 = b & 0x0000ffff; + uint16_t b1 = (b & 0xffff0000) >> 16; + uint16_t a0 = a & 0x0000ffff; + uint16_t a1 = (a & 0xffff0000) >> 16; + uint16_t a1b1; + + a1b1 = base_gf->multiply.w16(base_gf, a1, b1); + + return ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); +} + +/* + * Composite field division trick (explained in 2007 tech report) + * + * Compute a / b = a*b^-1, where p(x) = x^2 + sx + 1 + * + * let c = b^-1 + * + * c*b = (s*b1c1+b1c0+b0c1)x+(b1c1+b0c0) + * + * want (s*b1c1+b1c0+b0c1) = 0 and (b1c1+b0c0) = 1 + * + * let d = b1c1 and d+1 = b0c0 + * + * solve s*b1c1+b1c0+b0c1 = 0 + * + * solution: d = (b1b0^-1)(b1b0^-1+b0b1^-1+s)^-1 + * + * c0 = (d+1)b0^-1 + * c1 = d*b1^-1 + * + * a / b = a * c + */ +static +gf_val_32_t +gf_w32_composite_inverse(gf_t *gf, gf_val_32_t a) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + uint16_t a0 = a & 0x0000ffff; + uint16_t a1 = (a & 0xffff0000) >> 16; + uint16_t c0, c1, d, tmp; + uint32_t c; + uint16_t a0inv, a1inv; + + if (a0 == 0) { + a1inv = base_gf->inverse.w16(base_gf, a1); + c0 = base_gf->multiply.w16(base_gf, a1inv, GF_S_GF_16_2); + c1 = a1inv; + } else if (a1 == 0) { + c0 = base_gf->inverse.w16(base_gf, a0); + c1 = 0; + } else { + a1inv = 
base_gf->inverse.w16(base_gf, a1); + a0inv = base_gf->inverse.w16(base_gf, a0); + + d = base_gf->multiply.w16(base_gf, a1, a0inv); + + tmp = (base_gf->multiply.w16(base_gf, a1, a0inv) ^ base_gf->multiply.w16(base_gf, a0, a1inv) ^ GF_S_GF_16_2); + tmp = base_gf->inverse.w16(base_gf, tmp); + + d = base_gf->multiply.w16(base_gf, d, tmp); + + c0 = base_gf->multiply.w16(base_gf, (d^1), a0inv); + c1 = base_gf->multiply.w16(base_gf, d, a1inv); + } + + c = c0 | (c1 << 16); + + return c; +} + +static +gf_val_32_t +gf_w32_composite_divide(gf_t *gf, gf_val_32_t a, gf_val_32_t b) +{ + gf_val_32_t binv; + + binv = gf_w32_composite_inverse(gf, b); + + return gf_w32_composite_multiply(gf, a, binv); +} + +static +void +gf_w32_composite_multiply_region_table(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + int i=0; + struct gf_w16_logtable_data * ltd; + uint16_t b0 = val & 0x0000ffff; + uint16_t b1 = (val & 0xffff0000) >> 16; + uint32_t *s32 = (uint32_t *) src; + uint32_t *d32 = (uint32_t *) dest; + uint16_t a0, a1, a1b1; + int num_syms = bytes >> 2; + int sym_divisible = bytes % 4; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + ltd = (struct gf_w16_logtable_data *) h->private; + + if (xor) { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]]; + + d32[i] ^= ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) | + ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^ + 
ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16)); + + } + } else { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b1]]; + + d32[i] = ((ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b0]] ^ a1b1) | + ((ltd->antilog_tbl[ltd->log_tbl[a1] + ltd->log_tbl[b0]] ^ ltd->antilog_tbl[ltd->log_tbl[a0] + ltd->log_tbl[b1]] ^ + ltd->antilog_tbl[ltd->log_tbl[a1b1] + ltd->log_tbl[GF_S_GF_16_2]]) << 16)); + } + } +} + +static +void +gf_w32_composite_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + unsigned long uls, uld; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + int i=0; + struct gf_w16_logtable_data * ltd; + uint16_t b0 = val & 0x0000ffff; + uint16_t b1 = (val & 0xffff0000) >> 16; + uint32_t *s32 = (uint32_t *) src; + uint32_t *d32 = (uint32_t *) dest; + uint16_t a0, a1, a1b1; + int num_syms = bytes >> 2; + int sym_divisible = bytes % 4; + + uls = (unsigned long) src; + uld = (unsigned long) dest; + if ((uls & 0x7) != (uld & 0x7)) gf_alignment_error("gf_w32_buf_const_log", 2); + if (sym_divisible) { + gf_alignment_error("gf_w32_buf_const_log: buffer size not divisible by symbol size = 2 bytes", 2); + } + + if (val == 0) { + if (xor) return; + bzero(dest, bytes); + return; + } + + ltd = (struct gf_w16_logtable_data *) h->private; + + if (xor) { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = base_gf->multiply.w16(base_gf, a1, b1); + + d32[i] ^= ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); + + } + } else { + for (i = 0;i < num_syms; i++) { + a0 = s32[i] & 0x0000ffff; + a1 = (s32[i] & 0xffff0000) >> 16; + a1b1 = base_gf->multiply.w16(base_gf, a1, b1); + + 
d32[i] = ((base_gf->multiply.w16(base_gf, a0, b0) ^ a1b1) | + ((base_gf->multiply.w16(base_gf, a1, b0) ^ base_gf->multiply.w16(base_gf, a0, b1) ^ base_gf->multiply.w16(base_gf, a1b1, GF_S_GF_16_2)) << 16)); + } + } +} + + + +static +void +gf_w32_composite_multiply_region_alt(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) +{ + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + gf_val_16_t val0 = val & 0x0000ffff; + gf_val_16_t val1 = (val & 0xffff0000) >> 16; + int sub_reg_size = bytes / 2; + + if (bytes % 2 != 0) gf_alignment_error("gf_w32_composite_multiply_region_alt", 1); + if (sub_reg_size % 2 != 0) gf_alignment_error("gf_w32_composite_multiply_region_alt", 1); + + if (!xor) { + memset(dest, 0, bytes); + } + + base_gf->multiply_region.w16(base_gf, src, dest, val0, sub_reg_size, xor); + base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest, val1, sub_reg_size, 1); + base_gf->multiply_region.w16(base_gf, src, dest+sub_reg_size, val1, sub_reg_size, xor); + base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest+sub_reg_size, val0, sub_reg_size, 1); + base_gf->multiply_region.w16(base_gf, src+sub_reg_size, dest+sub_reg_size, base_gf->multiply.w16(base_gf, GF_S_GF_16_2, val1), sub_reg_size, 1); +} + +static +int gf_w32_composite_init(gf_t *gf) +{ + struct gf_w16_logtable_data *ltd; + gf_internal_t *h = (gf_internal_t *) gf->scratch; + gf_t *base_gf = h->base_gf; + gf_val_32_t a, b; + uint64_t prim_poly = ((gf_internal_t *) base_gf->scratch)->prim_poly; + int i; + + ltd = (struct gf_w16_logtable_data *) h->private; + + ltd->log_tbl[0] = 0; + + bzero(&(ltd->_antilog_tbl[0]), sizeof(ltd->_antilog_tbl)); + + ltd->antilog_tbl = &(ltd->_antilog_tbl[GF_BASE_FIELD_SIZE * 2]); + + b = 1; + for (i = 0; i < GF_BASE_FIELD_GROUP_SIZE; i++) { + ltd->log_tbl[b] = (gf_val_16_t)i; + ltd->antilog_tbl[i] = (gf_val_16_t)b; + ltd->antilog_tbl[i+GF_BASE_FIELD_GROUP_SIZE] = (gf_val_16_t)b; + b <<= 1; + if (b & 
GF_BASE_FIELD_SIZE) { + b = b ^ prim_poly; + } + } + ltd->inv_tbl[0] = 0; /* Not really, but we need to fill it with something */ + ltd->inv_tbl[1] = 1; + for (i = 2; i < GF_BASE_FIELD_SIZE; i++) { + ltd->inv_tbl[i] = ltd->antilog_tbl[GF_BASE_FIELD_GROUP_SIZE-ltd->log_tbl[i]]; + } + + if (h->region_type & GF_REGION_ALTMAP) { + gf->multiply_region.w32 = gf_w32_composite_multiply_region_alt; + } else { + if (h->region_type & GF_REGION_SINGLE_TABLE) { + gf->multiply_region.w32 = gf_w32_composite_multiply_region_table; + } else { + gf->multiply_region.w32 = gf_w32_composite_multiply_region; + } + } + + gf->multiply.w32 = gf_w32_composite_multiply; + gf->divide.w32 = gf_w32_composite_divide; + gf->inverse.w32 = gf_w32_composite_inverse; + + return 1; +} + +int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2) +{ + int ss, sa; + + ss = (GF_REGION_SSE | GF_REGION_NOSSE); + sa = (GF_REGION_STDMAP | GF_REGION_ALTMAP); + + switch(mult_type) + { + case GF_MULT_SPLIT_TABLE: + if (arg1 == 8 && arg2 == 8){ + if (region_type != GF_REGION_DEFAULT) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_split_8_8_data) + 64; + } + if ((arg1 == 2 && arg2 == 32) || (arg2 == 2 && arg1 == 32)) { + region_type &= (~GF_REGION_LAZY); + if ((region_type & ss) == ss) return -1; + if ((region_type | ss) != ss) return -1; + return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64; + } + if ((arg1 == 4 && arg2 == 32) || (arg2 == 4 && arg1 == 32)) { + region_type &= (~GF_REGION_LAZY); + if ((region_type & ss) == ss) return -1; + if ((region_type & sa) == sa) return -1; + if (region_type & (~(ss|sa))) return -1; + if (region_type & GF_REGION_SSE) { + return sizeof(gf_internal_t) + sizeof(struct gf_split_4_32_lazy_data) + 64; + } else if (region_type & GF_REGION_ALTMAP) { + return -1; + } else { + return sizeof(gf_internal_t) + sizeof(struct gf_split_4_32_lazy_data) + 64; + } + } + return -1; + case GF_MULT_DEFAULT: + case 
GF_MULT_SHIFT: + if (arg1 != 0 || arg2 != 0 || region_type != 0) return -1; + return sizeof(gf_internal_t); + break; + case GF_MULT_COMPOSITE: + if (region_type & ~(GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP | GF_REGION_STDMAP)) return -1; + if ((region_type & (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) == (GF_REGION_SINGLE_TABLE | GF_REGION_ALTMAP)) return -1; + if (arg1 == 2 && arg2 == 16 || arg2 == 2 && arg1 == 16) { + return sizeof(gf_internal_t) + sizeof(struct gf_w16_logtable_data) + 64; + } else { + return -1; + } + default: + return -1; + } +} + +int gf_w32_init(gf_t *gf) +{ + gf_internal_t *h; + + h = (gf_internal_t *) gf->scratch; + if (h->prim_poly == 0) h->prim_poly = 0x400007; + + gf->multiply.w32 = NULL; + gf->divide.w32 = NULL; + gf->inverse.w32 = NULL; + gf->multiply_region.w32 = NULL; + + switch(h->mult_type) { + case GF_MULT_DEFAULT: + case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break; + case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break; + case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break; + default: return 0; + } + if (h->divide_type == GF_DIVIDE_EUCLID) { + gf->divide.w32 = gf_w32_divide_from_inverse; + gf->inverse.w32 = gf_w32_euclid; + } else if (h->divide_type == GF_DIVIDE_MATRIX) { + gf->divide.w32 = gf_w32_divide_from_inverse; + gf->inverse.w32 = gf_w32_matrix; + } + + if (gf->inverse.w32 != NULL && gf->divide.w32 == NULL) { + gf->divide.w32 = gf_w32_divide_from_inverse; + } + if (gf->inverse.w32 == NULL && gf->divide.w32 != NULL) { + gf->inverse.w32 = gf_w32_inverse_from_divide; + } + return 1; +} diff --git a/tmp.sh b/tmp.sh new file mode 100644 index 0000000..6bd92b2 --- /dev/null +++ b/tmp.sh @@ -0,0 +1,15 @@ +for i in 5 10 ; do + sed 's/1 }/'$i' }/' tmp-time-test.sh > tmp2.sh + sh tmp2.sh 4 LOG - - >> tmp-$i-out.txt + sh tmp2.sh 4 TABLE - - >> tmp-$i-out.txt + sh tmp2.sh 4 TABLE SINGLE,SSE - >> tmp-$i-out.txt + sh tmp2.sh 8 LOG - - >> tmp-$i-out.txt + sh tmp2.sh 8 
TABLE - - >> tmp-$i-out.txt + sh tmp2.sh 8 SPLIT 8 4 SSE - >> tmp-$i-out.txt + sh tmp2.sh 16 LOG - - >> tmp-$i-out.txt + sh tmp2.sh 16 SPLIT 16 4 SSE,STDMAP - >> tmp-$i-out.txt + sh tmp2.sh 16 SPLIT 16 4 SSE,ALTMAP - >> tmp-$i-out.txt + sh tmp2.sh 32 SPLIT 8 8 - - >> tmp-$i-out.txt + sh tmp2.sh 32 SPLIT 32 4 SSE,STDMAP - >> tmp-$i-out.txt + sh tmp2.sh 32 SPLIT 32 4 SSE,ALTMAP - >> tmp-$i-out.txt +done diff --git a/tmp.txt b/tmp.txt new file mode 100644 index 0000000..5094a84 --- /dev/null +++ b/tmp.txt @@ -0,0 +1,294 @@ +1024 1048576 4 LOG - - Seed: 1347471838 Buffer-Const,s!=d,xor=0: 4.824089 s 212.268 MB/s Buffer-Const,s!=d,xor=1: 5.341791 s 191.696 MB/s Buffer-Const,s==d,xor=0: 4.816530 s 212.601 MB/s Buffer-Const,s==d,xor=1: 5.333377 s 191.998 MB/s +2048 524288 4 LOG - - Seed: 1347471864 Buffer-Const,s!=d,xor=0: 4.796388 s 213.494 MB/s Buffer-Const,s!=d,xor=1: 5.355381 s 191.210 MB/s Buffer-Const,s==d,xor=0: 4.790053 s 213.776 MB/s Buffer-Const,s==d,xor=1: 5.342280 s 191.678 MB/s +4096 262144 4 LOG - - Seed: 1347471890 Buffer-Const,s!=d,xor=0: 4.785699 s 213.971 MB/s Buffer-Const,s!=d,xor=1: 5.272175 s 194.227 MB/s Buffer-Const,s==d,xor=0: 4.760163 s 215.119 MB/s Buffer-Const,s==d,xor=1: 5.285017 s 193.755 MB/s +8192 131072 4 LOG - - Seed: 1347471915 Buffer-Const,s!=d,xor=0: 4.772734 s 214.552 MB/s Buffer-Const,s!=d,xor=1: 5.301345 s 193.159 MB/s Buffer-Const,s==d,xor=0: 4.782723 s 214.104 MB/s Buffer-Const,s==d,xor=1: 5.294336 s 193.414 MB/s +16384 65536 4 LOG - - Seed: 1347471940 Buffer-Const,s!=d,xor=0: 4.779516 s 214.248 MB/s Buffer-Const,s!=d,xor=1: 5.311189 s 192.801 MB/s Buffer-Const,s==d,xor=0: 4.771980 s 214.586 MB/s Buffer-Const,s==d,xor=1: 5.294589 s 193.405 MB/s +32768 32768 4 LOG - - Seed: 1347471966 Buffer-Const,s!=d,xor=0: 4.745805 s 215.769 MB/s Buffer-Const,s!=d,xor=1: 5.289698 s 193.584 MB/s Buffer-Const,s==d,xor=0: 4.788919 s 213.827 MB/s Buffer-Const,s==d,xor=1: 5.323099 s 192.369 MB/s +65536 16384 4 LOG - - Seed: 1347471991 
Buffer-Const,s!=d,xor=0: 4.782660 s 214.107 MB/s Buffer-Const,s!=d,xor=1: 5.279925 s 193.942 MB/s Buffer-Const,s==d,xor=0: 4.807014 s 213.022 MB/s Buffer-Const,s==d,xor=1: 5.296893 s 193.321 MB/s +131072 8192 4 LOG - - Seed: 1347472017 Buffer-Const,s!=d,xor=0: 4.792920 s 213.648 MB/s Buffer-Const,s!=d,xor=1: 5.460566 s 187.526 MB/s Buffer-Const,s==d,xor=0: 4.749562 s 215.599 MB/s Buffer-Const,s==d,xor=1: 5.267351 s 194.405 MB/s +262144 4096 4 LOG - - Seed: 1347472042 Buffer-Const,s!=d,xor=0: 4.785846 s 213.964 MB/s Buffer-Const,s!=d,xor=1: 5.336344 s 191.892 MB/s Buffer-Const,s==d,xor=0: 4.730902 s 216.449 MB/s Buffer-Const,s==d,xor=1: 5.312972 s 192.736 MB/s +524288 2048 4 LOG - - Seed: 1347472068 Buffer-Const,s!=d,xor=0: 4.768488 s 214.743 MB/s Buffer-Const,s!=d,xor=1: 5.302696 s 193.109 MB/s Buffer-Const,s==d,xor=0: 4.769302 s 214.706 MB/s Buffer-Const,s==d,xor=1: 5.322016 s 192.408 MB/s +1048576 1024 4 LOG - - Seed: 1347472093 Buffer-Const,s!=d,xor=0: 4.795875 s 213.517 MB/s Buffer-Const,s!=d,xor=1: 5.345346 s 191.569 MB/s Buffer-Const,s==d,xor=0: 4.810602 s 212.863 MB/s Buffer-Const,s==d,xor=1: 5.223796 s 196.026 MB/s +2097152 512 4 LOG - - Seed: 1347472118 Buffer-Const,s!=d,xor=0: 4.809727 s 212.902 MB/s Buffer-Const,s!=d,xor=1: 5.255259 s 194.852 MB/s Buffer-Const,s==d,xor=0: 4.853752 s 210.971 MB/s Buffer-Const,s==d,xor=1: 5.401798 s 189.567 MB/s +4194304 256 4 LOG - - Seed: 1347472144 Buffer-Const,s!=d,xor=0: 4.888658 s 209.464 MB/s Buffer-Const,s!=d,xor=1: 5.275764 s 194.095 MB/s Buffer-Const,s==d,xor=0: 4.880836 s 209.800 MB/s Buffer-Const,s==d,xor=1: 5.202162 s 196.841 MB/s +8388608 128 4 LOG - - Seed: 1347472170 Buffer-Const,s!=d,xor=0: 4.693878 s 218.156 MB/s Buffer-Const,s!=d,xor=1: 5.467869 s 187.276 MB/s Buffer-Const,s==d,xor=0: 4.752496 s 215.466 MB/s Buffer-Const,s==d,xor=1: 5.441666 s 188.178 MB/s +16777216 64 4 LOG - - Seed: 1347472195 Buffer-Const,s!=d,xor=0: 4.743789 s 215.861 MB/s Buffer-Const,s!=d,xor=1: 5.284770 s 193.764 MB/s 
Buffer-Const,s==d,xor=0: 4.864533 s 210.503 MB/s Buffer-Const,s==d,xor=1: 5.531778 s 185.112 MB/s +33554432 32 4 LOG - - Seed: 1347472221 Buffer-Const,s!=d,xor=0: 5.058158 s 202.445 MB/s Buffer-Const,s!=d,xor=1: 5.388520 s 190.034 MB/s Buffer-Const,s==d,xor=0: 5.017543 s 204.084 MB/s Buffer-Const,s==d,xor=1: 5.550337 s 184.493 MB/s +67108864 16 4 LOG - - Seed: 1347472247 Buffer-Const,s!=d,xor=0: 4.273755 s 239.602 MB/s Buffer-Const,s!=d,xor=1: 5.356849 s 191.157 MB/s Buffer-Const,s==d,xor=0: 4.884432 s 209.646 MB/s Buffer-Const,s==d,xor=1: 5.328478 s 192.175 MB/s +134217728 8 4 LOG - - Seed: 1347472272 Buffer-Const,s!=d,xor=0: 4.608675 s 222.190 MB/s Buffer-Const,s!=d,xor=1: 5.757140 s 177.866 MB/s Buffer-Const,s==d,xor=0: 4.494134 s 227.853 MB/s Buffer-Const,s==d,xor=1: 5.725754 s 178.841 MB/s +268435456 4 4 LOG - - Seed: 1347472298 Buffer-Const,s!=d,xor=0: 5.326828 s 192.234 MB/s Buffer-Const,s!=d,xor=1: 5.749257 s 178.110 MB/s Buffer-Const,s==d,xor=0: 3.930798 s 260.507 MB/s Buffer-Const,s==d,xor=1: 5.769782 s 177.476 MB/s +536870912 2 4 LOG - - Seed: 1347472325 Buffer-Const,s!=d,xor=0: 5.506971 s 185.946 MB/s Buffer-Const,s!=d,xor=1: 5.820843 s 175.920 MB/s Buffer-Const,s==d,xor=0: 5.151835 s 198.764 MB/s Buffer-Const,s==d,xor=1: 2.846869 s 359.693 MB/s +1073741824 1 4 LOG - - Seed: 1347472350 Buffer-Const,s!=d,xor=0: 5.887568 s 173.926 MB/s Buffer-Const,s!=d,xor=1: 5.696556 s 179.758 MB/s Buffer-Const,s==d,xor=0: 5.188843 s 197.346 MB/s Buffer-Const,s==d,xor=1: 5.662299 s 180.845 MB/s +1024 1048576 4 TABLE - - Seed: 1347472378 Buffer-Const,s!=d,xor=0: 2.090874 s 489.747 MB/s Buffer-Const,s!=d,xor=1: 2.333704 s 438.787 MB/s Buffer-Const,s==d,xor=0: 2.076584 s 493.117 MB/s Buffer-Const,s==d,xor=1: 2.341999 s 437.233 MB/s +2048 524288 4 TABLE - - Seed: 1347472393 Buffer-Const,s!=d,xor=0: 2.100408 s 487.524 MB/s Buffer-Const,s!=d,xor=1: 2.312246 s 442.859 MB/s Buffer-Const,s==d,xor=0: 2.095576 s 488.649 MB/s Buffer-Const,s==d,xor=1: 2.278695 s 449.380 MB/s +4096 
262144 4 TABLE - - Seed: 1347472407 Buffer-Const,s!=d,xor=0: 2.051966 s 499.034 MB/s Buffer-Const,s!=d,xor=1: 2.292821 s 446.611 MB/s Buffer-Const,s==d,xor=0: 2.064646 s 495.969 MB/s Buffer-Const,s==d,xor=1: 2.306956 s 443.875 MB/s +8192 131072 4 TABLE - - Seed: 1347472421 Buffer-Const,s!=d,xor=0: 2.074299 s 493.661 MB/s Buffer-Const,s!=d,xor=1: 2.298558 s 445.497 MB/s Buffer-Const,s==d,xor=0: 2.066750 s 495.464 MB/s Buffer-Const,s==d,xor=1: 2.287467 s 447.657 MB/s +16384 65536 4 TABLE - - Seed: 1347472435 Buffer-Const,s!=d,xor=0: 2.152980 s 475.620 MB/s Buffer-Const,s!=d,xor=1: 2.282884 s 448.555 MB/s Buffer-Const,s==d,xor=0: 2.058036 s 497.562 MB/s Buffer-Const,s==d,xor=1: 2.298184 s 445.569 MB/s +32768 32768 4 TABLE - - Seed: 1347472449 Buffer-Const,s!=d,xor=0: 2.213344 s 462.648 MB/s Buffer-Const,s!=d,xor=1: 2.320572 s 441.271 MB/s Buffer-Const,s==d,xor=0: 2.206635 s 464.055 MB/s Buffer-Const,s==d,xor=1: 2.306156 s 444.029 MB/s +65536 16384 4 TABLE - - Seed: 1347472463 Buffer-Const,s!=d,xor=0: 2.201297 s 465.180 MB/s Buffer-Const,s!=d,xor=1: 2.309327 s 443.419 MB/s Buffer-Const,s==d,xor=0: 2.184618 s 468.732 MB/s Buffer-Const,s==d,xor=1: 2.301818 s 444.866 MB/s +131072 8192 4 TABLE - - Seed: 1347472477 Buffer-Const,s!=d,xor=0: 2.141175 s 478.242 MB/s Buffer-Const,s!=d,xor=1: 2.316740 s 442.000 MB/s Buffer-Const,s==d,xor=0: 2.187070 s 468.206 MB/s Buffer-Const,s==d,xor=1: 2.306461 s 443.970 MB/s +262144 4096 4 TABLE - - Seed: 1347472492 Buffer-Const,s!=d,xor=0: 2.166170 s 472.724 MB/s Buffer-Const,s!=d,xor=1: 2.306049 s 444.050 MB/s Buffer-Const,s==d,xor=0: 2.147129 s 476.916 MB/s Buffer-Const,s==d,xor=1: 2.309562 s 443.374 MB/s +524288 2048 4 TABLE - - Seed: 1347472506 Buffer-Const,s!=d,xor=0: 2.156061 s 474.940 MB/s Buffer-Const,s!=d,xor=1: 2.304203 s 444.405 MB/s Buffer-Const,s==d,xor=0: 2.155717 s 475.016 MB/s Buffer-Const,s==d,xor=1: 2.321065 s 441.177 MB/s +1048576 1024 4 TABLE - - Seed: 1347472520 Buffer-Const,s!=d,xor=0: 2.152224 s 475.787 MB/s 
Buffer-Const,s!=d,xor=1: 2.310472 s 443.199 MB/s Buffer-Const,s==d,xor=0: 2.151816 s 475.877 MB/s Buffer-Const,s==d,xor=1: 2.312655 s 442.781 MB/s +2097152 512 4 TABLE - - Seed: 1347472534 Buffer-Const,s!=d,xor=0: 2.170889 s 471.696 MB/s Buffer-Const,s!=d,xor=1: 2.361295 s 433.660 MB/s Buffer-Const,s==d,xor=0: 2.139913 s 478.524 MB/s Buffer-Const,s==d,xor=1: 2.316579 s 442.031 MB/s +4194304 256 4 TABLE - - Seed: 1347472548 Buffer-Const,s!=d,xor=0: 2.187952 s 468.018 MB/s Buffer-Const,s!=d,xor=1: 2.354228 s 434.962 MB/s Buffer-Const,s==d,xor=0: 2.193449 s 466.845 MB/s Buffer-Const,s==d,xor=1: 2.344275 s 436.809 MB/s +8388608 128 4 TABLE - - Seed: 1347472563 Buffer-Const,s!=d,xor=0: 2.211300 s 463.076 MB/s Buffer-Const,s!=d,xor=1: 2.382068 s 429.879 MB/s Buffer-Const,s==d,xor=0: 2.206019 s 464.185 MB/s Buffer-Const,s==d,xor=1: 2.333248 s 438.873 MB/s +16777216 64 4 TABLE - - Seed: 1347472577 Buffer-Const,s!=d,xor=0: 2.193599 s 466.813 MB/s Buffer-Const,s!=d,xor=1: 2.373979 s 431.343 MB/s Buffer-Const,s==d,xor=0: 2.181715 s 469.355 MB/s Buffer-Const,s==d,xor=1: 2.363553 s 433.246 MB/s +33554432 32 4 TABLE - - Seed: 1347472592 Buffer-Const,s!=d,xor=0: 2.205605 s 464.272 MB/s Buffer-Const,s!=d,xor=1: 2.388323 s 428.753 MB/s Buffer-Const,s==d,xor=0: 2.194591 s 466.602 MB/s Buffer-Const,s==d,xor=1: 2.352825 s 435.221 MB/s +67108864 16 4 TABLE - - Seed: 1347472606 Buffer-Const,s!=d,xor=0: 2.252406 s 454.625 MB/s Buffer-Const,s!=d,xor=1: 2.350086 s 435.729 MB/s Buffer-Const,s==d,xor=0: 2.186626 s 468.301 MB/s Buffer-Const,s==d,xor=1: 2.357336 s 434.389 MB/s +134217728 8 4 TABLE - - Seed: 1347472621 Buffer-Const,s!=d,xor=0: 2.312211 s 442.866 MB/s Buffer-Const,s!=d,xor=1: 2.397869 s 427.046 MB/s Buffer-Const,s==d,xor=0: 2.195088 s 466.496 MB/s Buffer-Const,s==d,xor=1: 2.354865 s 434.844 MB/s +268435456 4 4 TABLE - - Seed: 1347472635 Buffer-Const,s!=d,xor=0: 2.409825 s 424.927 MB/s Buffer-Const,s!=d,xor=1: 2.388709 s 428.683 MB/s Buffer-Const,s==d,xor=0: 2.217935 s 461.691 
MB/s Buffer-Const,s==d,xor=1: 2.427467 s 421.839 MB/s +536870912 2 4 TABLE - - Seed: 1347472650 Buffer-Const,s!=d,xor=0: 2.572154 s 398.110 MB/s Buffer-Const,s!=d,xor=1: 2.357918 s 434.281 MB/s Buffer-Const,s==d,xor=0: 2.180809 s 469.551 MB/s Buffer-Const,s==d,xor=1: 2.330464 s 439.397 MB/s +1073741824 1 4 TABLE - - Seed: 1347472665 Buffer-Const,s!=d,xor=0: 2.942518 s 348.001 MB/s Buffer-Const,s!=d,xor=1: 2.349215 s 435.890 MB/s Buffer-Const,s==d,xor=0: 2.209902 s 463.369 MB/s Buffer-Const,s==d,xor=1: 2.368640 s 432.316 MB/s +1024 1048576 4 TABLE SINGLE,SSE - Seed: 1347472681 Buffer-Const,s!=d,xor=0: 0.160061 s 6397.547 MB/s Buffer-Const,s!=d,xor=1: 0.169124 s 6054.742 MB/s Buffer-Const,s==d,xor=0: 0.160015 s 6399.396 MB/s Buffer-Const,s==d,xor=1: 0.170060 s 6021.416 MB/s +2048 524288 4 TABLE SINGLE,SSE - Seed: 1347472688 Buffer-Const,s!=d,xor=0: 0.144030 s 7109.637 MB/s Buffer-Const,s!=d,xor=1: 0.149962 s 6828.377 MB/s Buffer-Const,s==d,xor=0: 0.143702 s 7125.880 MB/s Buffer-Const,s==d,xor=1: 0.149732 s 6838.902 MB/s +4096 262144 4 TABLE SINGLE,SSE - Seed: 1347472693 Buffer-Const,s!=d,xor=0: 0.129829 s 7887.273 MB/s Buffer-Const,s!=d,xor=1: 0.134809 s 7595.958 MB/s Buffer-Const,s==d,xor=0: 0.131632 s 7779.258 MB/s Buffer-Const,s==d,xor=1: 0.135138 s 7577.437 MB/s +8192 131072 4 TABLE SINGLE,SSE - Seed: 1347472699 Buffer-Const,s!=d,xor=0: 0.124071 s 8253.315 MB/s Buffer-Const,s!=d,xor=1: 0.127894 s 8006.605 MB/s Buffer-Const,s==d,xor=0: 0.124068 s 8253.505 MB/s Buffer-Const,s==d,xor=1: 0.127882 s 8007.382 MB/s +16384 65536 4 TABLE SINGLE,SSE - Seed: 1347472705 Buffer-Const,s!=d,xor=0: 0.120162 s 8521.845 MB/s Buffer-Const,s!=d,xor=1: 0.124806 s 8204.723 MB/s Buffer-Const,s==d,xor=0: 0.119825 s 8545.821 MB/s Buffer-Const,s==d,xor=1: 0.124612 s 8217.517 MB/s +32768 32768 4 TABLE SINGLE,SSE - Seed: 1347472711 Buffer-Const,s!=d,xor=0: 0.123173 s 8313.478 MB/s Buffer-Const,s!=d,xor=1: 0.129224 s 7924.250 MB/s Buffer-Const,s==d,xor=0: 0.118994 s 8605.476 MB/s 
Buffer-Const,s==d,xor=1: 0.123591 s 8285.397 MB/s +65536 16384 4 TABLE SINGLE,SSE - Seed: 1347472717 Buffer-Const,s!=d,xor=0: 0.120111 s 8525.465 MB/s Buffer-Const,s!=d,xor=1: 0.130905 s 7822.443 MB/s Buffer-Const,s==d,xor=0: 0.118989 s 8605.838 MB/s Buffer-Const,s==d,xor=1: 0.122049 s 8390.066 MB/s +131072 8192 4 TABLE SINGLE,SSE - Seed: 1347472722 Buffer-Const,s!=d,xor=0: 0.120384 s 8506.115 MB/s Buffer-Const,s!=d,xor=1: 0.131319 s 7797.817 MB/s Buffer-Const,s==d,xor=0: 0.118782 s 8620.849 MB/s Buffer-Const,s==d,xor=1: 0.124635 s 8215.976 MB/s +262144 4096 4 TABLE SINGLE,SSE - Seed: 1347472728 Buffer-Const,s!=d,xor=0: 0.151247 s 6770.381 MB/s Buffer-Const,s!=d,xor=1: 0.163074 s 6279.339 MB/s Buffer-Const,s==d,xor=0: 0.118564 s 8636.659 MB/s Buffer-Const,s==d,xor=1: 0.122733 s 8343.290 MB/s +524288 2048 4 TABLE SINGLE,SSE - Seed: 1347472734 Buffer-Const,s!=d,xor=0: 0.148822 s 6880.722 MB/s Buffer-Const,s!=d,xor=1: 0.160966 s 6361.595 MB/s Buffer-Const,s==d,xor=0: 0.129449 s 7910.429 MB/s Buffer-Const,s==d,xor=1: 0.129116 s 7930.864 MB/s +1048576 1024 4 TABLE SINGLE,SSE - Seed: 1347472740 Buffer-Const,s!=d,xor=0: 0.147404 s 6946.896 MB/s Buffer-Const,s!=d,xor=1: 0.159756 s 6409.758 MB/s Buffer-Const,s==d,xor=0: 0.128379 s 7976.391 MB/s Buffer-Const,s==d,xor=1: 0.128835 s 7948.168 MB/s +2097152 512 4 TABLE SINGLE,SSE - Seed: 1347472746 Buffer-Const,s!=d,xor=0: 0.236475 s 4330.268 MB/s Buffer-Const,s!=d,xor=1: 0.246601 s 4152.451 MB/s Buffer-Const,s==d,xor=0: 0.128898 s 7944.287 MB/s Buffer-Const,s==d,xor=1: 0.133252 s 7684.693 MB/s +4194304 256 4 TABLE SINGLE,SSE - Seed: 1347472752 Buffer-Const,s!=d,xor=0: 0.365861 s 2798.874 MB/s Buffer-Const,s!=d,xor=1: 0.361812 s 2830.198 MB/s Buffer-Const,s==d,xor=0: 0.209003 s 4899.441 MB/s Buffer-Const,s==d,xor=1: 0.202078 s 5067.354 MB/s +8388608 128 4 TABLE SINGLE,SSE - Seed: 1347472758 Buffer-Const,s!=d,xor=0: 0.369510 s 2771.238 MB/s Buffer-Const,s!=d,xor=1: 0.347091 s 2950.235 MB/s Buffer-Const,s==d,xor=0: 0.227157 s 
4507.888 MB/s Buffer-Const,s==d,xor=1: 0.232318 s 4407.757 MB/s +16777216 64 4 TABLE SINGLE,SSE - Seed: 1347472764 Buffer-Const,s!=d,xor=0: 0.368891 s 2775.890 MB/s Buffer-Const,s!=d,xor=1: 0.356381 s 2873.326 MB/s Buffer-Const,s==d,xor=0: 0.226912 s 4512.772 MB/s Buffer-Const,s==d,xor=1: 0.219236 s 4670.758 MB/s +33554432 32 4 TABLE SINGLE,SSE - Seed: 1347472771 Buffer-Const,s!=d,xor=0: 0.379371 s 2699.205 MB/s Buffer-Const,s!=d,xor=1: 0.341562 s 2997.993 MB/s Buffer-Const,s==d,xor=0: 0.231817 s 4417.282 MB/s Buffer-Const,s==d,xor=1: 0.217154 s 4715.547 MB/s +67108864 16 4 TABLE SINGLE,SSE - Seed: 1347472777 Buffer-Const,s!=d,xor=0: 0.403540 s 2537.545 MB/s Buffer-Const,s!=d,xor=1: 0.360238 s 2842.563 MB/s Buffer-Const,s==d,xor=0: 0.230866 s 4435.479 MB/s Buffer-Const,s==d,xor=1: 0.180604 s 5669.879 MB/s +134217728 8 4 TABLE SINGLE,SSE - Seed: 1347472784 Buffer-Const,s!=d,xor=0: 0.441703 s 2318.301 MB/s Buffer-Const,s!=d,xor=1: 0.386278 s 2650.943 MB/s Buffer-Const,s==d,xor=0: 0.229751 s 4457.002 MB/s Buffer-Const,s==d,xor=1: 0.180658 s 5668.158 MB/s +268435456 4 4 TABLE SINGLE,SSE - Seed: 1347472791 Buffer-Const,s!=d,xor=0: 0.471682 s 2170.955 MB/s Buffer-Const,s!=d,xor=1: 0.383233 s 2672.005 MB/s Buffer-Const,s==d,xor=0: 0.236378 s 4332.041 MB/s Buffer-Const,s==d,xor=1: 0.243849 s 4199.315 MB/s +536870912 2 4 TABLE SINGLE,SSE - Seed: 1347472797 Buffer-Const,s!=d,xor=0: 0.666553 s 1536.262 MB/s Buffer-Const,s!=d,xor=1: 0.374508 s 2734.255 MB/s Buffer-Const,s==d,xor=0: 0.228591 s 4479.617 MB/s Buffer-Const,s==d,xor=1: 0.247453 s 4138.156 MB/s +1073741824 1 4 TABLE SINGLE,SSE - Seed: 1347472805 Buffer-Const,s!=d,xor=0: 0.739952 s 1383.873 MB/s Buffer-Const,s!=d,xor=1: 0.376333 s 2720.994 MB/s Buffer-Const,s==d,xor=0: 0.229283 s 4466.099 MB/s Buffer-Const,s==d,xor=1: 0.242894 s 4215.832 MB/s +1024 1048576 8 LOG - - Seed: 1347472813 Buffer-Const,s!=d,xor=0: 1.621880 s 631.366 MB/s Buffer-Const,s!=d,xor=1: 1.972670 s 519.093 MB/s Buffer-Const,s==d,xor=0: 1.703537 s 
601.102 MB/s Buffer-Const,s==d,xor=1: 1.965952 s 520.867 MB/s +2048 524288 8 LOG - - Seed: 1347472825 Buffer-Const,s!=d,xor=0: 1.580008 s 648.098 MB/s Buffer-Const,s!=d,xor=1: 1.922355 s 532.680 MB/s Buffer-Const,s==d,xor=0: 1.619760 s 632.193 MB/s Buffer-Const,s==d,xor=1: 1.935444 s 529.078 MB/s +4096 262144 8 LOG - - Seed: 1347472838 Buffer-Const,s!=d,xor=0: 1.612208 s 635.154 MB/s Buffer-Const,s!=d,xor=1: 1.935781 s 528.985 MB/s Buffer-Const,s==d,xor=0: 1.619466 s 632.307 MB/s Buffer-Const,s==d,xor=1: 1.963975 s 521.391 MB/s +8192 131072 8 LOG - - Seed: 1347472850 Buffer-Const,s!=d,xor=0: 1.618882 s 632.535 MB/s Buffer-Const,s!=d,xor=1: 1.917912 s 533.914 MB/s Buffer-Const,s==d,xor=0: 1.604389 s 638.249 MB/s Buffer-Const,s==d,xor=1: 1.908338 s 536.593 MB/s +16384 65536 8 LOG - - Seed: 1347472863 Buffer-Const,s!=d,xor=0: 1.594616 s 642.161 MB/s Buffer-Const,s!=d,xor=1: 1.910674 s 535.936 MB/s Buffer-Const,s==d,xor=0: 1.609434 s 636.249 MB/s Buffer-Const,s==d,xor=1: 1.912407 s 535.451 MB/s +32768 32768 8 LOG - - Seed: 1347472875 Buffer-Const,s!=d,xor=0: 1.624596 s 630.311 MB/s Buffer-Const,s!=d,xor=1: 2.144199 s 477.568 MB/s Buffer-Const,s==d,xor=0: 1.588486 s 644.639 MB/s Buffer-Const,s==d,xor=1: 1.909198 s 536.351 MB/s +65536 16384 8 LOG - - Seed: 1347472887 Buffer-Const,s!=d,xor=0: 1.662282 s 616.020 MB/s Buffer-Const,s!=d,xor=1: 1.919168 s 533.565 MB/s Buffer-Const,s==d,xor=0: 1.591656 s 643.355 MB/s Buffer-Const,s==d,xor=1: 1.926590 s 531.509 MB/s +131072 8192 8 LOG - - Seed: 1347472900 Buffer-Const,s!=d,xor=0: 1.594085 s 642.375 MB/s Buffer-Const,s!=d,xor=1: 1.937719 s 528.456 MB/s Buffer-Const,s==d,xor=0: 1.648678 s 621.104 MB/s Buffer-Const,s==d,xor=1: 1.924335 s 532.132 MB/s +262144 4096 8 LOG - - Seed: 1347472912 Buffer-Const,s!=d,xor=0: 1.595497 s 641.806 MB/s Buffer-Const,s!=d,xor=1: 1.936042 s 528.914 MB/s Buffer-Const,s==d,xor=0: 1.608699 s 636.539 MB/s Buffer-Const,s==d,xor=1: 1.958862 s 522.752 MB/s +524288 2048 8 LOG - - Seed: 1347472925 
Buffer-Const,s!=d,xor=0: 1.646453 s 621.943 MB/s Buffer-Const,s!=d,xor=1: 1.942311 s 527.207 MB/s Buffer-Const,s==d,xor=0: 1.621521 s 631.506 MB/s Buffer-Const,s==d,xor=1: 1.968560 s 520.177 MB/s +1048576 1024 8 LOG - - Seed: 1347472937 Buffer-Const,s!=d,xor=0: 1.627189 s 629.306 MB/s Buffer-Const,s!=d,xor=1: 1.938396 s 528.272 MB/s Buffer-Const,s==d,xor=0: 1.609066 s 636.394 MB/s Buffer-Const,s==d,xor=1: 1.940828 s 527.610 MB/s +2097152 512 8 LOG - - Seed: 1347472949 Buffer-Const,s!=d,xor=0: 1.654112 s 619.063 MB/s Buffer-Const,s!=d,xor=1: 1.977605 s 517.798 MB/s Buffer-Const,s==d,xor=0: 1.625274 s 630.048 MB/s Buffer-Const,s==d,xor=1: 1.957592 s 523.092 MB/s +4194304 256 8 LOG - - Seed: 1347472962 Buffer-Const,s!=d,xor=0: 1.637634 s 625.293 MB/s Buffer-Const,s!=d,xor=1: 1.960637 s 522.279 MB/s Buffer-Const,s==d,xor=0: 1.643415 s 623.093 MB/s Buffer-Const,s==d,xor=1: 1.968736 s 520.131 MB/s +8388608 128 8 LOG - - Seed: 1347472974 Buffer-Const,s!=d,xor=0: 1.642961 s 623.265 MB/s Buffer-Const,s!=d,xor=1: 1.997125 s 512.737 MB/s Buffer-Const,s==d,xor=0: 1.647587 s 621.515 MB/s Buffer-Const,s==d,xor=1: 1.982742 s 516.456 MB/s +16777216 64 8 LOG - - Seed: 1347472987 Buffer-Const,s!=d,xor=0: 1.628143 s 628.937 MB/s Buffer-Const,s!=d,xor=1: 1.988719 s 514.904 MB/s Buffer-Const,s==d,xor=0: 1.697431 s 603.265 MB/s Buffer-Const,s==d,xor=1: 1.982398 s 516.546 MB/s +33554432 32 8 LOG - - Seed: 1347472999 Buffer-Const,s!=d,xor=0: 1.655345 s 618.602 MB/s Buffer-Const,s!=d,xor=1: 1.987288 s 515.275 MB/s Buffer-Const,s==d,xor=0: 1.631150 s 627.778 MB/s Buffer-Const,s==d,xor=1: 1.988191 s 515.041 MB/s +67108864 16 8 LOG - - Seed: 1347473012 Buffer-Const,s!=d,xor=0: 1.716783 s 596.464 MB/s Buffer-Const,s!=d,xor=1: 2.007143 s 510.178 MB/s Buffer-Const,s==d,xor=0: 1.644218 s 622.789 MB/s Buffer-Const,s==d,xor=1: 1.981492 s 516.782 MB/s +134217728 8 8 LOG - - Seed: 1347473025 Buffer-Const,s!=d,xor=0: 1.744457 s 587.002 MB/s Buffer-Const,s!=d,xor=1: 2.015515 s 508.059 MB/s 
Buffer-Const,s==d,xor=0: 1.656633 s 618.121 MB/s Buffer-Const,s==d,xor=1: 1.984652 s 515.959 MB/s +268435456 4 8 LOG - - Seed: 1347473038 Buffer-Const,s!=d,xor=0: 1.943453 s 526.897 MB/s Buffer-Const,s!=d,xor=1: 1.982043 s 516.639 MB/s Buffer-Const,s==d,xor=0: 1.627782 s 629.077 MB/s Buffer-Const,s==d,xor=1: 1.963902 s 521.411 MB/s +536870912 2 8 LOG - - Seed: 1347473051 Buffer-Const,s!=d,xor=0: 1.984840 s 515.911 MB/s Buffer-Const,s!=d,xor=1: 1.999844 s 512.040 MB/s Buffer-Const,s==d,xor=0: 1.636470 s 625.737 MB/s Buffer-Const,s==d,xor=1: 1.961609 s 522.020 MB/s +1073741824 1 8 LOG - - Seed: 1347473064 Buffer-Const,s!=d,xor=0: 2.326284 s 440.187 MB/s Buffer-Const,s!=d,xor=1: 1.971229 s 519.473 MB/s Buffer-Const,s==d,xor=0: 1.628148 s 628.935 MB/s Buffer-Const,s==d,xor=1: 2.123621 s 482.195 MB/s +1024 1048576 8 TABLE - - Seed: 1347473078 Buffer-Const,s!=d,xor=0: 1.302151 s 786.391 MB/s Buffer-Const,s!=d,xor=1: 1.609089 s 636.385 MB/s Buffer-Const,s==d,xor=0: 1.298172 s 788.802 MB/s Buffer-Const,s==d,xor=1: 1.592942 s 642.836 MB/s +2048 524288 8 TABLE - - Seed: 1347473089 Buffer-Const,s!=d,xor=0: 1.270636 s 805.896 MB/s Buffer-Const,s!=d,xor=1: 1.564955 s 654.332 MB/s Buffer-Const,s==d,xor=0: 1.264134 s 810.040 MB/s Buffer-Const,s==d,xor=1: 1.553215 s 659.278 MB/s +4096 262144 8 TABLE - - Seed: 1347473100 Buffer-Const,s!=d,xor=0: 1.252112 s 817.818 MB/s Buffer-Const,s!=d,xor=1: 1.543294 s 663.516 MB/s Buffer-Const,s==d,xor=0: 1.248986 s 819.865 MB/s Buffer-Const,s==d,xor=1: 1.539075 s 665.335 MB/s +8192 131072 8 TABLE - - Seed: 1347473111 Buffer-Const,s!=d,xor=0: 1.254336 s 816.368 MB/s Buffer-Const,s!=d,xor=1: 1.556689 s 657.806 MB/s Buffer-Const,s==d,xor=0: 1.245368 s 822.247 MB/s Buffer-Const,s==d,xor=1: 1.556819 s 657.752 MB/s +16384 65536 8 TABLE - - Seed: 1347473122 Buffer-Const,s!=d,xor=0: 1.311951 s 780.517 MB/s Buffer-Const,s!=d,xor=1: 1.537484 s 666.023 MB/s Buffer-Const,s==d,xor=0: 1.236025 s 828.462 MB/s Buffer-Const,s==d,xor=1: 1.533817 s 667.615 MB/s 
+32768 32768 8 TABLE - - Seed: 1347473133 Buffer-Const,s!=d,xor=0: 1.185127 s 864.043 MB/s Buffer-Const,s!=d,xor=1: 1.560520 s 656.192 MB/s Buffer-Const,s==d,xor=0: 1.167446 s 877.128 MB/s Buffer-Const,s==d,xor=1: 1.548851 s 661.135 MB/s +65536 16384 8 TABLE - - Seed: 1347473144 Buffer-Const,s!=d,xor=0: 1.178377 s 868.992 MB/s Buffer-Const,s!=d,xor=1: 1.538837 s 665.438 MB/s Buffer-Const,s==d,xor=0: 1.174308 s 872.003 MB/s Buffer-Const,s==d,xor=1: 1.544995 s 662.785 MB/s +131072 8192 8 TABLE - - Seed: 1347473154 Buffer-Const,s!=d,xor=0: 1.209799 s 846.422 MB/s Buffer-Const,s!=d,xor=1: 1.556000 s 658.098 MB/s Buffer-Const,s==d,xor=0: 1.182813 s 865.733 MB/s Buffer-Const,s==d,xor=1: 1.532919 s 668.007 MB/s +262144 4096 8 TABLE - - Seed: 1347473165 Buffer-Const,s!=d,xor=0: 1.220862 s 838.751 MB/s Buffer-Const,s!=d,xor=1: 1.564978 s 654.322 MB/s Buffer-Const,s==d,xor=0: 1.212298 s 844.677 MB/s Buffer-Const,s==d,xor=1: 1.551679 s 659.930 MB/s +524288 2048 8 TABLE - - Seed: 1347473176 Buffer-Const,s!=d,xor=0: 1.293642 s 791.563 MB/s Buffer-Const,s!=d,xor=1: 1.576479 s 649.549 MB/s Buffer-Const,s==d,xor=0: 1.278135 s 801.167 MB/s Buffer-Const,s==d,xor=1: 1.551030 s 660.206 MB/s +1048576 1024 8 TABLE - - Seed: 1347473187 Buffer-Const,s!=d,xor=0: 1.255426 s 815.659 MB/s Buffer-Const,s!=d,xor=1: 1.552257 s 659.685 MB/s Buffer-Const,s==d,xor=0: 1.222667 s 837.513 MB/s Buffer-Const,s==d,xor=1: 1.537151 s 666.167 MB/s +2097152 512 8 TABLE - - Seed: 1347473198 Buffer-Const,s!=d,xor=0: 1.215521 s 842.437 MB/s Buffer-Const,s!=d,xor=1: 1.595463 s 641.820 MB/s Buffer-Const,s==d,xor=0: 1.183346 s 865.343 MB/s Buffer-Const,s==d,xor=1: 1.562343 s 655.426 MB/s +4194304 256 8 TABLE - - Seed: 1347473209 Buffer-Const,s!=d,xor=0: 1.253935 s 816.629 MB/s Buffer-Const,s!=d,xor=1: 1.607460 s 637.030 MB/s Buffer-Const,s==d,xor=0: 1.231988 s 831.177 MB/s Buffer-Const,s==d,xor=1: 1.585267 s 645.948 MB/s +8388608 128 8 TABLE - - Seed: 1347473220 Buffer-Const,s!=d,xor=0: 1.242970 s 823.833 MB/s 
Buffer-Const,s!=d,xor=1: 1.610712 s 635.744 MB/s Buffer-Const,s==d,xor=0: 1.217175 s 841.292 MB/s Buffer-Const,s==d,xor=1: 1.596637 s 641.348 MB/s +16777216 64 8 TABLE - - Seed: 1347473230 Buffer-Const,s!=d,xor=0: 1.250264 s 819.027 MB/s Buffer-Const,s!=d,xor=1: 1.613457 s 634.662 MB/s Buffer-Const,s==d,xor=0: 1.218879 s 840.116 MB/s Buffer-Const,s==d,xor=1: 1.610589 s 635.792 MB/s +33554432 32 8 TABLE - - Seed: 1347473241 Buffer-Const,s!=d,xor=0: 1.255215 s 815.797 MB/s Buffer-Const,s!=d,xor=1: 1.608961 s 636.436 MB/s Buffer-Const,s==d,xor=0: 1.288840 s 794.513 MB/s Buffer-Const,s==d,xor=1: 1.566385 s 653.735 MB/s +67108864 16 8 TABLE - - Seed: 1347473253 Buffer-Const,s!=d,xor=0: 1.266899 s 808.273 MB/s Buffer-Const,s!=d,xor=1: 1.589339 s 644.293 MB/s Buffer-Const,s==d,xor=0: 1.208477 s 847.347 MB/s Buffer-Const,s==d,xor=1: 1.563937 s 654.758 MB/s +134217728 8 8 TABLE - - Seed: 1347473264 Buffer-Const,s!=d,xor=0: 1.305275 s 784.509 MB/s Buffer-Const,s!=d,xor=1: 1.601313 s 639.475 MB/s Buffer-Const,s==d,xor=0: 1.205009 s 849.786 MB/s Buffer-Const,s==d,xor=1: 1.562011 s 655.565 MB/s +268435456 4 8 TABLE - - Seed: 1347473275 Buffer-Const,s!=d,xor=0: 1.393569 s 734.804 MB/s Buffer-Const,s!=d,xor=1: 1.588149 s 644.776 MB/s Buffer-Const,s==d,xor=0: 1.208391 s 847.408 MB/s Buffer-Const,s==d,xor=1: 1.568612 s 652.806 MB/s +536870912 2 8 TABLE - - Seed: 1347473286 Buffer-Const,s!=d,xor=0: 1.597872 s 640.852 MB/s Buffer-Const,s!=d,xor=1: 1.630728 s 627.940 MB/s Buffer-Const,s==d,xor=0: 1.222109 s 837.896 MB/s Buffer-Const,s==d,xor=1: 1.581507 s 647.484 MB/s +1073741824 1 8 TABLE - - Seed: 1347473298 Buffer-Const,s!=d,xor=0: 1.988823 s 514.877 MB/s Buffer-Const,s!=d,xor=1: 1.611288 s 635.516 MB/s Buffer-Const,s==d,xor=0: 1.222772 s 837.442 MB/s Buffer-Const,s==d,xor=1: 1.613392 s 634.688 MB/s +1024 1048576 8 SPLIT 8 4 SSE - Seed: 1347473310 Buffer-Const,s!=d,xor=0: 0.172217 s 5945.999 MB/s Buffer-Const,s!=d,xor=1: 0.175352 s 5839.687 MB/s Buffer-Const,s==d,xor=0: 0.163135 s 
6277.026 MB/s Buffer-Const,s==d,xor=1: 0.177949 s 5754.459 MB/s +2048 524288 8 SPLIT 8 4 SSE - Seed: 1347473316 Buffer-Const,s!=d,xor=0: 0.154401 s 6632.099 MB/s Buffer-Const,s!=d,xor=1: 0.158355 s 6466.493 MB/s Buffer-Const,s==d,xor=0: 0.151740 s 6748.403 MB/s Buffer-Const,s==d,xor=1: 0.159225 s 6431.142 MB/s +4096 262144 8 SPLIT 8 4 SSE - Seed: 1347473322 Buffer-Const,s!=d,xor=0: 0.138276 s 7405.486 MB/s Buffer-Const,s!=d,xor=1: 0.152603 s 6710.226 MB/s Buffer-Const,s==d,xor=0: 0.136469 s 7503.516 MB/s Buffer-Const,s==d,xor=1: 0.142757 s 7173.019 MB/s +8192 131072 8 SPLIT 8 4 SSE - Seed: 1347473328 Buffer-Const,s!=d,xor=0: 0.129117 s 7930.820 MB/s Buffer-Const,s!=d,xor=1: 0.136170 s 7520.030 MB/s Buffer-Const,s==d,xor=0: 0.129542 s 7904.751 MB/s Buffer-Const,s==d,xor=1: 0.135069 s 7581.303 MB/s +16384 65536 8 SPLIT 8 4 SSE - Seed: 1347473334 Buffer-Const,s!=d,xor=0: 0.125494 s 8159.768 MB/s Buffer-Const,s!=d,xor=1: 0.133779 s 7654.412 MB/s Buffer-Const,s==d,xor=0: 0.126920 s 8068.075 MB/s Buffer-Const,s==d,xor=1: 0.131795 s 7769.618 MB/s +32768 32768 8 SPLIT 8 4 SSE - Seed: 1347473340 Buffer-Const,s!=d,xor=0: 0.128076 s 7995.233 MB/s Buffer-Const,s!=d,xor=1: 0.137454 s 7449.750 MB/s Buffer-Const,s==d,xor=0: 0.122883 s 8333.141 MB/s Buffer-Const,s==d,xor=1: 0.131206 s 7804.505 MB/s +65536 16384 8 SPLIT 8 4 SSE - Seed: 1347473346 Buffer-Const,s!=d,xor=0: 0.127193 s 8050.759 MB/s Buffer-Const,s!=d,xor=1: 0.137505 s 7447.025 MB/s Buffer-Const,s==d,xor=0: 0.123262 s 8307.496 MB/s Buffer-Const,s==d,xor=1: 0.130225 s 7863.331 MB/s +131072 8192 8 SPLIT 8 4 SSE - Seed: 1347473351 Buffer-Const,s!=d,xor=0: 0.138416 s 7398.011 MB/s Buffer-Const,s!=d,xor=1: 0.140051 s 7311.628 MB/s Buffer-Const,s==d,xor=0: 0.123666 s 8280.397 MB/s Buffer-Const,s==d,xor=1: 0.131191 s 7805.384 MB/s +262144 4096 8 SPLIT 8 4 SSE - Seed: 1347473357 Buffer-Const,s!=d,xor=0: 0.158679 s 6453.299 MB/s Buffer-Const,s!=d,xor=1: 0.174885 s 5855.291 MB/s Buffer-Const,s==d,xor=0: 0.126400 s 8101.297 MB/s 
Buffer-Const,s==d,xor=1: 0.131082 s 7811.901 MB/s +524288 2048 8 SPLIT 8 4 SSE - Seed: 1347473363 Buffer-Const,s!=d,xor=0: 0.153464 s 6672.571 MB/s Buffer-Const,s!=d,xor=1: 0.168238 s 6086.609 MB/s Buffer-Const,s==d,xor=0: 0.132881 s 7706.120 MB/s Buffer-Const,s==d,xor=1: 0.138437 s 7396.852 MB/s +1048576 1024 8 SPLIT 8 4 SSE - Seed: 1347473369 Buffer-Const,s!=d,xor=0: 0.153720 s 6661.456 MB/s Buffer-Const,s!=d,xor=1: 0.167944 s 6097.272 MB/s Buffer-Const,s==d,xor=0: 0.132126 s 7750.172 MB/s Buffer-Const,s==d,xor=1: 0.137430 s 7451.081 MB/s +2097152 512 8 SPLIT 8 4 SSE - Seed: 1347473375 Buffer-Const,s!=d,xor=0: 0.235302 s 4351.859 MB/s Buffer-Const,s!=d,xor=1: 0.252596 s 4053.902 MB/s Buffer-Const,s==d,xor=0: 0.141343 s 7244.769 MB/s Buffer-Const,s==d,xor=1: 0.142658 s 7177.994 MB/s +4194304 256 8 SPLIT 8 4 SSE - Seed: 1347473381 Buffer-Const,s!=d,xor=0: 0.380941 s 2688.078 MB/s Buffer-Const,s!=d,xor=1: 0.380621 s 2690.341 MB/s Buffer-Const,s==d,xor=0: 0.208288 s 4916.260 MB/s Buffer-Const,s==d,xor=1: 0.214539 s 4773.029 MB/s +8388608 128 8 SPLIT 8 4 SSE - Seed: 1347473387 Buffer-Const,s!=d,xor=0: 0.374304 s 2735.747 MB/s Buffer-Const,s!=d,xor=1: 0.371563 s 2755.926 MB/s Buffer-Const,s==d,xor=0: 0.228778 s 4475.957 MB/s Buffer-Const,s==d,xor=1: 0.232901 s 4396.716 MB/s +16777216 64 8 SPLIT 8 4 SSE - Seed: 1347473394 Buffer-Const,s!=d,xor=0: 0.378027 s 2708.800 MB/s Buffer-Const,s!=d,xor=1: 0.371758 s 2754.482 MB/s Buffer-Const,s==d,xor=0: 0.227618 s 4498.761 MB/s Buffer-Const,s==d,xor=1: 0.236930 s 4321.945 MB/s +33554432 32 8 SPLIT 8 4 SSE - Seed: 1347473400 Buffer-Const,s!=d,xor=0: 0.389149 s 2631.383 MB/s Buffer-Const,s!=d,xor=1: 0.373076 s 2744.748 MB/s Buffer-Const,s==d,xor=0: 0.227429 s 4502.496 MB/s Buffer-Const,s==d,xor=1: 0.232095 s 4411.986 MB/s +67108864 16 8 SPLIT 8 4 SSE - Seed: 1347473407 Buffer-Const,s!=d,xor=0: 0.404405 s 2532.117 MB/s Buffer-Const,s!=d,xor=1: 0.375439 s 2727.471 MB/s Buffer-Const,s==d,xor=0: 0.232084 s 4412.195 MB/s 
Buffer-Const,s==d,xor=1: 0.234886 s 4359.567 MB/s +134217728 8 8 SPLIT 8 4 SSE - Seed: 1347473413 Buffer-Const,s!=d,xor=0: 0.439466 s 2330.102 MB/s Buffer-Const,s!=d,xor=1: 0.373526 s 2741.445 MB/s Buffer-Const,s==d,xor=0: 0.238485 s 4293.774 MB/s Buffer-Const,s==d,xor=1: 0.242573 s 4221.405 MB/s +268435456 4 8 SPLIT 8 4 SSE - Seed: 1347473420 Buffer-Const,s!=d,xor=0: 0.522256 s 1960.723 MB/s Buffer-Const,s!=d,xor=1: 0.369594 s 2770.609 MB/s Buffer-Const,s==d,xor=0: 0.230843 s 4435.914 MB/s Buffer-Const,s==d,xor=1: 0.233838 s 4379.099 MB/s +536870912 2 8 SPLIT 8 4 SSE - Seed: 1347473427 Buffer-Const,s!=d,xor=0: 0.664837 s 1540.227 MB/s Buffer-Const,s!=d,xor=1: 0.374827 s 2731.926 MB/s Buffer-Const,s==d,xor=0: 0.234757 s 4361.958 MB/s Buffer-Const,s==d,xor=1: 0.244709 s 4184.566 MB/s +1073741824 1 8 SPLIT 8 4 SSE - Seed: 1347473434 Buffer-Const,s!=d,xor=0: 0.945331 s 1083.218 MB/s Buffer-Const,s!=d,xor=1: 0.378121 s 2708.129 MB/s Buffer-Const,s==d,xor=0: 0.232104 s 4411.819 MB/s Buffer-Const,s==d,xor=1: 0.236791 s 4324.491 MB/s +1024 1048576 16 LOG - - Seed: 1347473442 Buffer-Const,s!=d,xor=0: 2.210890 s 463.162 MB/s Buffer-Const,s!=d,xor=1: 2.500565 s 409.508 MB/s Buffer-Const,s==d,xor=0: 2.229298 s 459.337 MB/s Buffer-Const,s==d,xor=1: 2.488949 s 411.419 MB/s +2048 524288 16 LOG - - Seed: 1347473457 Buffer-Const,s!=d,xor=0: 2.183274 s 469.020 MB/s Buffer-Const,s!=d,xor=1: 2.471336 s 414.351 MB/s Buffer-Const,s==d,xor=0: 2.161852 s 473.668 MB/s Buffer-Const,s==d,xor=1: 2.537259 s 403.585 MB/s +4096 262144 16 LOG - - Seed: 1347473472 Buffer-Const,s!=d,xor=0: 2.158893 s 474.317 MB/s Buffer-Const,s!=d,xor=1: 2.430101 s 421.382 MB/s Buffer-Const,s==d,xor=0: 2.174221 s 470.973 MB/s Buffer-Const,s==d,xor=1: 2.417720 s 423.540 MB/s +8192 131072 16 LOG - - Seed: 1347473486 Buffer-Const,s!=d,xor=0: 2.139556 s 478.604 MB/s Buffer-Const,s!=d,xor=1: 2.411590 s 424.616 MB/s Buffer-Const,s==d,xor=0: 2.106997 s 486.000 MB/s Buffer-Const,s==d,xor=1: 2.374905 s 431.175 MB/s +16384 
65536 16 LOG - - Seed: 1347473501 Buffer-Const,s!=d,xor=0: 2.131013 s 480.523 MB/s Buffer-Const,s!=d,xor=1: 2.424752 s 422.311 MB/s Buffer-Const,s==d,xor=0: 2.159855 s 474.106 MB/s Buffer-Const,s==d,xor=1: 2.340712 s 437.474 MB/s +32768 32768 16 LOG - - Seed: 1347473515 Buffer-Const,s!=d,xor=0: 2.080020 s 492.303 MB/s Buffer-Const,s!=d,xor=1: 2.353990 s 435.006 MB/s Buffer-Const,s==d,xor=0: 2.065719 s 495.711 MB/s Buffer-Const,s==d,xor=1: 2.348487 s 436.025 MB/s +65536 16384 16 LOG - - Seed: 1347473529 Buffer-Const,s!=d,xor=0: 2.083769 s 491.417 MB/s Buffer-Const,s!=d,xor=1: 2.401774 s 426.351 MB/s Buffer-Const,s==d,xor=0: 2.074343 s 493.650 MB/s Buffer-Const,s==d,xor=1: 2.341650 s 437.299 MB/s +131072 8192 16 LOG - - Seed: 1347473543 Buffer-Const,s!=d,xor=0: 2.191856 s 467.184 MB/s Buffer-Const,s!=d,xor=1: 2.369453 s 432.167 MB/s Buffer-Const,s==d,xor=0: 2.034702 s 503.268 MB/s Buffer-Const,s==d,xor=1: 2.307625 s 443.746 MB/s +262144 4096 16 LOG - - Seed: 1347473558 Buffer-Const,s!=d,xor=0: 2.084992 s 491.129 MB/s Buffer-Const,s!=d,xor=1: 2.385670 s 429.230 MB/s Buffer-Const,s==d,xor=0: 2.054360 s 498.452 MB/s Buffer-Const,s==d,xor=1: 2.374879 s 431.180 MB/s +524288 2048 16 LOG - - Seed: 1347473572 Buffer-Const,s!=d,xor=0: 2.107185 s 485.956 MB/s Buffer-Const,s!=d,xor=1: 2.368054 s 432.423 MB/s Buffer-Const,s==d,xor=0: 2.053791 s 498.590 MB/s Buffer-Const,s==d,xor=1: 2.313108 s 442.694 MB/s +1048576 1024 16 LOG - - Seed: 1347473586 Buffer-Const,s!=d,xor=0: 2.105079 s 486.443 MB/s Buffer-Const,s!=d,xor=1: 2.444869 s 418.836 MB/s Buffer-Const,s==d,xor=0: 2.271658 s 450.772 MB/s Buffer-Const,s==d,xor=1: 2.413470 s 424.285 MB/s +2097152 512 16 LOG - - Seed: 1347473600 Buffer-Const,s!=d,xor=0: 2.159018 s 474.290 MB/s Buffer-Const,s!=d,xor=1: 2.419327 s 423.258 MB/s Buffer-Const,s==d,xor=0: 2.031202 s 504.135 MB/s Buffer-Const,s==d,xor=1: 2.301943 s 444.842 MB/s +4194304 256 16 LOG - - Seed: 1347473615 Buffer-Const,s!=d,xor=0: 2.194868 s 466.543 MB/s 
Buffer-Const,s!=d,xor=1: 2.460607 s 416.158 MB/s Buffer-Const,s==d,xor=0: 2.142866 s 477.865 MB/s Buffer-Const,s==d,xor=1: 2.387212 s 428.952 MB/s +8388608 128 16 LOG - - Seed: 1347473629 Buffer-Const,s!=d,xor=0: 2.178997 s 469.941 MB/s Buffer-Const,s!=d,xor=1: 2.467580 s 414.982 MB/s Buffer-Const,s==d,xor=0: 2.240649 s 457.010 MB/s Buffer-Const,s==d,xor=1: 2.394828 s 427.588 MB/s +16777216 64 16 LOG - - Seed: 1347473644 Buffer-Const,s!=d,xor=0: 2.185086 s 468.632 MB/s Buffer-Const,s!=d,xor=1: 2.635728 s 388.508 MB/s Buffer-Const,s==d,xor=0: 2.144898 s 477.412 MB/s Buffer-Const,s==d,xor=1: 2.402648 s 426.196 MB/s +33554432 32 16 LOG - - Seed: 1347473658 Buffer-Const,s!=d,xor=0: 2.209707 s 463.410 MB/s Buffer-Const,s!=d,xor=1: 2.969263 s 344.867 MB/s Buffer-Const,s==d,xor=0: 2.144736 s 477.448 MB/s Buffer-Const,s==d,xor=1: 2.394575 s 427.633 MB/s +67108864 16 16 LOG - - Seed: 1347473673 Buffer-Const,s!=d,xor=0: 2.281165 s 448.893 MB/s Buffer-Const,s!=d,xor=1: 2.929988 s 349.489 MB/s Buffer-Const,s==d,xor=0: 2.160325 s 474.003 MB/s Buffer-Const,s==d,xor=1: 2.481787 s 412.606 MB/s +134217728 8 16 LOG - - Seed: 1347473689 Buffer-Const,s!=d,xor=0: 2.272319 s 450.641 MB/s Buffer-Const,s!=d,xor=1: 2.957651 s 346.221 MB/s Buffer-Const,s==d,xor=0: 2.097608 s 488.175 MB/s Buffer-Const,s==d,xor=1: 2.352378 s 435.304 MB/s +268435456 4 16 LOG - - Seed: 1347473704 Buffer-Const,s!=d,xor=0: 2.354143 s 434.978 MB/s Buffer-Const,s!=d,xor=1: 2.481760 s 412.610 MB/s Buffer-Const,s==d,xor=0: 2.108223 s 485.717 MB/s Buffer-Const,s==d,xor=1: 2.357849 s 434.294 MB/s +536870912 2 16 LOG - - Seed: 1347473719 Buffer-Const,s!=d,xor=0: 2.545543 s 402.272 MB/s Buffer-Const,s!=d,xor=1: 2.441404 s 419.431 MB/s Buffer-Const,s==d,xor=0: 2.135140 s 479.594 MB/s Buffer-Const,s==d,xor=1: 2.359027 s 434.077 MB/s +1073741824 1 16 LOG - - Seed: 1347473734 Buffer-Const,s!=d,xor=0: 2.985415 s 343.001 MB/s Buffer-Const,s!=d,xor=1: 2.422259 s 422.746 MB/s Buffer-Const,s==d,xor=0: 2.104547 s 486.566 MB/s 
Buffer-Const,s==d,xor=1: 2.372315 s 431.646 MB/s +1024 1048576 16 SPLIT 16 4 NOSSE - Seed: 1347473750 Buffer-Const,s!=d,xor=0: 1.606844 s 637.274 MB/s Buffer-Const,s!=d,xor=1: 1.804383 s 567.507 MB/s Buffer-Const,s==d,xor=0: 1.615952 s 633.682 MB/s Buffer-Const,s==d,xor=1: 1.797352 s 569.727 MB/s +2048 524288 16 SPLIT 16 4 NOSSE - Seed: 1347473762 Buffer-Const,s!=d,xor=0: 1.541486 s 664.294 MB/s Buffer-Const,s!=d,xor=1: 1.685396 s 607.572 MB/s Buffer-Const,s==d,xor=0: 1.503978 s 680.861 MB/s Buffer-Const,s==d,xor=1: 1.684867 s 607.763 MB/s +4096 262144 16 SPLIT 16 4 NOSSE - Seed: 1347473774 Buffer-Const,s!=d,xor=0: 1.458985 s 701.858 MB/s Buffer-Const,s!=d,xor=1: 1.642648 s 623.384 MB/s Buffer-Const,s==d,xor=0: 1.459893 s 701.421 MB/s Buffer-Const,s==d,xor=1: 1.647799 s 621.435 MB/s +8192 131072 16 SPLIT 16 4 NOSSE - Seed: 1347473785 Buffer-Const,s!=d,xor=0: 1.443518 s 709.378 MB/s Buffer-Const,s!=d,xor=1: 1.632999 s 627.067 MB/s Buffer-Const,s==d,xor=0: 1.445864 s 708.227 MB/s Buffer-Const,s==d,xor=1: 1.640243 s 624.298 MB/s +16384 65536 16 SPLIT 16 4 NOSSE - Seed: 1347473797 Buffer-Const,s!=d,xor=0: 1.440485 s 710.872 MB/s Buffer-Const,s!=d,xor=1: 1.610468 s 635.840 MB/s Buffer-Const,s==d,xor=0: 1.423815 s 719.194 MB/s Buffer-Const,s==d,xor=1: 1.616802 s 633.349 MB/s +32768 32768 16 SPLIT 16 4 NOSSE - Seed: 1347473808 Buffer-Const,s!=d,xor=0: 1.430503 s 715.832 MB/s Buffer-Const,s!=d,xor=1: 1.617286 s 633.159 MB/s Buffer-Const,s==d,xor=0: 1.450425 s 706.000 MB/s Buffer-Const,s==d,xor=1: 1.628290 s 628.881 MB/s +65536 16384 16 SPLIT 16 4 NOSSE - Seed: 1347473819 Buffer-Const,s!=d,xor=0: 1.431340 s 715.414 MB/s Buffer-Const,s!=d,xor=1: 1.603276 s 638.692 MB/s Buffer-Const,s==d,xor=0: 1.484436 s 689.824 MB/s Buffer-Const,s==d,xor=1: 1.626883 s 629.424 MB/s +131072 8192 16 SPLIT 16 4 NOSSE - Seed: 1347473831 Buffer-Const,s!=d,xor=0: 1.435691 s 713.245 MB/s Buffer-Const,s!=d,xor=1: 1.618436 s 632.710 MB/s Buffer-Const,s==d,xor=0: 1.450719 s 705.857 MB/s 
Buffer-Const,s==d,xor=1: 1.604518 s 638.198 MB/s +262144 4096 16 SPLIT 16 4 NOSSE - Seed: 1347473842 Buffer-Const,s!=d,xor=0: 1.434818 s 713.679 MB/s Buffer-Const,s!=d,xor=1: 1.685219 s 607.636 MB/s Buffer-Const,s==d,xor=0: 1.412647 s 724.880 MB/s Buffer-Const,s==d,xor=1: 1.606347 s 637.471 MB/s +524288 2048 16 SPLIT 16 4 NOSSE - Seed: 1347473854 Buffer-Const,s!=d,xor=0: 1.437057 s 712.568 MB/s Buffer-Const,s!=d,xor=1: 1.605284 s 637.893 MB/s Buffer-Const,s==d,xor=0: 1.424157 s 719.022 MB/s Buffer-Const,s==d,xor=1: 1.616495 s 633.469 MB/s +1048576 1024 16 SPLIT 16 4 NOSSE - Seed: 1347473865 Buffer-Const,s!=d,xor=0: 1.417949 s 722.170 MB/s Buffer-Const,s!=d,xor=1: 1.636933 s 625.560 MB/s Buffer-Const,s==d,xor=0: 1.413730 s 724.325 MB/s Buffer-Const,s==d,xor=1: 1.619097 s 632.451 MB/s +2097152 512 16 SPLIT 16 4 NOSSE - Seed: 1347473876 Buffer-Const,s!=d,xor=0: 1.449239 s 706.577 MB/s Buffer-Const,s!=d,xor=1: 1.642027 s 623.619 MB/s Buffer-Const,s==d,xor=0: 1.482682 s 690.640 MB/s Buffer-Const,s==d,xor=1: 1.606026 s 637.599 MB/s +4194304 256 16 SPLIT 16 4 NOSSE - Seed: 1347473888 Buffer-Const,s!=d,xor=0: 1.466069 s 698.467 MB/s Buffer-Const,s!=d,xor=1: 1.642522 s 623.431 MB/s Buffer-Const,s==d,xor=0: 1.439317 s 711.449 MB/s Buffer-Const,s==d,xor=1: 1.631946 s 627.472 MB/s +8388608 128 16 SPLIT 16 4 NOSSE - Seed: 1347473899 Buffer-Const,s!=d,xor=0: 1.454558 s 703.994 MB/s Buffer-Const,s!=d,xor=1: 1.650872 s 620.278 MB/s Buffer-Const,s==d,xor=0: 1.445065 s 708.619 MB/s Buffer-Const,s==d,xor=1: 1.628372 s 628.849 MB/s +16777216 64 16 SPLIT 16 4 NOSSE - Seed: 1347473911 Buffer-Const,s!=d,xor=0: 1.459724 s 701.503 MB/s Buffer-Const,s!=d,xor=1: 1.650864 s 620.281 MB/s Buffer-Const,s==d,xor=0: 1.440828 s 710.702 MB/s Buffer-Const,s==d,xor=1: 1.634402 s 626.529 MB/s +33554432 32 16 SPLIT 16 4 NOSSE - Seed: 1347473922 Buffer-Const,s!=d,xor=0: 1.466166 s 698.420 MB/s Buffer-Const,s!=d,xor=1: 1.644006 s 622.869 MB/s Buffer-Const,s==d,xor=0: 1.439810 s 711.205 MB/s 
Buffer-Const,s==d,xor=1: 1.644597 s 622.645 MB/s +67108864 16 16 SPLIT 16 4 NOSSE - Seed: 1347473934 Buffer-Const,s!=d,xor=0: 1.509510 s 678.366 MB/s Buffer-Const,s!=d,xor=1: 1.680938 s 609.184 MB/s Buffer-Const,s==d,xor=0: 1.465974 s 698.512 MB/s Buffer-Const,s==d,xor=1: 1.645416 s 622.335 MB/s +134217728 8 16 SPLIT 16 4 NOSSE - Seed: 1347473945 Buffer-Const,s!=d,xor=0: 1.553459 s 659.174 MB/s Buffer-Const,s!=d,xor=1: 1.670615 s 612.948 MB/s Buffer-Const,s==d,xor=0: 1.468984 s 697.080 MB/s Buffer-Const,s==d,xor=1: 1.640758 s 624.102 MB/s +268435456 4 16 SPLIT 16 4 NOSSE - Seed: 1347473957 Buffer-Const,s!=d,xor=0: 1.656677 s 618.105 MB/s Buffer-Const,s!=d,xor=1: 1.669660 s 613.299 MB/s Buffer-Const,s==d,xor=0: 1.457518 s 702.564 MB/s Buffer-Const,s==d,xor=1: 1.656764 s 618.072 MB/s +536870912 2 16 SPLIT 16 4 NOSSE - Seed: 1347473969 Buffer-Const,s!=d,xor=0: 1.828201 s 560.113 MB/s Buffer-Const,s!=d,xor=1: 1.649980 s 620.614 MB/s Buffer-Const,s==d,xor=0: 1.450785 s 705.825 MB/s Buffer-Const,s==d,xor=1: 1.643633 s 623.010 MB/s +1073741824 1 16 SPLIT 16 4 NOSSE - Seed: 1347473981 Buffer-Const,s!=d,xor=0: 2.171743 s 471.511 MB/s Buffer-Const,s!=d,xor=1: 1.651953 s 619.872 MB/s Buffer-Const,s==d,xor=0: 1.466839 s 698.100 MB/s Buffer-Const,s==d,xor=1: 1.650641 s 620.365 MB/s +1024 1048576 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347473994 Buffer-Const,s!=d,xor=0: 0.440479 s 2324.740 MB/s Buffer-Const,s!=d,xor=1: 0.442003 s 2316.728 MB/s Buffer-Const,s==d,xor=0: 0.434042 s 2359.220 MB/s Buffer-Const,s==d,xor=1: 0.438626 s 2334.561 MB/s +2048 524288 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474002 Buffer-Const,s!=d,xor=0: 0.316802 s 3232.303 MB/s Buffer-Const,s!=d,xor=1: 0.331473 s 3089.240 MB/s Buffer-Const,s==d,xor=0: 0.317398 s 3226.233 MB/s Buffer-Const,s==d,xor=1: 0.330775 s 3095.758 MB/s +4096 262144 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474008 Buffer-Const,s!=d,xor=0: 0.264277 s 3874.730 MB/s Buffer-Const,s!=d,xor=1: 0.274880 s 3725.263 MB/s Buffer-Const,s==d,xor=0: 0.262972 s 
3893.956 MB/s Buffer-Const,s==d,xor=1: 0.275358 s 3718.789 MB/s +8192 131072 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474015 Buffer-Const,s!=d,xor=0: 0.232929 s 4396.190 MB/s Buffer-Const,s!=d,xor=1: 0.245429 s 4172.294 MB/s Buffer-Const,s==d,xor=0: 0.231047 s 4432.005 MB/s Buffer-Const,s==d,xor=1: 0.243273 s 4209.263 MB/s +16384 65536 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474021 Buffer-Const,s!=d,xor=0: 0.220903 s 4635.515 MB/s Buffer-Const,s!=d,xor=1: 0.232513 s 4404.047 MB/s Buffer-Const,s==d,xor=0: 0.217276 s 4712.908 MB/s Buffer-Const,s==d,xor=1: 0.235140 s 4354.860 MB/s +32768 32768 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474027 Buffer-Const,s!=d,xor=0: 0.209507 s 4887.660 MB/s Buffer-Const,s!=d,xor=1: 0.225435 s 4542.324 MB/s Buffer-Const,s==d,xor=0: 0.211855 s 4833.491 MB/s Buffer-Const,s==d,xor=1: 0.222550 s 4601.205 MB/s +65536 16384 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474033 Buffer-Const,s!=d,xor=0: 0.206457 s 4959.867 MB/s Buffer-Const,s!=d,xor=1: 0.226325 s 4524.466 MB/s Buffer-Const,s==d,xor=0: 0.209159 s 4895.805 MB/s Buffer-Const,s==d,xor=1: 0.222746 s 4597.167 MB/s +131072 8192 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474039 Buffer-Const,s!=d,xor=0: 0.211171 s 4849.147 MB/s Buffer-Const,s!=d,xor=1: 0.226647 s 4518.032 MB/s Buffer-Const,s==d,xor=0: 0.205702 s 4978.085 MB/s Buffer-Const,s==d,xor=1: 0.218717 s 4681.842 MB/s +262144 4096 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474046 Buffer-Const,s!=d,xor=0: 0.217228 s 4713.948 MB/s Buffer-Const,s!=d,xor=1: 0.228891 s 4473.752 MB/s Buffer-Const,s==d,xor=0: 0.208708 s 4906.375 MB/s Buffer-Const,s==d,xor=1: 0.218286 s 4691.098 MB/s +524288 2048 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474052 Buffer-Const,s!=d,xor=0: 0.213187 s 4803.301 MB/s Buffer-Const,s!=d,xor=1: 0.224506 s 4561.132 MB/s Buffer-Const,s==d,xor=0: 0.205507 s 4982.792 MB/s Buffer-Const,s==d,xor=1: 0.218068 s 4695.780 MB/s +1048576 1024 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474058 Buffer-Const,s!=d,xor=0: 0.212405 s 4820.980 MB/s Buffer-Const,s!=d,xor=1: 
0.224411 s 4563.056 MB/s Buffer-Const,s==d,xor=0: 0.204215 s 5014.334 MB/s Buffer-Const,s==d,xor=1: 0.216172 s 4736.964 MB/s +2097152 512 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474064 Buffer-Const,s!=d,xor=0: 0.261045 s 3922.692 MB/s Buffer-Const,s!=d,xor=1: 0.282378 s 3626.339 MB/s Buffer-Const,s==d,xor=0: 0.206318 s 4963.209 MB/s Buffer-Const,s==d,xor=1: 0.223613 s 4579.335 MB/s +4194304 256 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474070 Buffer-Const,s!=d,xor=0: 0.404333 s 2532.567 MB/s Buffer-Const,s!=d,xor=1: 0.395253 s 2590.748 MB/s Buffer-Const,s==d,xor=0: 0.254806 s 4018.747 MB/s Buffer-Const,s==d,xor=1: 0.268242 s 3817.453 MB/s +8388608 128 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474077 Buffer-Const,s!=d,xor=0: 0.396502 s 2582.583 MB/s Buffer-Const,s!=d,xor=1: 0.384216 s 2665.169 MB/s Buffer-Const,s==d,xor=0: 0.259012 s 3953.481 MB/s Buffer-Const,s==d,xor=1: 0.268045 s 3820.258 MB/s +16777216 64 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474083 Buffer-Const,s!=d,xor=0: 0.398601 s 2568.986 MB/s Buffer-Const,s!=d,xor=1: 0.389561 s 2628.599 MB/s Buffer-Const,s==d,xor=0: 0.262993 s 3893.642 MB/s Buffer-Const,s==d,xor=1: 0.270538 s 3785.046 MB/s +33554432 32 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474090 Buffer-Const,s!=d,xor=0: 0.404290 s 2532.837 MB/s Buffer-Const,s!=d,xor=1: 0.377139 s 2715.179 MB/s Buffer-Const,s==d,xor=0: 0.263448 s 3886.919 MB/s Buffer-Const,s==d,xor=1: 0.271300 s 3774.415 MB/s +67108864 16 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474096 Buffer-Const,s!=d,xor=0: 0.422952 s 2421.079 MB/s Buffer-Const,s!=d,xor=1: 0.373653 s 2740.513 MB/s Buffer-Const,s==d,xor=0: 0.257768 s 3972.566 MB/s Buffer-Const,s==d,xor=1: 0.273398 s 3745.460 MB/s +134217728 8 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474103 Buffer-Const,s!=d,xor=0: 0.461641 s 2218.174 MB/s Buffer-Const,s!=d,xor=1: 0.383588 s 2669.532 MB/s Buffer-Const,s==d,xor=0: 0.269459 s 3800.207 MB/s Buffer-Const,s==d,xor=1: 0.270140 s 3790.625 MB/s +268435456 4 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474110 
Buffer-Const,s!=d,xor=0: 0.530862 s 1928.940 MB/s Buffer-Const,s!=d,xor=1: 0.373995 s 2738.004 MB/s Buffer-Const,s==d,xor=0: 0.258169 s 3966.391 MB/s Buffer-Const,s==d,xor=1: 0.277354 s 3692.036 MB/s +536870912 2 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474117 Buffer-Const,s!=d,xor=0: 0.695551 s 1472.214 MB/s Buffer-Const,s!=d,xor=1: 0.374834 s 2731.876 MB/s Buffer-Const,s==d,xor=0: 0.259944 s 3939.311 MB/s Buffer-Const,s==d,xor=1: 0.278206 s 3680.724 MB/s +1073741824 1 16 SPLIT 16 4 SSE,STDMAP - Seed: 1347474124 Buffer-Const,s!=d,xor=0: 1.020324 s 1003.603 MB/s Buffer-Const,s!=d,xor=1: 0.379288 s 2699.796 MB/s Buffer-Const,s==d,xor=0: 0.257958 s 3969.639 MB/s Buffer-Const,s==d,xor=1: 0.269507 s 3799.531 MB/s +1024 1048576 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474132 Buffer-Const,s!=d,xor=0: 0.374943 s 2731.083 MB/s Buffer-Const,s!=d,xor=1: 0.381695 s 2682.773 MB/s Buffer-Const,s==d,xor=0: 0.360327 s 2841.861 MB/s Buffer-Const,s==d,xor=1: 0.388995 s 2632.423 MB/s +2048 524288 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474139 Buffer-Const,s!=d,xor=0: 0.257053 s 3983.608 MB/s Buffer-Const,s!=d,xor=1: 0.274414 s 3731.587 MB/s Buffer-Const,s==d,xor=0: 0.250040 s 4095.352 MB/s Buffer-Const,s==d,xor=1: 0.271855 s 3766.712 MB/s +4096 262144 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474146 Buffer-Const,s!=d,xor=0: 0.204864 s 4998.432 MB/s Buffer-Const,s!=d,xor=1: 0.218739 s 4681.388 MB/s Buffer-Const,s==d,xor=0: 0.198597 s 5156.172 MB/s Buffer-Const,s==d,xor=1: 0.220060 s 4653.274 MB/s +8192 131072 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474152 Buffer-Const,s!=d,xor=0: 0.170398 s 6009.469 MB/s Buffer-Const,s!=d,xor=1: 0.189123 s 5414.468 MB/s Buffer-Const,s==d,xor=0: 0.170900 s 5991.821 MB/s Buffer-Const,s==d,xor=1: 0.185934 s 5507.322 MB/s +16384 65536 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474158 Buffer-Const,s!=d,xor=0: 0.158379 s 6465.520 MB/s Buffer-Const,s!=d,xor=1: 0.173688 s 5895.623 MB/s Buffer-Const,s==d,xor=0: 0.153663 s 6663.916 MB/s Buffer-Const,s==d,xor=1: 0.169384 s 6045.427 
MB/s +32768 32768 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474164 Buffer-Const,s!=d,xor=0: 0.151898 s 6741.381 MB/s Buffer-Const,s!=d,xor=1: 0.179828 s 5694.332 MB/s Buffer-Const,s==d,xor=0: 0.152833 s 6700.114 MB/s Buffer-Const,s==d,xor=1: 0.170787 s 5995.761 MB/s +65536 16384 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474170 Buffer-Const,s!=d,xor=0: 0.149569 s 6846.337 MB/s Buffer-Const,s!=d,xor=1: 0.170012 s 6023.104 MB/s Buffer-Const,s==d,xor=0: 0.147070 s 6962.685 MB/s Buffer-Const,s==d,xor=1: 0.164932 s 6208.628 MB/s +131072 8192 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474176 Buffer-Const,s!=d,xor=0: 0.147436 s 6945.391 MB/s Buffer-Const,s!=d,xor=1: 0.177689 s 5762.875 MB/s Buffer-Const,s==d,xor=0: 0.141681 s 7227.494 MB/s Buffer-Const,s==d,xor=1: 0.159149 s 6434.234 MB/s +262144 4096 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474181 Buffer-Const,s!=d,xor=0: 0.176476 s 5802.489 MB/s Buffer-Const,s!=d,xor=1: 0.190235 s 5382.819 MB/s Buffer-Const,s==d,xor=0: 0.145066 s 7058.832 MB/s Buffer-Const,s==d,xor=1: 0.161759 s 6330.400 MB/s +524288 2048 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474187 Buffer-Const,s!=d,xor=0: 0.170859 s 5993.259 MB/s Buffer-Const,s!=d,xor=1: 0.184301 s 5556.124 MB/s Buffer-Const,s==d,xor=0: 0.146564 s 6986.709 MB/s Buffer-Const,s==d,xor=1: 0.163810 s 6251.162 MB/s +1048576 1024 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474193 Buffer-Const,s!=d,xor=0: 0.173408 s 5905.164 MB/s Buffer-Const,s!=d,xor=1: 0.186853 s 5480.246 MB/s Buffer-Const,s==d,xor=0: 0.145153 s 7054.646 MB/s Buffer-Const,s==d,xor=1: 0.161184 s 6352.994 MB/s +2097152 512 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474199 Buffer-Const,s!=d,xor=0: 0.253428 s 4040.592 MB/s Buffer-Const,s!=d,xor=1: 0.282650 s 3622.849 MB/s Buffer-Const,s==d,xor=0: 0.151634 s 6753.114 MB/s Buffer-Const,s==d,xor=1: 0.162269 s 6310.523 MB/s +4194304 256 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474205 Buffer-Const,s!=d,xor=0: 0.392085 s 2611.678 MB/s Buffer-Const,s!=d,xor=1: 0.390785 s 2620.365 MB/s Buffer-Const,s==d,xor=0: 0.225171 s 
4547.663 MB/s Buffer-Const,s==d,xor=1: 0.229246 s 4466.819 MB/s +8388608 128 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474212 Buffer-Const,s!=d,xor=0: 0.385162 s 2658.624 MB/s Buffer-Const,s!=d,xor=1: 0.384540 s 2662.920 MB/s Buffer-Const,s==d,xor=0: 0.236824 s 4323.885 MB/s Buffer-Const,s==d,xor=1: 0.242623 s 4220.542 MB/s +16777216 64 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474218 Buffer-Const,s!=d,xor=0: 0.390857 s 2619.887 MB/s Buffer-Const,s!=d,xor=1: 0.377723 s 2710.983 MB/s Buffer-Const,s==d,xor=0: 0.235221 s 4353.346 MB/s Buffer-Const,s==d,xor=1: 0.240751 s 4253.365 MB/s +33554432 32 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474225 Buffer-Const,s!=d,xor=0: 0.401487 s 2550.516 MB/s Buffer-Const,s!=d,xor=1: 0.375969 s 2723.626 MB/s Buffer-Const,s==d,xor=0: 0.235533 s 4347.591 MB/s Buffer-Const,s==d,xor=1: 0.238075 s 4301.161 MB/s +67108864 16 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474231 Buffer-Const,s!=d,xor=0: 0.412280 s 2483.751 MB/s Buffer-Const,s!=d,xor=1: 0.371134 s 2759.109 MB/s Buffer-Const,s==d,xor=0: 0.233195 s 4391.178 MB/s Buffer-Const,s==d,xor=1: 0.236408 s 4331.504 MB/s +134217728 8 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474238 Buffer-Const,s!=d,xor=0: 0.448526 s 2283.034 MB/s Buffer-Const,s!=d,xor=1: 0.370282 s 2765.460 MB/s Buffer-Const,s==d,xor=0: 0.233473 s 4385.940 MB/s Buffer-Const,s==d,xor=1: 0.240682 s 4254.570 MB/s +268435456 4 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474245 Buffer-Const,s!=d,xor=0: 0.524038 s 1954.058 MB/s Buffer-Const,s!=d,xor=1: 0.375533 s 2726.792 MB/s Buffer-Const,s==d,xor=0: 0.242939 s 4215.050 MB/s Buffer-Const,s==d,xor=1: 0.244587 s 4186.650 MB/s +536870912 2 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474251 Buffer-Const,s!=d,xor=0: 0.669426 s 1529.669 MB/s Buffer-Const,s!=d,xor=1: 0.376380 s 2720.655 MB/s Buffer-Const,s==d,xor=0: 0.238366 s 4295.912 MB/s Buffer-Const,s==d,xor=1: 0.246535 s 4153.567 MB/s +1073741824 1 16 SPLIT 16 4 SSE,ALTMAP - Seed: 1347474259 Buffer-Const,s!=d,xor=0: 0.980024 s 1044.872 MB/s Buffer-Const,s!=d,xor=1: 
0.373039 s 2745.021 MB/s Buffer-Const,s==d,xor=0: 0.257666 s 3974.139 MB/s Buffer-Const,s==d,xor=1: 0.236541 s 4329.059 MB/s +1024 1048576 32 SPLIT 8 8 - - Seed: 1347474267 Buffer-Const,s!=d,xor=0: 4.078636 s 251.064 MB/s Buffer-Const,s!=d,xor=1: 4.030305 s 254.075 MB/s Buffer-Const,s==d,xor=0: 3.999870 s 256.008 MB/s Buffer-Const,s==d,xor=1: 4.004111 s 255.737 MB/s +2048 524288 32 SPLIT 8 8 - - Seed: 1347474288 Buffer-Const,s!=d,xor=0: 3.907939 s 262.031 MB/s Buffer-Const,s!=d,xor=1: 3.925473 s 260.860 MB/s Buffer-Const,s==d,xor=0: 3.923752 s 260.975 MB/s Buffer-Const,s==d,xor=1: 3.880185 s 263.905 MB/s +4096 262144 32 SPLIT 8 8 - - Seed: 1347474309 Buffer-Const,s!=d,xor=0: 3.841739 s 266.546 MB/s Buffer-Const,s!=d,xor=1: 3.807040 s 268.975 MB/s Buffer-Const,s==d,xor=0: 3.757250 s 272.540 MB/s Buffer-Const,s==d,xor=1: 3.823483 s 267.819 MB/s +8192 131072 32 SPLIT 8 8 - - Seed: 1347474330 Buffer-Const,s!=d,xor=0: 3.847896 s 266.119 MB/s Buffer-Const,s!=d,xor=1: 3.804132 s 269.181 MB/s Buffer-Const,s==d,xor=0: 3.718211 s 275.401 MB/s Buffer-Const,s==d,xor=1: 3.918893 s 261.298 MB/s +16384 65536 32 SPLIT 8 8 - - Seed: 1347474351 Buffer-Const,s!=d,xor=0: 3.864461 s 264.979 MB/s Buffer-Const,s!=d,xor=1: 3.730061 s 274.526 MB/s Buffer-Const,s==d,xor=0: 3.765701 s 271.928 MB/s Buffer-Const,s==d,xor=1: 3.654276 s 280.220 MB/s +32768 32768 32 SPLIT 8 8 - - Seed: 1347474371 Buffer-Const,s!=d,xor=0: 3.685288 s 277.862 MB/s Buffer-Const,s!=d,xor=1: 3.737605 s 273.972 MB/s Buffer-Const,s==d,xor=0: 3.696803 s 276.996 MB/s Buffer-Const,s==d,xor=1: 3.687180 s 277.719 MB/s +65536 16384 32 SPLIT 8 8 - - Seed: 1347474391 Buffer-Const,s!=d,xor=0: 3.750255 s 273.048 MB/s Buffer-Const,s!=d,xor=1: 3.722842 s 275.059 MB/s Buffer-Const,s==d,xor=0: 3.639989 s 281.320 MB/s Buffer-Const,s==d,xor=1: 3.657695 s 279.958 MB/s +131072 8192 32 SPLIT 8 8 - - Seed: 1347474411 Buffer-Const,s!=d,xor=0: 3.727132 s 274.742 MB/s Buffer-Const,s!=d,xor=1: 3.719290 s 275.321 MB/s Buffer-Const,s==d,xor=0: 
3.658669 s 279.883 MB/s Buffer-Const,s==d,xor=1: 3.639225 s 281.379 MB/s +262144 4096 32 SPLIT 8 8 - - Seed: 1347474431 Buffer-Const,s!=d,xor=0: 3.699968 s 276.759 MB/s Buffer-Const,s!=d,xor=1: 3.708224 s 276.143 MB/s Buffer-Const,s==d,xor=0: 3.636058 s 281.624 MB/s Buffer-Const,s==d,xor=1: 3.663831 s 279.489 MB/s +524288 2048 32 SPLIT 8 8 - - Seed: 1347474451 Buffer-Const,s!=d,xor=0: 3.715168 s 275.627 MB/s Buffer-Const,s!=d,xor=1: 3.716467 s 275.531 MB/s Buffer-Const,s==d,xor=0: 3.642050 s 281.160 MB/s Buffer-Const,s==d,xor=1: 3.650928 s 280.477 MB/s +1048576 1024 32 SPLIT 8 8 - - Seed: 1347474471 Buffer-Const,s!=d,xor=0: 3.748222 s 273.196 MB/s Buffer-Const,s!=d,xor=1: 3.707973 s 276.162 MB/s Buffer-Const,s==d,xor=0: 3.633509 s 281.821 MB/s Buffer-Const,s==d,xor=1: 3.606194 s 283.956 MB/s +2097152 512 32 SPLIT 8 8 - - Seed: 1347474491 Buffer-Const,s!=d,xor=0: 3.772647 s 271.427 MB/s Buffer-Const,s!=d,xor=1: 3.751378 s 272.966 MB/s Buffer-Const,s==d,xor=0: 3.627191 s 282.312 MB/s Buffer-Const,s==d,xor=1: 3.683802 s 277.974 MB/s +4194304 256 32 SPLIT 8 8 - - Seed: 1347474511 Buffer-Const,s!=d,xor=0: 3.710802 s 275.951 MB/s Buffer-Const,s!=d,xor=1: 3.754683 s 272.726 MB/s Buffer-Const,s==d,xor=0: 3.725703 s 274.847 MB/s Buffer-Const,s==d,xor=1: 3.741130 s 273.714 MB/s +8388608 128 32 SPLIT 8 8 - - Seed: 1347474531 Buffer-Const,s!=d,xor=0: 3.725367 s 274.872 MB/s Buffer-Const,s!=d,xor=1: 3.786313 s 270.448 MB/s Buffer-Const,s==d,xor=0: 3.661536 s 279.664 MB/s Buffer-Const,s==d,xor=1: 3.663013 s 279.551 MB/s +16777216 64 32 SPLIT 8 8 - - Seed: 1347474551 Buffer-Const,s!=d,xor=0: 3.733772 s 274.254 MB/s Buffer-Const,s!=d,xor=1: 3.834374 s 267.058 MB/s Buffer-Const,s==d,xor=0: 3.682088 s 278.103 MB/s Buffer-Const,s==d,xor=1: 3.664883 s 279.409 MB/s +33554432 32 32 SPLIT 8 8 - - Seed: 1347474572 Buffer-Const,s!=d,xor=0: 3.788104 s 270.320 MB/s Buffer-Const,s!=d,xor=1: 3.787767 s 270.344 MB/s Buffer-Const,s==d,xor=0: 3.767594 s 271.792 MB/s Buffer-Const,s==d,xor=1: 
3.785537 s 270.503 MB/s +67108864 16 32 SPLIT 8 8 - - Seed: 1347474592 Buffer-Const,s!=d,xor=0: 3.902282 s 262.411 MB/s Buffer-Const,s!=d,xor=1: 3.781487 s 270.793 MB/s Buffer-Const,s==d,xor=0: 3.793007 s 269.970 MB/s Buffer-Const,s==d,xor=1: 3.694762 s 277.149 MB/s +134217728 8 32 SPLIT 8 8 - - Seed: 1347474613 Buffer-Const,s!=d,xor=0: 3.803999 s 269.190 MB/s Buffer-Const,s!=d,xor=1: 4.004633 s 255.704 MB/s Buffer-Const,s==d,xor=0: 3.789479 s 270.222 MB/s Buffer-Const,s==d,xor=1: 3.750047 s 273.063 MB/s +268435456 4 32 SPLIT 8 8 - - Seed: 1347474633 Buffer-Const,s!=d,xor=0: 3.924148 s 260.948 MB/s Buffer-Const,s!=d,xor=1: 3.670553 s 278.977 MB/s Buffer-Const,s==d,xor=0: 3.616194 s 283.171 MB/s Buffer-Const,s==d,xor=1: 3.472242 s 294.910 MB/s +536870912 2 32 SPLIT 8 8 - - Seed: 1347474654 Buffer-Const,s!=d,xor=0: 3.879471 s 263.954 MB/s Buffer-Const,s!=d,xor=1: 3.399143 s 301.252 MB/s Buffer-Const,s==d,xor=0: 3.431460 s 298.415 MB/s Buffer-Const,s==d,xor=1: 3.755244 s 272.685 MB/s +1073741824 1 32 SPLIT 8 8 - - Seed: 1347474674 Buffer-Const,s!=d,xor=0: 4.345123 s 235.667 MB/s Buffer-Const,s!=d,xor=1: 3.869906 s 264.606 MB/s Buffer-Const,s==d,xor=0: 3.230677 s 316.961 MB/s Buffer-Const,s==d,xor=1: 3.296187 s 310.662 MB/s +1024 1048576 32 SPLIT 32 4 NOSSE - Seed: 1347474695 Buffer-Const,s!=d,xor=0: 3.426757 s 298.825 MB/s Buffer-Const,s!=d,xor=1: 3.431886 s 298.378 MB/s Buffer-Const,s==d,xor=0: 3.434533 s 298.148 MB/s Buffer-Const,s==d,xor=1: 3.427667 s 298.745 MB/s +2048 524288 32 SPLIT 32 4 NOSSE - Seed: 1347474714 Buffer-Const,s!=d,xor=0: 3.157299 s 324.328 MB/s Buffer-Const,s!=d,xor=1: 3.174731 s 322.547 MB/s Buffer-Const,s==d,xor=0: 3.147733 s 325.313 MB/s Buffer-Const,s==d,xor=1: 3.124081 s 327.776 MB/s +4096 262144 32 SPLIT 32 4 NOSSE - Seed: 1347474732 Buffer-Const,s!=d,xor=0: 2.994873 s 341.918 MB/s Buffer-Const,s!=d,xor=1: 2.995221 s 341.878 MB/s Buffer-Const,s==d,xor=0: 2.977546 s 343.907 MB/s Buffer-Const,s==d,xor=1: 3.072379 s 333.292 MB/s +8192 131072 
32 SPLIT 32 4 NOSSE - Seed: 1347474749 Buffer-Const,s!=d,xor=0: 2.942536 s 347.999 MB/s Buffer-Const,s!=d,xor=1: 3.084363 s 331.997 MB/s Buffer-Const,s==d,xor=0: 3.405338 s 300.704 MB/s Buffer-Const,s==d,xor=1: 2.927504 s 349.786 MB/s +16384 65536 32 SPLIT 32 4 NOSSE - Seed: 1347474767 Buffer-Const,s!=d,xor=0: 2.898155 s 353.328 MB/s Buffer-Const,s!=d,xor=1: 2.916746 s 351.076 MB/s Buffer-Const,s==d,xor=0: 2.890006 s 354.325 MB/s Buffer-Const,s==d,xor=1: 2.884264 s 355.030 MB/s +32768 32768 32 SPLIT 32 4 NOSSE - Seed: 1347474784 Buffer-Const,s!=d,xor=0: 2.931310 s 349.332 MB/s Buffer-Const,s!=d,xor=1: 3.416450 s 299.726 MB/s Buffer-Const,s==d,xor=0: 2.887018 s 354.691 MB/s Buffer-Const,s==d,xor=1: 2.879376 s 355.633 MB/s +65536 16384 32 SPLIT 32 4 NOSSE - Seed: 1347474802 Buffer-Const,s!=d,xor=0: 2.836922 s 360.955 MB/s Buffer-Const,s!=d,xor=1: 2.840663 s 360.479 MB/s Buffer-Const,s==d,xor=0: 2.844574 s 359.984 MB/s Buffer-Const,s==d,xor=1: 2.861463 s 357.859 MB/s +131072 8192 32 SPLIT 32 4 NOSSE - Seed: 1347474818 Buffer-Const,s!=d,xor=0: 2.847367 s 359.630 MB/s Buffer-Const,s!=d,xor=1: 2.891688 s 354.118 MB/s Buffer-Const,s==d,xor=0: 2.893903 s 353.847 MB/s Buffer-Const,s==d,xor=1: 2.853056 s 358.913 MB/s +262144 4096 32 SPLIT 32 4 NOSSE - Seed: 1347474835 Buffer-Const,s!=d,xor=0: 2.845535 s 359.862 MB/s Buffer-Const,s!=d,xor=1: 2.883235 s 355.157 MB/s Buffer-Const,s==d,xor=0: 2.825692 s 362.389 MB/s Buffer-Const,s==d,xor=1: 2.824296 s 362.568 MB/s +524288 2048 32 SPLIT 32 4 NOSSE - Seed: 1347474852 Buffer-Const,s!=d,xor=0: 2.830584 s 361.763 MB/s Buffer-Const,s!=d,xor=1: 2.842232 s 360.280 MB/s Buffer-Const,s==d,xor=0: 2.855167 s 358.648 MB/s Buffer-Const,s==d,xor=1: 2.844668 s 359.972 MB/s +1048576 1024 32 SPLIT 32 4 NOSSE - Seed: 1347474868 Buffer-Const,s!=d,xor=0: 2.866585 s 357.220 MB/s Buffer-Const,s!=d,xor=1: 2.903829 s 352.638 MB/s Buffer-Const,s==d,xor=0: 2.842242 s 360.279 MB/s Buffer-Const,s==d,xor=1: 2.842065 s 360.301 MB/s +2097152 512 32 SPLIT 32 4 
NOSSE - Seed: 1347474885 Buffer-Const,s!=d,xor=0: 2.881829 s 355.330 MB/s Buffer-Const,s!=d,xor=1: 2.869698 s 356.832 MB/s Buffer-Const,s==d,xor=0: 2.844980 s 359.932 MB/s Buffer-Const,s==d,xor=1: 2.879891 s 355.569 MB/s +4194304 256 32 SPLIT 32 4 NOSSE - Seed: 1347474902 Buffer-Const,s!=d,xor=0: 2.904719 s 352.530 MB/s Buffer-Const,s!=d,xor=1: 2.957000 s 346.297 MB/s Buffer-Const,s==d,xor=0: 2.897870 s 353.363 MB/s Buffer-Const,s==d,xor=1: 2.860694 s 357.955 MB/s +8388608 128 32 SPLIT 32 4 NOSSE - Seed: 1347474919 Buffer-Const,s!=d,xor=0: 2.891168 s 354.182 MB/s Buffer-Const,s!=d,xor=1: 2.912499 s 351.588 MB/s Buffer-Const,s==d,xor=0: 2.905373 s 352.450 MB/s Buffer-Const,s==d,xor=1: 2.877376 s 355.880 MB/s +16777216 64 32 SPLIT 32 4 NOSSE - Seed: 1347474936 Buffer-Const,s!=d,xor=0: 2.874882 s 356.188 MB/s Buffer-Const,s!=d,xor=1: 2.891762 s 354.109 MB/s Buffer-Const,s==d,xor=0: 2.882928 s 355.194 MB/s Buffer-Const,s==d,xor=1: 2.899087 s 353.215 MB/s +33554432 32 32 SPLIT 32 4 NOSSE - Seed: 1347474952 Buffer-Const,s!=d,xor=0: 2.927485 s 349.788 MB/s Buffer-Const,s!=d,xor=1: 2.908132 s 352.116 MB/s Buffer-Const,s==d,xor=0: 2.885413 s 354.889 MB/s Buffer-Const,s==d,xor=1: 2.878262 s 355.770 MB/s +67108864 16 32 SPLIT 32 4 NOSSE - Seed: 1347474969 Buffer-Const,s!=d,xor=0: 2.936867 s 348.671 MB/s Buffer-Const,s!=d,xor=1: 2.918213 s 350.900 MB/s Buffer-Const,s==d,xor=0: 2.860039 s 358.037 MB/s Buffer-Const,s==d,xor=1: 2.914023 s 351.404 MB/s +134217728 8 32 SPLIT 32 4 NOSSE - Seed: 1347474986 Buffer-Const,s!=d,xor=0: 3.004242 s 340.851 MB/s Buffer-Const,s!=d,xor=1: 2.907473 s 352.196 MB/s Buffer-Const,s==d,xor=0: 2.870576 s 356.723 MB/s Buffer-Const,s==d,xor=1: 2.869254 s 356.887 MB/s +268435456 4 32 SPLIT 32 4 NOSSE - Seed: 1347475003 Buffer-Const,s!=d,xor=0: 3.086275 s 331.792 MB/s Buffer-Const,s!=d,xor=1: 2.917016 s 351.044 MB/s Buffer-Const,s==d,xor=0: 2.872529 s 356.480 MB/s Buffer-Const,s==d,xor=1: 2.876983 s 355.928 MB/s +536870912 2 32 SPLIT 32 4 NOSSE - Seed: 
1347475021 Buffer-Const,s!=d,xor=0: 3.246244 s 315.441 MB/s Buffer-Const,s!=d,xor=1: 2.880565 s 355.486 MB/s Buffer-Const,s==d,xor=0: 2.875096 s 356.162 MB/s Buffer-Const,s==d,xor=1: 2.880929 s 355.441 MB/s +1073741824 1 32 SPLIT 32 4 NOSSE - Seed: 1347475038 Buffer-Const,s!=d,xor=0: 3.564328 s 287.291 MB/s Buffer-Const,s!=d,xor=1: 2.865621 s 357.340 MB/s Buffer-Const,s==d,xor=0: 2.851534 s 359.105 MB/s Buffer-Const,s==d,xor=1: 2.894613 s 353.761 MB/s +1024 1048576 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475056 Buffer-Const,s!=d,xor=0: 1.386263 s 738.677 MB/s Buffer-Const,s!=d,xor=1: 1.388797 s 737.329 MB/s Buffer-Const,s==d,xor=0: 1.389924 s 736.731 MB/s Buffer-Const,s==d,xor=1: 1.397714 s 732.625 MB/s +2048 524288 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475068 Buffer-Const,s!=d,xor=0: 0.861808 s 1188.200 MB/s Buffer-Const,s!=d,xor=1: 0.862678 s 1187.002 MB/s Buffer-Const,s==d,xor=0: 0.858774 s 1192.397 MB/s Buffer-Const,s==d,xor=1: 0.877707 s 1166.676 MB/s +4096 262144 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475076 Buffer-Const,s!=d,xor=0: 0.599774 s 1707.309 MB/s Buffer-Const,s!=d,xor=1: 0.605536 s 1691.064 MB/s Buffer-Const,s==d,xor=0: 0.594488 s 1722.490 MB/s Buffer-Const,s==d,xor=1: 0.598080 s 1712.145 MB/s +8192 131072 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475084 Buffer-Const,s!=d,xor=0: 0.463454 s 2209.496 MB/s Buffer-Const,s!=d,xor=1: 0.476521 s 2148.911 MB/s Buffer-Const,s==d,xor=0: 0.463254 s 2210.451 MB/s Buffer-Const,s==d,xor=1: 0.475028 s 2155.663 MB/s +16384 65536 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475091 Buffer-Const,s!=d,xor=0: 0.403863 s 2535.512 MB/s Buffer-Const,s!=d,xor=1: 0.406392 s 2519.733 MB/s Buffer-Const,s==d,xor=0: 0.398511 s 2569.567 MB/s Buffer-Const,s==d,xor=1: 0.403220 s 2539.558 MB/s +32768 32768 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475098 Buffer-Const,s!=d,xor=0: 0.366185 s 2796.399 MB/s Buffer-Const,s!=d,xor=1: 0.371948 s 2753.077 MB/s Buffer-Const,s==d,xor=0: 0.365726 s 2799.912 MB/s Buffer-Const,s==d,xor=1: 0.373114 s 2744.470 MB/s 
+65536 16384 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475105 Buffer-Const,s!=d,xor=0: 0.355393 s 2881.320 MB/s Buffer-Const,s!=d,xor=1: 0.361260 s 2834.520 MB/s Buffer-Const,s==d,xor=0: 0.352397 s 2905.810 MB/s Buffer-Const,s==d,xor=1: 0.359221 s 2850.615 MB/s +131072 8192 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475111 Buffer-Const,s!=d,xor=0: 0.345755 s 2961.634 MB/s Buffer-Const,s!=d,xor=1: 0.349291 s 2931.656 MB/s Buffer-Const,s==d,xor=0: 0.349212 s 2932.317 MB/s Buffer-Const,s==d,xor=1: 0.346459 s 2955.613 MB/s +262144 4096 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475118 Buffer-Const,s!=d,xor=0: 0.339918 s 3012.490 MB/s Buffer-Const,s!=d,xor=1: 0.348600 s 2937.467 MB/s Buffer-Const,s==d,xor=0: 0.337834 s 3031.078 MB/s Buffer-Const,s==d,xor=1: 0.345001 s 2968.103 MB/s +524288 2048 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475125 Buffer-Const,s!=d,xor=0: 0.341756 s 2996.291 MB/s Buffer-Const,s!=d,xor=1: 0.350183 s 2924.183 MB/s Buffer-Const,s==d,xor=0: 0.339499 s 3016.213 MB/s Buffer-Const,s==d,xor=1: 0.344557 s 2971.934 MB/s +1048576 1024 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475131 Buffer-Const,s!=d,xor=0: 0.341502 s 2998.519 MB/s Buffer-Const,s!=d,xor=1: 0.346550 s 2954.845 MB/s Buffer-Const,s==d,xor=0: 0.335763 s 3049.766 MB/s Buffer-Const,s==d,xor=1: 0.341348 s 2999.872 MB/s +2097152 512 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475138 Buffer-Const,s!=d,xor=0: 0.359277 s 2850.168 MB/s Buffer-Const,s!=d,xor=1: 0.400653 s 2555.825 MB/s Buffer-Const,s==d,xor=0: 0.341248 s 3000.750 MB/s Buffer-Const,s==d,xor=1: 0.342671 s 2988.290 MB/s +4194304 256 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475145 Buffer-Const,s!=d,xor=0: 0.417889 s 2450.411 MB/s Buffer-Const,s!=d,xor=1: 0.473958 s 2160.527 MB/s Buffer-Const,s==d,xor=0: 0.376402 s 2720.496 MB/s Buffer-Const,s==d,xor=1: 0.382395 s 2677.859 MB/s +8388608 128 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475152 Buffer-Const,s!=d,xor=0: 0.438709 s 2334.120 MB/s Buffer-Const,s!=d,xor=1: 0.458300 s 2234.343 MB/s Buffer-Const,s==d,xor=0: 0.372593 s 
2748.306 MB/s Buffer-Const,s==d,xor=1: 0.377737 s 2710.881 MB/s +16777216 64 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475158 Buffer-Const,s!=d,xor=0: 0.430602 s 2378.066 MB/s Buffer-Const,s!=d,xor=1: 0.457316 s 2239.150 MB/s Buffer-Const,s==d,xor=0: 0.380552 s 2690.828 MB/s Buffer-Const,s==d,xor=1: 0.377189 s 2714.818 MB/s +33554432 32 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475165 Buffer-Const,s!=d,xor=0: 0.435465 s 2351.507 MB/s Buffer-Const,s!=d,xor=1: 0.463290 s 2210.280 MB/s Buffer-Const,s==d,xor=0: 0.377565 s 2712.118 MB/s Buffer-Const,s==d,xor=1: 0.379561 s 2697.854 MB/s +67108864 16 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475172 Buffer-Const,s!=d,xor=0: 0.469558 s 2180.775 MB/s Buffer-Const,s!=d,xor=1: 0.454985 s 2250.625 MB/s Buffer-Const,s==d,xor=0: 0.375220 s 2729.069 MB/s Buffer-Const,s==d,xor=1: 0.378372 s 2706.333 MB/s +134217728 8 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475179 Buffer-Const,s!=d,xor=0: 0.500927 s 2044.211 MB/s Buffer-Const,s!=d,xor=1: 0.461080 s 2220.875 MB/s Buffer-Const,s==d,xor=0: 0.378093 s 2708.328 MB/s Buffer-Const,s==d,xor=1: 0.380782 s 2689.205 MB/s +268435456 4 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475187 Buffer-Const,s!=d,xor=0: 0.583161 s 1755.947 MB/s Buffer-Const,s!=d,xor=1: 0.454891 s 2251.089 MB/s Buffer-Const,s==d,xor=0: 0.370498 s 2763.848 MB/s Buffer-Const,s==d,xor=1: 0.380160 s 2693.602 MB/s +536870912 2 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475194 Buffer-Const,s!=d,xor=0: 0.773630 s 1323.630 MB/s Buffer-Const,s!=d,xor=1: 0.449662 s 2277.266 MB/s Buffer-Const,s==d,xor=0: 0.378359 s 2706.425 MB/s Buffer-Const,s==d,xor=1: 0.381460 s 2684.423 MB/s +1073741824 1 32 SPLIT 32 4 SSE,STDMAP - Seed: 1347475202 Buffer-Const,s!=d,xor=0: 1.140212 s 898.079 MB/s Buffer-Const,s!=d,xor=1: 0.448195 s 2284.720 MB/s Buffer-Const,s==d,xor=0: 0.371347 s 2757.529 MB/s Buffer-Const,s==d,xor=1: 0.383728 s 2668.557 MB/s +1024 1048576 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475210 Buffer-Const,s!=d,xor=0: 1.306085 s 784.023 MB/s Buffer-Const,s!=d,xor=1: 
1.316872 s 777.600 MB/s Buffer-Const,s==d,xor=0: 1.312451 s 780.220 MB/s Buffer-Const,s==d,xor=1: 1.336282 s 766.305 MB/s +2048 524288 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475221 Buffer-Const,s!=d,xor=0: 0.780763 s 1311.537 MB/s Buffer-Const,s!=d,xor=1: 0.788499 s 1298.670 MB/s Buffer-Const,s==d,xor=0: 0.774973 s 1321.336 MB/s Buffer-Const,s==d,xor=1: 0.787734 s 1299.931 MB/s +4096 262144 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475229 Buffer-Const,s!=d,xor=0: 0.509444 s 2010.036 MB/s Buffer-Const,s!=d,xor=1: 0.528554 s 1937.360 MB/s Buffer-Const,s==d,xor=0: 0.515298 s 1987.198 MB/s Buffer-Const,s==d,xor=1: 0.533344 s 1919.963 MB/s +8192 131072 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475237 Buffer-Const,s!=d,xor=0: 0.385064 s 2659.301 MB/s Buffer-Const,s!=d,xor=1: 0.389860 s 2626.586 MB/s Buffer-Const,s==d,xor=0: 0.377777 s 2710.597 MB/s Buffer-Const,s==d,xor=1: 0.389788 s 2627.068 MB/s +16384 65536 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475244 Buffer-Const,s!=d,xor=0: 0.316446 s 3235.938 MB/s Buffer-Const,s!=d,xor=1: 0.327997 s 3121.980 MB/s Buffer-Const,s==d,xor=0: 0.313605 s 3265.256 MB/s Buffer-Const,s==d,xor=1: 0.323668 s 3163.736 MB/s +32768 32768 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475250 Buffer-Const,s!=d,xor=0: 0.280216 s 3654.318 MB/s Buffer-Const,s!=d,xor=1: 0.293557 s 3488.244 MB/s Buffer-Const,s==d,xor=0: 0.278453 s 3677.463 MB/s Buffer-Const,s==d,xor=1: 0.296944 s 3448.460 MB/s +65536 16384 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475256 Buffer-Const,s!=d,xor=0: 0.302436 s 3385.842 MB/s Buffer-Const,s!=d,xor=1: 0.277890 s 3684.909 MB/s Buffer-Const,s==d,xor=0: 0.262908 s 3894.892 MB/s Buffer-Const,s==d,xor=1: 0.272852 s 3752.951 MB/s +131072 8192 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475263 Buffer-Const,s!=d,xor=0: 0.253372 s 4041.493 MB/s Buffer-Const,s!=d,xor=1: 0.265148 s 3861.999 MB/s Buffer-Const,s==d,xor=0: 0.253380 s 4041.364 MB/s Buffer-Const,s==d,xor=1: 0.264949 s 3864.897 MB/s +262144 4096 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475269 
Buffer-Const,s!=d,xor=0: 0.252645 s 4053.125 MB/s Buffer-Const,s!=d,xor=1: 0.265852 s 3851.771 MB/s Buffer-Const,s==d,xor=0: 0.252493 s 4055.552 MB/s Buffer-Const,s==d,xor=1: 0.261079 s 3922.183 MB/s +524288 2048 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475276 Buffer-Const,s!=d,xor=0: 0.250466 s 4088.377 MB/s Buffer-Const,s!=d,xor=1: 0.262400 s 3902.444 MB/s Buffer-Const,s==d,xor=0: 0.250604 s 4086.133 MB/s Buffer-Const,s==d,xor=1: 0.266080 s 3848.461 MB/s +1048576 1024 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475282 Buffer-Const,s!=d,xor=0: 0.261790 s 3911.528 MB/s Buffer-Const,s!=d,xor=1: 0.270695 s 3782.856 MB/s Buffer-Const,s==d,xor=0: 0.251318 s 4074.523 MB/s Buffer-Const,s==d,xor=1: 0.262138 s 3906.337 MB/s +2097152 512 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475288 Buffer-Const,s!=d,xor=0: 0.307526 s 3329.799 MB/s Buffer-Const,s!=d,xor=1: 0.335713 s 3050.225 MB/s Buffer-Const,s==d,xor=0: 0.251394 s 4073.291 MB/s Buffer-Const,s==d,xor=1: 0.261434 s 3916.854 MB/s +4194304 256 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475295 Buffer-Const,s!=d,xor=0: 0.392351 s 2609.907 MB/s Buffer-Const,s!=d,xor=1: 0.407979 s 2509.932 MB/s Buffer-Const,s==d,xor=0: 0.298268 s 3433.150 MB/s Buffer-Const,s==d,xor=1: 0.307510 s 3329.977 MB/s +8388608 128 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475301 Buffer-Const,s!=d,xor=0: 0.382194 s 2679.271 MB/s Buffer-Const,s!=d,xor=1: 0.401781 s 2548.653 MB/s Buffer-Const,s==d,xor=0: 0.299551 s 3418.447 MB/s Buffer-Const,s==d,xor=1: 0.310157 s 3301.553 MB/s +16777216 64 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475308 Buffer-Const,s!=d,xor=0: 0.380721 s 2689.631 MB/s Buffer-Const,s!=d,xor=1: 0.407525 s 2512.729 MB/s Buffer-Const,s==d,xor=0: 0.312348 s 3278.393 MB/s Buffer-Const,s==d,xor=1: 0.315135 s 3249.401 MB/s +33554432 32 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475315 Buffer-Const,s!=d,xor=0: 0.417304 s 2453.845 MB/s Buffer-Const,s!=d,xor=1: 0.399624 s 2562.406 MB/s Buffer-Const,s==d,xor=0: 0.301665 s 3394.496 MB/s Buffer-Const,s==d,xor=1: 0.308625 s 3317.940 MB/s 
+67108864 16 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475322 Buffer-Const,s!=d,xor=0: 0.409972 s 2497.732 MB/s Buffer-Const,s!=d,xor=1: 0.390287 s 2623.712 MB/s Buffer-Const,s==d,xor=0: 0.300863 s 3403.539 MB/s Buffer-Const,s==d,xor=1: 0.310340 s 3299.610 MB/s +134217728 8 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475328 Buffer-Const,s!=d,xor=0: 0.462659 s 2213.291 MB/s Buffer-Const,s!=d,xor=1: 0.394245 s 2597.369 MB/s Buffer-Const,s==d,xor=0: 0.304025 s 3368.140 MB/s Buffer-Const,s==d,xor=1: 0.305459 s 3352.329 MB/s +268435456 4 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475335 Buffer-Const,s!=d,xor=0: 0.535408 s 1912.560 MB/s Buffer-Const,s!=d,xor=1: 0.389819 s 2626.859 MB/s Buffer-Const,s==d,xor=0: 0.300778 s 3404.505 MB/s Buffer-Const,s==d,xor=1: 0.309189 s 3311.892 MB/s +536870912 2 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475342 Buffer-Const,s!=d,xor=0: 0.697741 s 1467.594 MB/s Buffer-Const,s!=d,xor=1: 0.391113 s 2618.169 MB/s Buffer-Const,s==d,xor=0: 0.297088 s 3446.786 MB/s Buffer-Const,s==d,xor=1: 0.312841 s 3273.229 MB/s +1073741824 1 32 SPLIT 32 4 SSE,ALTMAP - Seed: 1347475350 Buffer-Const,s!=d,xor=0: 1.069489 s 957.467 MB/s Buffer-Const,s!=d,xor=1: 0.396330 s 2583.705 MB/s Buffer-Const,s==d,xor=0: 0.315887 s 3241.666 MB/s Buffer-Const,s==d,xor=1: 0.307323 s 3331.999 MB/s diff --git a/tmp2.sh b/tmp2.sh new file mode 100644 index 0000000..d98248f --- /dev/null +++ b/tmp2.sh @@ -0,0 +1,13 @@ +if [ $# -lt 4 ]; then + echo 'usage: sh tmp-test.sh w gf_specs (e.g. LOG - -)' >&2 + exit 1 +fi + +w=$1 +shift +i=1024 +while [ $i -le 1073741824 ]; do + iter=`echo $i | awk '{ print (1073741824/$1)*10 }'` + echo $i $iter $w $* `gf_time $w R -1 $i $iter $*` + i=`echo $i | awk '{ print $1*2 }'` +done