2018-11-12 15:25:55 +03:00
|
|
|
// Assemble this file only when none of the noasm, appengine or gccgo tags is
// set. Terms are comma-separated because commas mean AND in a +build line;
// space-separated terms would be ORed together.
//+build !noasm,!appengine,!gccgo
|
2017-08-26 12:47:42 +03:00
|
|
|
|
|
|
|
// Copyright 2015, Klaus Post, see LICENSE for details.
|
|
|
|
// Copyright 2017, Minio, Inc.
|
|
|
|
|
|
|
|
// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
|
|
|
|
// the opcodes of their Plan9 equivalents
|
|
|
|
|
2020-05-13 11:24:22 +03:00
|
|
|
// func galMulNEON(low, high, in, out []byte)
//
// For each byte b of in, stores low[b & 0x0f] ^ high[b >> 4] at the same
// index of out. Data is handled in 32-byte chunks: the function returns at
// once when len(in) < 32, and any trailing len(in) % 32 bytes are left
// untouched. Exactly 16 bytes are read from each of low and high; len(out) is
// never consulted.
//
// Register use:
//   R1       read cursor into in (post-incremented by the load)
//   R2       len(in) - 32, reduced by 32 after every chunk
//   R3       scratch holding 0x0f
//   R5       write cursor into out (post-incremented by the store)
//   R10/R11  base pointers of low / high
//   V6/V7    16-byte tables loaded from low / high
//   V8       0x0f replicated into every byte lane
TEXT ·galMulNEON(SB), 7, $0
	MOVD in_len+56(FP), R2   // R2 = len(in)
	MOVD in_base+48(FP), R1  // R1 = base pointer of in
	MOVD out_base+72(FP), R5 // R5 = base pointer of out
	SUBS $32, R2             // negative when no full 32-byte chunk exists
	BMI  mulDone

	// Nibble mask: 0x0f in all 16 lanes of V8.
	MOVD $0x0f, R3
	WORD $0x4e010c68 // dup v8.16b, w3

	// Lookup tables: V6 <- 16 bytes at low, V7 <- 16 bytes at high.
	MOVD low+0(FP), R10
	MOVD high+24(FP), R11
	WORD $0x4c407146 // ld1 {v6.16b}, [x10]
	WORD $0x4c407167 // ld1 {v7.16b}, [x11]

mulLoop:
	// Fetch the next 32 input bytes into V0/V1 and advance R1.
	WORD $0x4cdfa020 // ld1 {v0.16b-v1.16b}, [x1], #32

	// Split every byte into nibbles: high nibbles go to V10/V11, low nibbles
	// stay in V0/V1. Each ushr runs before the and that overwrites its source.
	WORD $0x6f0c040a // ushr v10.16b, v0.16b, #4
	WORD $0x4e281c00 // and v0.16b, v0.16b, v8.16b
	WORD $0x6f0c042b // ushr v11.16b, v1.16b, #4
	WORD $0x4e281c21 // and v1.16b, v1.16b, v8.16b

	// First 16 bytes: V4 = low[lo nibble] ^ high[hi nibble].
	WORD $0x4e0000c4 // tbl v4.16b, {v6.16b}, v0.16b
	WORD $0x4e0a00e5 // tbl v5.16b, {v7.16b}, v10.16b
	WORD $0x6e251c84 // eor v4.16b, v4.16b, v5.16b

	// Second 16 bytes: V5 = low[lo nibble] ^ high[hi nibble].
	WORD $0x4e0100ce // tbl v14.16b, {v6.16b}, v1.16b
	WORD $0x4e0b00ef // tbl v15.16b, {v7.16b}, v11.16b
	WORD $0x6e2f1dc5 // eor v5.16b, v14.16b, v15.16b

	// Write the 32 result bytes and advance R5.
	WORD $0x4c9faca4 // st1 {v4.2d-v5.2d}, [x5], #32

	// Continue while at least one more full chunk remains.
	SUBS $32, R2
	BPL  mulLoop

mulDone:
	RET
|
|
|
|
|
2020-05-13 11:24:22 +03:00
|
|
|
|
|
|
|
// func galMulXorNEON(low, high, in, out []byte)
//
// For each byte b of in, XORs low[b & 0x0f] ^ high[b >> 4] into the byte at
// the same index of out (out[i] ^= ...). Data is handled in 32-byte chunks:
// the function returns at once when len(in) < 32, and any trailing
// len(in) % 32 bytes are left untouched. Exactly 16 bytes are read from each
// of low and high; len(out) is never consulted.
//
// Register use:
//   R1       read cursor into in (post-incremented by the load)
//   R2       len(in) - 32, reduced by 32 after every chunk
//   R3       scratch holding 0x0f
//   R5       cursor into out (read in place, post-incremented by the store)
//   R10/R11  base pointers of low / high
//   V6/V7    16-byte tables loaded from low / high
//   V8       0x0f replicated into every byte lane
//   V20/V21  current 32 bytes of out
TEXT ·galMulXorNEON(SB), 7, $0
	MOVD in_len+56(FP), R2   // R2 = len(in)
	MOVD in_base+48(FP), R1  // R1 = base pointer of in
	MOVD out_base+72(FP), R5 // R5 = base pointer of out
	SUBS $32, R2             // negative when no full 32-byte chunk exists
	BMI  mulXorDone

	// Nibble mask: 0x0f in all 16 lanes of V8.
	MOVD $0x0f, R3
	WORD $0x4e010c68 // dup v8.16b, w3

	// Lookup tables: V6 <- 16 bytes at low, V7 <- 16 bytes at high.
	MOVD low+0(FP), R10
	MOVD high+24(FP), R11
	WORD $0x4c407146 // ld1 {v6.16b}, [x10]
	WORD $0x4c407167 // ld1 {v7.16b}, [x11]

mulXorLoop:
	// Fetch the next 32 input bytes into V0/V1 and advance R1.
	WORD $0x4cdfa020 // ld1 {v0.16b-v1.16b}, [x1], #32

	// Split every byte into nibbles: high nibbles go to V10/V11, low nibbles
	// stay in V0/V1. Each ushr runs before the and that overwrites its source.
	WORD $0x6f0c040a // ushr v10.16b, v0.16b, #4
	WORD $0x4e281c00 // and v0.16b, v0.16b, v8.16b
	WORD $0x6f0c042b // ushr v11.16b, v1.16b, #4
	WORD $0x4e281c21 // and v1.16b, v1.16b, v8.16b

	// Fetch the current 32 output bytes; R5 is not advanced here because the
	// store below writes back to the same address.
	WORD $0x4c40a0b4 // ld1 {v20.16b-v21.16b}, [x5]

	// First 16 bytes: V4 = low[lo nibble] ^ high[hi nibble] ^ old out.
	WORD $0x4e0000c4 // tbl v4.16b, {v6.16b}, v0.16b
	WORD $0x4e0a00e5 // tbl v5.16b, {v7.16b}, v10.16b
	WORD $0x6e251c84 // eor v4.16b, v4.16b, v5.16b
	WORD $0x6e341c84 // eor v4.16b, v4.16b, v20.16b

	// Second 16 bytes: V5 = low[lo nibble] ^ high[hi nibble] ^ old out.
	WORD $0x4e0100ce // tbl v14.16b, {v6.16b}, v1.16b
	WORD $0x4e0b00ef // tbl v15.16b, {v7.16b}, v11.16b
	WORD $0x6e2f1dc5 // eor v5.16b, v14.16b, v15.16b
	WORD $0x6e351ca5 // eor v5.16b, v5.16b, v21.16b

	// Write the 32 result bytes and advance R5.
	WORD $0x4c9faca4 // st1 {v4.2d-v5.2d}, [x5], #32

	// Continue while at least one more full chunk remains.
	SUBS $32, R2
	BPL  mulXorLoop

mulXorDone:
	RET
|
2017-08-26 12:47:42 +03:00
|
|
|
|
2020-05-13 11:24:22 +03:00
|
|
|
// func galXorNEON(in, out []byte)
//
// XORs in into out byte by byte (out[i] ^= in[i]). Data is handled in 32-byte
// chunks: the function returns at once when len(in) < 32, and any trailing
// len(in) % 32 bytes are left untouched. len(out) is never consulted.
//
// Register use:
//   R1       read cursor into in (post-incremented by the load)
//   R2       len(in) - 32, reduced by 32 after every chunk
//   R5       cursor into out (read in place, post-incremented by the store)
//   V0/V1    current 32 bytes of in
//   V20/V21  current 32 bytes of out
//   V4/V5    XOR result written back to out
TEXT ·galXorNEON(SB), 7, $0
	MOVD in_len+8(FP), R2    // R2 = len(in)
	MOVD in_base+0(FP), R1   // R1 = base pointer of in
	MOVD out_base+24(FP), R5 // R5 = base pointer of out
	SUBS $32, R2             // negative when no full 32-byte chunk exists
	BMI  xorDone

xorLoop:
	// Fetch 32 bytes of in (advancing R1) and the matching 32 bytes of out.
	WORD $0x4cdfa020 // ld1 {v0.16b-v1.16b}, [x1], #32
	WORD $0x4c40a0b4 // ld1 {v20.16b-v21.16b}, [x5]

	// V4/V5 = in ^ out for the two 16-byte halves.
	WORD $0x6e341c04 // eor v4.16b, v0.16b, v20.16b
	WORD $0x6e351c25 // eor v5.16b, v1.16b, v21.16b

	// Write the 32 result bytes and advance R5.
	WORD $0x4c9faca4 // st1 {v4.2d-v5.2d}, [x5], #32

	// Continue while at least one more full chunk remains.
	SUBS $32, R2
	BPL  xorLoop

xorDone:
	RET
|