// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le
// +build ppc64 ppc64le

#include "textflag.h"

// func xorBytesVSX(dst, a, b *byte, n int)
//
// xorBytesVSX stores a[i] ^ b[i] into dst[i] for every i in [0, n).
//
// Strategy, from largest step to smallest:
//   - n >= 32:       loop32 handles 32 bytes per iteration using two
//                    16-byte VSX loads/stores; the n%32 leftover bytes
//                    then go through the steps below.
//   - >= 16 left:    one 16-byte VSX step (xor16).
//   - >= 8 left:     one 8-byte general-purpose-register step (xor8).
//   - < 8 left:      byte-at-a-time loop (small/loop).
//
// Register roles:
//   R3  = dst            R4  = a              R5 = b
//   R6  = bytes still to process (n on entry)
//   R7  = number of 32-byte iterations (n >> 5)
//   R8  = byte index shared by dst, a and b
//   R9  = n & 31 (leftover bytes after loop32)
//   R10 = R8 + 16 (index of the second 16-byte lane in loop32)
//   R14, R15, R16 = scalar scratch; VS32-VS35 = vector scratch
//
// R0 is read as the constant zero throughout. n is treated as a
// non-negative length: the entry comparisons are unsigned.
TEXT ·xorBytesVSX(SB), NOSPLIT, $0
	MOVD	dst+0(FP), R3	// R3 = dst
	MOVD	a+8(FP), R4	// R4 = a
	MOVD	b+16(FP), R5	// R5 = b
	MOVD	n+24(FP), R6	// R6 = n

	CMPU	R6, $32, CR7	// CR7: n compared with 32
	MOVD	R0, R8		// R8 = index = 0
	CMPU	R6, $8, CR6	// CR6: n compared with 8
	BLT	CR6, small	// n < 8: byte loop only
	BLT	CR7, xor16	// 8 ≤ n < 32: skip the 32-byte loop

	// Case for n ≥ 32 bytes
preloop32:
	SRD	$5, R6, R7	// R7 = n / 32 = loop32 iteration count
	MOVD	R7, CTR
	MOVD	$16, R10	// R10 = index of the second 16-byte lane
	// R9 = n % 32. The CR0 result set here is consumed by the BEQ
	// after loop32; no instruction inside the loop is a
	// record-form (CC) variant, so CR0 is still valid there.
	ANDCC	$31, R6, R9
loop32:
	LXVD2X	(R4)(R8), VS32	// VS32 = a[i,...,i+15]
	LXVD2X	(R4)(R10), VS34	// VS34 = a[i+16,...,i+31]
	LXVD2X	(R5)(R8), VS33	// VS33 = b[i,...,i+15]
	LXVD2X	(R5)(R10), VS35	// VS35 = b[i+16,...,i+31]
	XXLXOR	VS32, VS33, VS32	// VS32 = a[i,...,i+15] ^ b[i,...,i+15]
	XXLXOR	VS34, VS35, VS34	// VS34 = a[i+16,...,i+31] ^ b[i+16,...,i+31]
	STXVD2X	VS32, (R3)(R8)	// Store to dst[i,...,i+15]
	STXVD2X	VS34, (R3)(R10)	// Store to dst[i+16,...,i+31]
	ADD	$32, R8		// Advance both lane indexes by 32 bytes
	ADD	$32, R10
	BC	16, 0, loop32	// bdnz loop32

	BEQ	CR0, done	// n % 32 == 0: nothing left

	MOVD	R9, R6		// R6 = trailing bytes (1..31)
	CMP	R6, $8
	BLT	small		// fewer than 8 left: byte loop
xor16:
	// Here 8 ≤ R6 < 32.
	CMP	R6, $16
	BLT	xor8		// 8 ≤ R6 < 16: go straight to the 8-byte step
	LXVD2X	(R4)(R8), VS32	// VS32 = a[i,...,i+15]
	LXVD2X	(R5)(R8), VS33	// VS33 = b[i,...,i+15]
	XXLXOR	VS32, VS33, VS32	// VS32 = a[] ^ b[]
	STXVD2X	VS32, (R3)(R8)	// Store to dst
	ADD	$16, R8		// Update index
	SUB	$16, R6		// n = n - 16
	CMP	R6, $8
	BLT	small		// fewer than 8 left: byte loop
xor8:
	// Here 8 ≤ R6 < 16.
	MOVD	(R4)(R8), R14	// R14 = a[i,...,i+7]
	MOVD	(R5)(R8), R15	// R15 = b[i,...,i+7]
	XOR	R14, R15, R16	// R16 = a[] ^ b[]
	SUB	$8, R6		// n = n - 8
	MOVD	R16, (R3)(R8)	// Store to dst
	ADD	$8, R8		// Update index

	// Check if we're finished
	CMP	R6, R0
	BGT	small		// 1..7 bytes still to do
	RET

	// Case for n < 8 bytes and trailing bytes from the
	// previous cases.
small:
	CMP	R6, R0
	BEQ	done		// nothing left (also covers n == 0 on entry)
	MOVD	R6, CTR		// Setup loop counter

loop:
	MOVBZ	(R4)(R8), R14	// R14 = a[i]
	MOVBZ	(R5)(R8), R15	// R15 = b[i]
	XOR	R14, R15, R16	// R16 = a[i] ^ b[i]
	MOVB	R16, (R3)(R8)	// Store to dst
	ADD	$1, R8		// Update index
	BC	16, 0, loop	// bdnz loop

done:
	RET