Text file
src/runtime/memclr_ppc64x.s
Documentation: runtime
1 // Copyright 2014 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build ppc64 || ppc64le
6 // +build ppc64 ppc64le
7
8 #include "textflag.h"
9
10 // See memclrNoHeapPointers Go doc for important implementation constraints.
11
12 // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
//
// Writes zero to the n bytes starting at ptr.
//
// Main register roles in this routine:
//   R3    running store address (starts at ptr, advanced after each store)
//   R4    bytes still to clear (starts at n, decremented in the wide loops)
//   R5    tail byte count, R4 & 7
//   R6    doubleword count
//   R7    count of 32 byte chunks, or of doublewords on the short paths
//   R9, R10, R11  offsets 128, 256, 384 inside the DCBZ loop
//   R14, R15      scratch for the alignment arithmetic
//   CTR   counter for every "BC 16, 0" loop
//   VS32  zeroed vector register used for 16 byte stores
//   R0    never written here; every store of R0 and every (R3+R0) index
//         relies on it already holding 0 - assumed from the toolchain
//         convention, TODO confirm
//
// Paths:
//   n < 8:    tail byte loop only (nozerolarge).
//   n < 512:  32 byte vector stores, then doublewords, then tail bytes (under512).
//   n >= 512: advance ptr to 128 byte alignment, clear with DCBZ in groups of
//             512 and then 128 bytes, and finish what is left with the
//             32 byte / doubleword / byte loops.
//
// $0-16: no local frame, 16 bytes of arguments (ptr at 0(FP), n at 8(FP)).
13 TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT|NOFRAME, $0-16
14 MOVD ptr+0(FP), R3 // R3 = ptr
15 MOVD n+8(FP), R4 // R4 = n
16
17 // Determine if there are doublewords to clear
// (no branch in this routine targets "check"; it is entered by fallthrough)
18 check:
19 ANDCC $7, R4, R5 // R5 = n & 7: leftover bytes to clear
20 SRD $3, R4, R6 // R6 = n >> 3: double words to clear
21 CMP R6, $0, CR1 // CR1[EQ] set if no double words
22
23 BC 12, 6, nozerolarge // taken when CR1[EQ] is set: n < 8, only single bytes
24 CMP R4, $512
25 BLT under512 // special case for < 512
26 ANDCC $127, R3, R8 // check for 128 alignment of address
27 BEQ zero512setup // already 128 byte aligned: go straight to the DCBZ setup
28
29 ANDCC $7, R3, R15 // low three address bits
30 BEQ zero512xsetup // at least 8 byte aligned
31
32 // zero bytes up to 8 byte alignment
// This path is only reached with n >= 512, so the at most 7 bytes removed
// from R4 below cannot take the count negative.
33
34 ANDCC $1, R3, R15 // check for byte alignment
35 BEQ byte2
36 MOVB R0, 0(R3) // zero 1 byte
37 ADD $1, R3 // bump ptr by 1
38 ADD $-1, R4 // one byte fewer to clear
39
40 byte2:
41 ANDCC $2, R3, R15 // check for 2 byte alignment
42 BEQ byte4
43 MOVH R0, 0(R3) // zero 2 bytes
44 ADD $2, R3 // bump ptr by 2
45 ADD $-2, R4 // two bytes fewer to clear
46
47 byte4:
48 ANDCC $4, R3, R15 // check for 4 byte alignment
49 BEQ zero512xsetup
50 MOVW R0, 0(R3) // zero 4 bytes
51 ADD $4, R3 // bump ptr by 4
52 ADD $-4, R4 // four bytes fewer to clear
53 BR zero512xsetup // ptr should now be 8 byte aligned
54
// Entered only when n < 512, with R6 = n >> 3 (at least 1, since n < 8
// already branched to nozerolarge).
55 under512:
56 MOVD R6, CTR // R6 = number of double words
57 SRDCC $2, R6, R7 // R7 = R6 >> 2: 32 byte chunks?
58 BNE zero32setup // at least one 32 byte chunk: use the vector loop
59
60 // Clear double words
// CTR must hold the doubleword count on entry. Falls through from under512
// and is also entered from the end of zero32 and from zero8setup.
61
62 zero8:
63 MOVD R0, 0(R3) // double word
64 ADD $8, R3
65 ADD $-8, R4 // keep R4 current so nozerolarge can derive the tail from it
66 BC 16, 0, zero8 // dec ctr, br zero8 if ctr not 0
67 BR nozerolarge // handle leftovers
68
69 // Prepare to clear 32 bytes at a time.
// R7 must hold the number of 32 byte chunks on entry (set by under512 or
// by smaller).
70
71 zero32setup:
72 DCBTST (R3) // prepare data cache
73 XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
74 MOVD R7, CTR // number of 32 byte chunks
75 MOVD $16, R8 // R8 = offset of the second 16 byte store
76
77 zero32:
78 STXVD2X VS32, (R3+R0) // store 16 bytes
79 STXVD2X VS32, (R3+R8) // store the next 16 bytes
80 ADD $32, R3
81 ADD $-32, R4
82 BC 16, 0, zero32 // dec ctr, br zero32 if ctr not 0
83 RLDCLCC $61, R4, $3, R6 // R6 = remaining doublewords; the CC form sets CR0 for the BEQ
84 BEQ nozerolarge // no whole doubleword left
85 MOVD R6, CTR // set up the CTR for doublewords
86 BR zero8
87
// Tail handling: fewer than 8 bytes are cleared from here on. R5 is
// recomputed from the current R4 rather than reused from entry.
88 nozerolarge:
89 ANDCC $7, R4, R5 // any remaining bytes
90 BC 4, 1, LR // return through LR unless CR0[GT]: no tail bytes (ble lr)
91
// (no branch in this routine targets "zerotail"; it is entered by fallthrough)
92 zerotail:
93 MOVD R5, CTR // set up to clear tail bytes
94
95 zerotailloop:
96 MOVB R0, 0(R3) // clear single bytes
97 ADD $1, R3
98 BC 16, 0, zerotailloop // dec ctr, br zerotailloop if ctr not 0
99 RET
100
// Every branch to this label arrives with R3 8 byte aligned. Clearing one
// more doubleword when address bit 3 is set makes R3 16 byte aligned.
101 zero512xsetup: // 512 chunk with extra needed
102 ANDCC $8, R3, R11 // 8 byte alignment?
103 BEQ zero512setup16
104 MOVD R0, 0(R3) // clear 8 bytes
105 ADD $8, R3 // update ptr to next 8
106 ADD $-8, R4 // dec count by 8
107
108 zero512setup16:
109 ANDCC $127, R3, R14 // < 128 byte alignment
110 BEQ zero512setup // handle 128 byte alignment
111 MOVD $128, R15
112 SUB R14, R15, R14 // R14 = 128 - (R3 & 127): find increment to 128 alignment
113 SRD $4, R14, R15 // number of 16 byte chunks (R3 is 16 byte aligned here, so R14 is a nonzero multiple of 16)
114
115 zero512presetup:
116 MOVD R15, CTR // loop counter of 16 bytes
117 XXLXOR VS32, VS32, VS32 // clear VS32 (V0)
118
119 zero512preloop: // clear up to 128 alignment
120 STXVD2X VS32, (R3+R0) // clear 16 bytes
121 ADD $16, R3 // update ptr
122 ADD $-16, R4 // dec count
123 BC 16, 0, zero512preloop
124
// R3 is 128 byte aligned here. The alignment prologue may have reduced R4
// below 512, which is why the count is compared again.
125 zero512setup: // setup for dcbz loop
126 CMP R4, $512 // check if at least 512
127 BLT remain
128 SRD $9, R4, R8 // loop count for 512 chunks
129 MOVD R8, CTR // set up counter
130 MOVD $128, R9 // index regs for 128 bytes
131 MOVD $256, R10
132 MOVD $384, R11
133
// Each DCBZ is accounted for as 128 bytes (four per 512 byte step).
// NOTE(review): that depends on the data cache block size being 128 bytes - confirm for the target CPUs.
134 zero512:
135 DCBZ (R3+R0) // clear first chunk
136 DCBZ (R3+R9) // clear second chunk
137 DCBZ (R3+R10) // clear third chunk
138 DCBZ (R3+R11) // clear fourth chunk
139 ADD $512, R3
140 ADD $-512, R4
141 BC 16, 0, zero512
142
// Fewer than 512 bytes remain: clear them one DCBZ (128 bytes) at a time.
143 remain:
144 CMP R4, $128 // check if 128 byte chunks left
145 BLT smaller
146 DCBZ (R3+R0) // clear 128
147 ADD $128, R3
148 ADD $-128, R4
149 BR remain
150
// Fewer than 128 bytes remain; hand off to the 32 byte or doubleword loop.
151 smaller:
152 ANDCC $127, R4, R7 // find leftovers
153 BEQ done
154 CMP R7, $64 // more than 64, do 32 at a time
155 BLT zero8setup // less than 64, do 8 at a time
156 SRD $5, R7, R7 // set up counter for 32
157 BR zero32setup // zero32setup loads CTR from R7
158
159 zero8setup:
160 SRDCC $3, R7, R7 // R7 = doubleword count; zero means less than 8 bytes
161 BEQ nozerolarge // only tail bytes left
162 MOVD R7, CTR
163 BR zero8
164
165 done:
166 RET
167
View as plain text