1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "go_asm.h"
6 #include "textflag.h"
7
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are identical.
// Identical pointers are answered with true right away; every other case
// is handed to memeqbody with a jump (no new frame), so memeqbody returns
// directly to our caller.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
#ifdef GOEXPERIMENT_regabiargs
	// Register arguments on entry:
	//   AX = a     (memeqbody wants it in SI)
	//   BX = b     (memeqbody wants it in DI)
	//   CX = size  (memeqbody wants it in BX)
	CMPQ	AX, BX
	JEQ	same_ptr
	MOVQ	AX, SI
	MOVQ	BX, DI	// read b out of BX before BX is reused for size
	MOVQ	CX, BX
	JMP	memeqbody<>(SB)
same_ptr:
	MOVQ	$1, AX	// return 1
	RET
#else
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	CMPQ	SI, DI
	JEQ	same_ptr
	LEAQ	ret+24(FP), AX	// memeqbody writes the result byte through AX
	MOVQ	size+16(FP), BX
	JMP	memeqbody<>(SB)
same_ptr:
	MOVB	$1, ret+24(FP)
	RET
#endif
35
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same contract as memequal, except that the byte count is not an argument:
// it is loaded from offset 8 of the closure addressed by DX.
// Identical pointers are answered with true without reading the size.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
#ifdef GOEXPERIMENT_regabiargs
	// Register arguments on entry:
	//   AX = a        (memeqbody wants it in SI)
	//   BX = b        (memeqbody wants it in DI)
	//   8(DX) = size  (memeqbody wants it in BX)
	CMPQ	AX, BX
	JEQ	same_ptr
	MOVQ	AX, SI
	MOVQ	BX, DI	// read b out of BX before BX is reused for size
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	JMP	memeqbody<>(SB)
same_ptr:
	MOVQ	$1, AX	// return 1
	RET
#else
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	CMPQ	SI, DI
	JEQ	same_ptr
	LEAQ	ret+16(FP), AX	// memeqbody writes the result byte through AX
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	JMP	memeqbody<>(SB)
same_ptr:
	MOVB	$1, ret+16(FP)
	RET
#endif
63
// memeqbody compares count bytes at a against count bytes at b.
//
// Input:
//   a in SI
//   b in DI
//   count in BX
#ifndef GOEXPERIMENT_regabiargs
//   address of result byte in AX
#else
// Output:
//   result in AX (SETEQ writes only the low byte)
#endif
//
// May overwrite SI, DI, BX, CX, DX, X0-X7 (Y0-Y6 on the AVX2 path) and flags.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	// Choose a strategy by length: under 8 bytes, 8-63 bytes, or 64 and up.
	CMPQ	BX, $8
	JB	tiny
	CMPQ	BX, $64
	JB	words
	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
	JE	blocks_avx2

	// 64 bytes per iteration as four 16-byte xmm compares.
blocks_sse:
	CMPQ	BX, $64
	JB	words
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0	// a byte of X0 is 0xff only if it matched in all four lanes
	PMOVMSKB X0, DX
	// Advance before testing the mask; CMPL below rewrites the flags anyway.
	SUBQ	$64, BX
	ADDQ	$64, SI
	ADDQ	$64, DI
	CMPL	DX, $0xffff	// all 16 mask bits set: the whole block matched
	JEQ	blocks_sse
	JMP	differ

	// 64 bytes per iteration as two 32-byte ymm compares.
blocks_avx2:
	CMPQ	BX, $64
	JB	words_avx2
	VMOVDQU	(SI), Y0
	VMOVDQU	(DI), Y1
	VMOVDQU	32(SI), Y2
	VMOVDQU	32(DI), Y3
	VPCMPEQB	Y1, Y0, Y4
	VPCMPEQB	Y3, Y2, Y5
	VPAND	Y5, Y4, Y6
	VPMOVMSKB Y6, DX
	SUBQ	$64, BX
	ADDQ	$64, SI
	ADDQ	$64, DI
	CMPL	DX, $0xffffffff	// all 32 mask bits set: the whole block matched
	JEQ	blocks_avx2
	VZEROUPPER	// leave the ymm upper halves clean before returning
	JMP	differ

words_avx2:
	VZEROUPPER
	// fall through to the 8-byte loop

	// 8 bytes per iteration using 64-bit registers.
words:
	CMPQ	BX, $8
	JBE	tail
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	SUBQ	$8, BX
	ADDQ	$8, SI
	ADDQ	$8, DI
	CMPQ	CX, DX
	JEQ	words
	// fall through: the two words differ

	// Shared "not equal" exit for the three loops above.
differ:
#ifdef GOEXPERIMENT_regabiargs
	XORQ	AX, AX	// return 0
#else
	MOVB	$0, (AX)
#endif
	RET

	// Between 0 and 8 bytes remain. Compare the final 8 bytes of each
	// buffer; this may overlap bytes that were already checked, and stays
	// in bounds because this path is only reached when count was >= 8.
tail:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
#ifdef GOEXPERIMENT_regabiargs
	SETEQ	AX
#else
	SETEQ	(AX)
#endif
	RET

	// count < 8.
tiny:
	TESTQ	BX, BX
	JEQ	tiny_done	// nothing to compare: ZF is set, so the result is true

	// CX = -8*count. The 64-bit shifts below use only the low 6 bits,
	// i.e. 64 - 8*count, the number of bits that are NOT being compared.
	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	a_near_end

	// An 8-byte load at SI won't cross a page boundary. Bytes past
	// count are junk and get shifted out at the end.
	MOVQ	(SI), SI
	JMP	a_loaded
a_near_end:
	// Address ends in 11111xxx. Load the 8 bytes that END at a+count,
	// then shift the wanted bytes down to the low end of the register.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
a_loaded:

	// Same for DI.
	CMPB	DI, $0xf8
	JA	b_near_end
	MOVQ	(DI), DI
	JMP	b_loaded
b_near_end:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
b_loaded:

	// The low count bytes of the difference depend only on the low count
	// bytes of the operands. Shifting left discards everything above them
	// and sets ZF exactly when those bytes were equal.
	SUBQ	SI, DI
	SHLQ	CX, DI
tiny_done:
#ifdef GOEXPERIMENT_regabiargs
	SETEQ	AX
#else
	SETEQ	(AX)
#endif
	RET
210
211
View as plain text