Black Lives Matter. Support the Equal Justice Initiative.

Text file src/internal/bytealg/equal_amd64.s

Documentation: internal/bytealg

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "go_asm.h"
     6  #include "textflag.h"
     7  
     8  // memequal(a, b unsafe.Pointer, size uintptr) bool
     9  TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
    10  #ifdef GOEXPERIMENT_regabiargs
    11  	// Register ABI: a in AX, b in BX, size in CX; the bool result goes in AX.
    12  	// memeqbody wants a in SI, b in DI, count in BX.
    13  	CMPQ	AX, BX
    14  	JEQ	same	// a == b: equal without reading memory
    15  	MOVQ	BX, DI	// copy b out of BX before size overwrites it
    16  	MOVQ	CX, BX
    17  	MOVQ	AX, SI
    18  	JMP	memeqbody<>(SB)
    19  same:
    20  	MOVQ	$1, AX	// return 1
    21  	RET
    22  #else
    23  	// Stack ABI: memeqbody writes the result byte through the address in AX.
    24  	MOVQ	a+0(FP), SI
    25  	MOVQ	b+8(FP), DI
    26  	CMPQ	SI, DI
    27  	JNE	differ
    28  	MOVB	$1, ret+24(FP)	// a == b: equal without reading memory
    29  	RET
    30  differ:
    31  	LEAQ	ret+24(FP), AX
    32  	MOVQ	size+16(FP), BX
    33  	JMP	memeqbody<>(SB)
    34  #endif
    35  
    36  // memequal_varlen(a, b unsafe.Pointer) bool
    37  TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
    38  #ifdef GOEXPERIMENT_regabiargs
    39  	// Register ABI: a in AX, b in BX, size read from 8(DX); the bool result goes in AX.
    40  	// memeqbody wants a in SI, b in DI, count in BX.
    41  	CMPQ	AX, BX
    42  	JEQ	same	// a == b: equal without reading memory
    43  	MOVQ	BX, DI	// copy b out of BX before the size overwrites it
    44  	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
    45  	MOVQ	AX, SI
    46  	JMP	memeqbody<>(SB)
    47  same:
    48  	MOVQ	$1, AX	// return 1
    49  	RET
    50  #else
    51  	// Stack ABI: memeqbody writes the result byte through the address in AX.
    52  	MOVQ	a+0(FP), SI
    53  	MOVQ	b+8(FP), DI
    54  	CMPQ	SI, DI
    55  	JNE	differ
    56  	MOVB	$1, ret+16(FP)	// a == b: equal without reading memory
    57  	RET
    58  differ:
    59  	LEAQ	ret+16(FP), AX
    60  	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
    61  	JMP	memeqbody<>(SB)
    62  #endif
    63  
    64  // memeqbody reports whether the count bytes at a and the count bytes at b are equal.
        // It is entered by JMP from the memequal entry points in this file, so its
        // RET returns to their caller. The result is 1 when equal, 0 otherwise.
        // SI, DI, BX, CX, DX, the flags and X0-X7 (or Y0-Y6 on the AVX2 path)
        // are overwritten.
        //
        // Input:
    65  //   a in SI
    66  //   b in DI
    67  //   count in BX
    68  #ifndef GOEXPERIMENT_regabiargs
    69  //   address of result byte in AX
    70  #else
    71  // Output:
    72  //   result in AX
    73  #endif
    74  TEXT memeqbody<>(SB),NOSPLIT,$0-0
    75  	CMPQ	BX, $8
    76  	JB	small	// fewer than 8 bytes
    77  	CMPQ	BX, $64
    78  	JB	bigloop	// 8 to 63 bytes: skip the vector loops
    79  	CMPB	internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
    80  	JE	hugeloop_avx2	// HasAVX2 flag byte is 1: use the ymm loop
    81  
    82  	// 64 bytes at a time using xmm registers
    83  hugeloop:
    84  	CMPQ	BX, $64
    85  	JB	bigloop	// fewer than 64 bytes remain
    86  	MOVOU	(SI), X0
    87  	MOVOU	(DI), X1
    88  	MOVOU	16(SI), X2
    89  	MOVOU	16(DI), X3
    90  	MOVOU	32(SI), X4
    91  	MOVOU	32(DI), X5
    92  	MOVOU	48(SI), X6
    93  	MOVOU	48(DI), X7
        	// Each PCMPEQB leaves 0xff in every byte where a and b match and 0x00
        	// where they differ; the PANDs fold the four masks into X0.
    94  	PCMPEQB	X1, X0
    95  	PCMPEQB	X3, X2
    96  	PCMPEQB	X5, X4
    97  	PCMPEQB	X7, X6
    98  	PAND	X2, X0
    99  	PAND	X6, X4
   100  	PAND	X4, X0
   101  	PMOVMSKB X0, DX	// DX = top bit of each of the 16 mask bytes
        	// Advance pointers and count before testing DX: ADDQ/SUBQ overwrite the
        	// flags, so the CMPL must come last before the JEQ.
   102  	ADDQ	$64, SI
   103  	ADDQ	$64, DI
   104  	SUBQ	$64, BX
   105  	CMPL	DX, $0xffff	// all 16 bits set means all 64 bytes matched
   106  	JEQ	hugeloop
   107  #ifdef GOEXPERIMENT_regabiargs
   108  	XORQ	AX, AX	// return 0
   109  #else
   110  	MOVB	$0, (AX)	// store 0 in the result byte
   111  #endif
   112  	RET
   113  
   114  	// 64 bytes at a time using ymm registers
   115  hugeloop_avx2:
   116  	CMPQ	BX, $64
   117  	JB	bigloop_avx2	// fewer than 64 bytes remain
   118  	VMOVDQU	(SI), Y0
   119  	VMOVDQU	(DI), Y1
   120  	VMOVDQU	32(SI), Y2
   121  	VMOVDQU	32(DI), Y3
   122  	VPCMPEQB	Y1, Y0, Y4	// byte-wise equality mask for bytes 0-31
   123  	VPCMPEQB	Y2, Y3, Y5	// byte-wise equality mask for bytes 32-63
   124  	VPAND	Y4, Y5, Y6
   125  	VPMOVMSKB Y6, DX	// DX = top bit of each of the 32 mask bytes
   126  	ADDQ	$64, SI
   127  	ADDQ	$64, DI
   128  	SUBQ	$64, BX
   129  	CMPL	DX, $0xffffffff	// all 32 bits set means all 64 bytes matched
   130  	JEQ	hugeloop_avx2
   131  	VZEROUPPER	// zero the upper halves of the ymm registers before leaving the AVX2 path
   132  #ifdef GOEXPERIMENT_regabiargs
   133  	XORQ	AX, AX	// return 0
   134  #else
   135  	MOVB	$0, (AX)	// store 0 in the result byte
   136  #endif
   137  	RET
   138  
        	// Exit from the ymm loop with fewer than 64 bytes left; falls through
        	// into bigloop after zeroing the upper ymm halves.
   139  bigloop_avx2:
   140  	VZEROUPPER
   141  
   142  	// 8 bytes at a time using 64-bit register
   143  bigloop:
   144  	CMPQ	BX, $8
   145  	JBE	leftover	// 8 or fewer bytes remain
   146  	MOVQ	(SI), CX
   147  	MOVQ	(DI), DX
   148  	ADDQ	$8, SI
   149  	ADDQ	$8, DI
   150  	SUBQ	$8, BX
   151  	CMPQ	CX, DX	// done last so its flags reach the JEQ
   152  	JEQ	bigloop
   153  #ifdef GOEXPERIMENT_regabiargs
   154  	XORQ	AX, AX	// return 0
   155  #else
   156  	MOVB	$0, (AX)	// store 0 in the result byte
   157  #endif
   158  	RET
   159  
   160  	// remaining 0-8 bytes
        	// Compare the 8 bytes that end at the last byte of each buffer. Every
        	// path that reaches leftover started with count >= 8, so these loads stay
        	// inside the buffers; they may overlap bytes that were already compared.
   161  leftover:
   162  	MOVQ	-8(SI)(BX*1), CX
   163  	MOVQ	-8(DI)(BX*1), DX
   164  	CMPQ	CX, DX
   165  #ifdef GOEXPERIMENT_regabiargs
   166  	SETEQ	AX
   167  #else
   168  	SETEQ	(AX)
   169  #endif
   170  	RET
   171  
        	// Fewer than 8 bytes: load each side with a single 8-byte read and
        	// compare only the low count bytes.
   172  small:
   173  	CMPQ	BX, $0
   174  	JEQ	equal	// zero length: ZF is set by the CMPQ, so the SETEQ at equal stores 1
   175  
        	// CX = -8*count. Shifts use only the low 6 bits of CX, which here is
        	// 64-8*count (count is 1..7 at this point).
   176  	LEAQ	0(BX*8), CX
   177  	NEGQ	CX
   178  
   179  	CMPB	SI, $0xf8	// test the low byte of the address
   180  	JA	si_high
   181  
   182  	// load at SI won't cross a page boundary.
   183  	MOVQ	(SI), SI	// wanted bytes are the low count bytes; the rest is discarded by the SHLQ below
   184  	JMP	si_finish
   185  si_high:
   186  	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
   187  	MOVQ	-8(SI)(BX*1), SI	// 8 bytes ending at the last wanted byte
   188  	SHRQ	CX, SI	// shift the wanted bytes down to the low end
   189  si_finish:
   190  
   191  	// same for DI.
   192  	CMPB	DI, $0xf8
   193  	JA	di_high
   194  	MOVQ	(DI), DI
   195  	JMP	di_finish
   196  di_high:
   197  	MOVQ	-8(DI)(BX*1), DI
   198  	SHRQ	CX, DI
   199  di_finish:
   200  
        	// DI = DI - SI; shifting left by 64-8*count drops everything above the
        	// low count bytes, so ZF ends up set exactly when those bytes matched.
   201  	SUBQ	SI, DI
   202  	SHLQ	CX, DI
        	// Reached with ZF from the SHLQ above or from the CMPQ BX, $0 at small.
   203  equal:
   204  #ifdef GOEXPERIMENT_regabiargs
   205  	SETEQ	AX
   206  #else
   207  	SETEQ	(AX)
   208  #endif
   209  	RET
   210  
   211  

View as plain text