// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are identical.
// Arguments and result live in the caller's frame (a+0, b+8, size+16, ret+24);
// the function itself has no frame (NOFRAME, $0).
// A zero size is answered here; every other size is handed to memeqbody<>
// with a branch (not a call), so memeqbody<> stores the result through R8
// and returns directly to memequal's caller.
TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
	MOVD	size+16(FP), R1
	// short path to handle 0-byte case
	CBZ	R1, equal
	// Set up memeqbody<>'s register inputs: R0 = a, R2 = b, R1 = size (above),
	// R8 = address of the result slot.
	MOVD	a+0(FP), R0
	MOVD	b+8(FP), R2
	MOVD	$ret+24(FP), R8
	B	memeqbody<>(SB)
equal:
	// size == 0: store true into the result slot.
	MOVD	$1, R0
	MOVB	R0, ret+24(FP)
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Variant of memequal whose length is not an argument: it is loaded from
// offset 8 of the object addressed by R26.
// Returns true immediately when a and b are the same pointer or the length
// is zero; otherwise it calls runtime·memequal and forwards its result.
// The 40-byte frame holds the outgoing call area: the three arguments are
// written to 8/16/24(RSP) and the one-byte result is read back from 32(RSP),
// mirroring memequal's a+0 / b+8 / size+16 / ret+24 layout.
TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
	MOVD	a+0(FP), R3
	MOVD	b+8(FP), R4
	// Identical pointers compare equal without touching memory.
	CMP	R3, R4
	BEQ	eq
	MOVD	8(R26), R5	// compiler stores size at offset 8 in the closure
	// Zero-length comparison is trivially equal.
	CBZ	R5, eq
	// Outgoing arguments for memequal(a, b, size).
	MOVD	R3, 8(RSP)
	MOVD	R4, 16(RSP)
	MOVD	R5, 24(RSP)
	BL	runtime·memequal(SB)
	// Copy memequal's bool result into our own result slot.
	MOVBU	32(RSP), R3
	MOVB	R3, ret+16(FP)
	RET
eq:
	// Fast-path result: true.
	MOVD	$1, R3
	MOVB	R3, ret+16(FP)
	RET

// memeqbody<> is the shared comparison routine. It takes its operands in
// registers and writes a single byte (1 = equal, 0 = not equal) to the
// address in R8 before returning.
//
// input:
// R0: pointer a
// R1: data len
// R2: pointer b
// R8: address to put result
//
// Registers written by this routine: R0-R7, R9, V0-V11 and the condition
// flags.
//
// Strategy, by length:
//   len == 1   single byte compare (label one)
//   len <  16  overlapping/bit-tested tail compare (label tail)
//   len >= 16  64-byte vector chunks, then 16-byte chunks, then the tail
TEXT memeqbody<>(SB),NOSPLIT,$0
	CMP	$1, R1
	// handle 1-byte special case for better performance
	BEQ	one
	CMP	$16, R1
	// handle specially if length < 16
	BLO	tail
	// R3 = len rounded down to a multiple of 64; zero means there is no
	// full 64-byte chunk.
	BIC	$0x3f, R1, R3
	CBZ	R3, chunk16
	// work with 64-byte chunks
	ADD	R3, R0, R6	// end of chunks
chunk64_loop:
	// Load 64 bytes from each operand; the .P (post-increment) form advances
	// R0 and R2, which the loop exit test against R6 relies on.
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R2), [V4.D2, V5.D2, V6.D2, V7.D2]
	// Compare per 64-bit lane: a lane becomes all ones when equal, zero
	// when different.
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	// Fold the four comparison masks into V8; any zero lane survives the AND.
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	// Loop-exit test. The flags set here are consumed by the BNE below;
	// the intervening VMOV/CBZ instructions sit between the two, so this
	// ordering must be kept and nothing flag-setting may be inserted.
	CMP	R0, R6
	// Move both 64-bit lanes of the combined mask to general registers;
	// a zero lane means some 8-byte group differed.
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	CBZ	R4, not_equal
	CBZ	R5, not_equal
	BNE	chunk64_loop
	// Keep only the remainder (len mod 64); done if nothing is left.
	AND	$0x3f, R1, R1
	CBZ	R1, equal
chunk16:
	// work with 16-byte chunks
	// R3 = remaining len rounded down to a multiple of 16.
	BIC	$0xf, R1, R3
	CBZ	R3, tail
	ADD	R3, R0, R6	// end of chunks
chunk16_loop:
	// Load 16 bytes from each operand as two 64-bit registers, advancing
	// both pointers by 16.
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R2), (R7, R9)
	// XOR corresponding halves; a non-zero result means a mismatch.
	EOR	R4, R7
	CBNZ	R7, not_equal
	EOR	R5, R9
	CBNZ	R9, not_equal
	CMP	R0, R6
	BNE	chunk16_loop
	// Keep only the remainder (len mod 16); done if nothing is left.
	AND	$0xf, R1, R1
	CBZ	R1, equal
tail:
	// special compare of tail with length < 16
	// Bit 3 of the length set => 8 <= len < 16: compare the first 8 bytes
	// and the last 8 bytes (the two loads may overlap).
	TBZ	$3, R1, lt_8
	MOVD	(R0), R4
	MOVD	(R2), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$8, R1, R6	// offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R2)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_8:
	// Bit 2 of the length set => 4 <= len < 8: compare the first 4 bytes
	// and the last 4 bytes (the two loads may overlap).
	TBZ	$2, R1, lt_4
	MOVWU	(R0), R4
	MOVWU	(R2), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$4, R1, R6	// offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R2)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_4:
	// Bit 1 of the length set => 2 <= len < 4: compare 2 bytes and advance
	// both pointers so a possible odd trailing byte is handled at one.
	TBZ	$1, R1, lt_2
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R2), R5
	CMP	R4, R5
	BNE	not_equal
lt_2:
	// Bit 0 of the length clear => no trailing byte left to compare.
	TBZ	$0, R1, equal
one:
	// Compare the single byte at the current pointers.
	MOVBU	(R0), R4
	MOVBU	(R2), R5
	CMP	R4, R5
	BNE	not_equal
equal:
	// Store true through the result pointer.
	MOVD	$1, R0
	MOVB	R0, (R8)
	RET
not_equal:
	// Store false (zero register) through the result pointer.
	MOVB	ZR, (R8)
	RET