Black Lives Matter. Support the Equal Justice Initiative.

Text file src/internal/bytealg/compare_ppc64x.s

Documentation: internal/bytealg

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     5  //go:build ppc64 || ppc64le
     6  // +build ppc64 ppc64le
     8  #include "go_asm.h"
     9  #include "textflag.h"
    11  TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56
    12  	MOVD	a_base+0(FP), R5
    13  	MOVD	b_base+24(FP), R6
    14  	MOVD	a_len+8(FP), R3
    15  	CMP	R5,R6,CR7
    16  	MOVD	b_len+32(FP), R4
    17  	MOVD	$ret+48(FP), R7
    18  	CMP	R3,R4,CR6
    19  	BEQ	CR7,equal
    21  #ifdef	GOARCH_ppc64le
    22  	BR	cmpbodyLE<>(SB)
    23  #else
    24  	BR      cmpbodyBE<>(SB)
    25  #endif
    27  equal:
    28  	BEQ	CR6,done
    29  	MOVD	$1, R8
    30  	BGT	CR6,greater
    31  	NEG	R8
    33  greater:
    34  	MOVD	R8, (R7)
    35  	RET
    37  done:
    38  	MOVD	$0, (R7)
    39  	RET
    41  TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
    42  	MOVD	a_base+0(FP), R5
    43  	MOVD	b_base+16(FP), R6
    44  	MOVD	a_len+8(FP), R3
    45  	CMP	R5,R6,CR7
    46  	MOVD	b_len+24(FP), R4
    47  	MOVD	$ret+32(FP), R7
    48  	CMP	R3,R4,CR6
    49  	BEQ	CR7,equal
    51  #ifdef	GOARCH_ppc64le
    52  	BR	cmpbodyLE<>(SB)
    53  #else
    54  	BR      cmpbodyBE<>(SB)
    55  #endif
    57  equal:
    58  	BEQ	CR6,done
    59  	MOVD	$1, R8
    60  	BGT	CR6,greater
    61  	NEG	R8
    63  greater:
    64  	MOVD	R8, (R7)
    65  	RET
    67  done:
    68  	MOVD	$0, (R7)
    69  	RET
    71  // Do an efficient memcmp for ppc64le
    72  // R3 = a len
    73  // R4 = b len
    74  // R5 = a addr
    75  // R6 = b addr
    76  // R7 = addr of return value
        //
        // Compares the first min(R3, R4) bytes at R5 and R6 as unsigned values
        // and stores the result through R7: -1 if a < b, 0 if a == b, +1 if
        // a > b. If that common prefix is identical, the length comparison
        // saved in CR2 decides the result.
        //
        // Strategy: 32-byte chunks (four doublewords per iteration), then
        // 8-byte chunks, then single bytes. The 32-byte loop uses plain MOVD
        // loads, which on little-endian only answer equal/not-equal; on a
        // mismatch, cmpne reloads the offending pair with MOVDBR (R16 holds
        // its offset from R5/R6) to obtain the ordering.
        //
        // Entered by BR from the callers in this file, so RET returns on
        // their behalf.
        // Writes R3, R5, R6, R8, R9, R10, R14, R15, R16, CTR, CR0 and CR2.
    77  TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
    78  	MOVD	R3,R8		// R8 = a len (provisional compare length)
    79  	CMP	R3,R4,CR2	// CR2 = a len vs b len, kept for the final tie-break
    80  	BC	12,8,setuplen	// BLT CR2: a is shorter, keep R8 = a len
    81  	MOVD	R4,R8		// otherwise use R4 for comparison len
    82  setuplen:
    83  	MOVD	R8,CTR		// loop counter for the byte loop when len < 8
    84  	CMP	R8,$8		// only optimize >=8
    85  	BLT	simplecheck
    86  	DCBT	(R5)		// cache hint
    87  	DCBT	(R6)
    88  	CMP	R8,$32		// optimize >= 32
    89  	MOVD	R8,R9		// setup8a expects the byte count in R9
    90  	BLT	setup8a		// 8 byte moves only
    91  setup32a:
    92  	SRADCC	$5,R8,R9	// number of 32 byte chunks
    93  	MOVD	R9,CTR		// CTR = 32 byte chunk count
    95  	// Special processing for 32 bytes or longer.
    96  	// Loading this way is faster and correct as long as the
    97  	// doublewords being compared are equal. Once they
    98  	// are found unequal, reload them in proper byte order
    99  	// to determine greater or less than.
        	// Loads for the next pair are issued before the branch on the
        	// current pair; R16 always names the offset of the pair whose
        	// compare result is about to be tested.
   100  loop32a:
   101  	MOVD	0(R5),R9	// doublewords to compare
   102  	MOVD	0(R6),R10	// get 4 doublewords
   103  	MOVD	8(R5),R14
   104  	MOVD	8(R6),R15
   105  	CMPU	R9,R10		// bytes equal?
   106  	MOVD	$0,R16		// set up for cmpne: pair at offset 0
   107  	BNE	cmpne		// further compare for LT or GT
   108  	MOVD	16(R5),R9	// get next pair of doublewords
   109  	MOVD	16(R6),R10
   110  	CMPU	R14,R15		// bytes match?
   111  	MOVD	$8,R16		// set up for cmpne: pair at offset 8
   112  	BNE	cmpne		// further compare for LT or GT
   113  	MOVD	24(R5),R14	// get next pair of doublewords
   114  	MOVD    24(R6),R15
   115  	CMPU	R9,R10		// bytes match?
   116  	MOVD	$16,R16		// set up for cmpne: pair at offset 16
   117  	BNE	cmpne		// further compare for LT or GT
   118  	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
   119  	ADD	$32,R5		// bump up to next 32
   120  	ADD	$32,R6
   121  	CMPU    R14,R15		// bytes match?
   122  	BC	8,2,loop32a	// dec CTR; loop while CTR <> 0 and last pair equal
   123  	BNE	cmpne		// loop left on a mismatch in the last pair
   124  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   125  	BEQ	leftover	// and result is 0
   126  setup8a:
   127  	SRADCC	$3,R9,R9	// get the 8 byte count
   128  	BEQ	leftover	// shifted value is 0
   129  	MOVD	R9,CTR		// loop count for doublewords
   130  loop8:
        	// Byte-reversed loads, so CMPU orders the pair directly and no
        	// reload is needed. NOTE(review): R0 is used as the index and is
        	// presumably always zero here; confirm against Go's ppc64
        	// register conventions.
   131  	MOVDBR	(R5+R0),R9	// doublewords to compare
   132  	MOVDBR	(R6+R0),R10	// LE compare order
   133  	ADD	$8,R5
   134  	ADD	$8,R6
   135  	CMPU	R9,R10		// match?
   136  	BC	8,2,loop8	// bt ctr <> 0 && cr
   137  	BGT	greater
   138  	BLT	less
   139  leftover:
   140  	ANDCC	$7,R8,R9	// check for leftover bytes
   141  	MOVD	R9,CTR		// save the ctr
   142  	BNE	simple		// leftover bytes
   143  	BC	12,10,equal	// test CR2 for length comparison
   144  	BC	12,8,less	// 1st len < 2nd len, result less
   145  	BR	greater		// 1st len > 2nd len must be greater
   146  simplecheck:
   147  	CMP	R8,$0		// remaining compare length 0
   148  	BNE	simple		// do simple compare
   149  	BC	12,10,equal	// test CR2 for length comparison
   150  	BC	12,8,less	// 1st len < 2nd len, result less
   151  	BR	greater		// 1st len > 2nd len must be greater
   152  simple:
        	// Byte-at-a-time loop; CTR holds the number of bytes left.
   153  	MOVBZ	0(R5), R9	// get byte from 1st operand
   154  	ADD	$1,R5
   155  	MOVBZ	0(R6), R10	// get byte from 2nd operand
   156  	ADD	$1,R6
   157  	CMPU	R9, R10
   158  	BC	8,2,simple	// bc ctr <> 0 && cr
   159  	BGT	greater		// 1st > 2nd
   160  	BLT	less		// 1st < 2nd
   161  	BC	12,10,equal	// test CR2 for length comparison
   162  	BC	12,9,greater	// 1st len > 2nd len
   163  	BR	less		// must be less
   164  cmpne:				// reached only when a doubleword pair differs
   165  	MOVDBR	(R5+R16),R8	// reload in reverse order
   166  	MOVDBR	(R6+R16),R9
   167  	CMPU	R8,R9		// compare correct endianness
   168  	BGT	greater		// here only if NE; otherwise fall through to less
   169  less:
   170  	MOVD	$-1,R3
   171  	MOVD	R3,(R7)		// return value if A < B
   172  	RET
   173  equal:
   174  	MOVD	$0,(R7)		// return value if A == B
   175  	RET
   176  greater:
   177  	MOVD	$1,R3
   178  	MOVD	R3,(R7)		// return value if A > B
   179  	RET
   181  // Do an efficient memcmp for ppc64 (BE)
   182  // R3 = a len
   183  // R4 = b len
   184  // R5 = a addr
   185  // R6 = b addr
   186  // R7 = addr of return value
        //
        // Compares the first min(R3, R4) bytes at R5 and R6 as unsigned values
        // and stores the result through R7: -1 if a < b, 0 if a == b, +1 if
        // a > b. If that common prefix is identical, the length comparison
        // saved in CR2 decides the result.
        //
        // Strategy: 32-byte chunks (four doublewords per iteration), then
        // 8-byte chunks, then single bytes. With BE the byte ordering of a
        // plain MOVD load is already good for compare, so every CMPU result
        // is used directly and no reload step is needed.
        //
        // Entered by BR from the callers in this file, so RET returns on
        // their behalf.
        // Writes R3, R5, R6, R8, R9, R10, R14, R15, CTR, CR0 and CR2.
   187  TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
   188  	MOVD	R3,R8		// R8 = a len (provisional compare length)
   189  	CMP	R3,R4,CR2	// CR2 = a len vs b len, kept for the final tie-break
   190  	BC	12,8,setuplen	// BLT CR2: a is shorter, keep R8 = a len
   191  	MOVD	R4,R8		// otherwise use R4 for comparison len
   192  setuplen:
   193  	MOVD	R8,CTR		// loop counter for the byte loop when len < 8
   194  	CMP	R8,$8		// only optimize >=8
   195  	BLT	simplecheck
   196  	DCBT	(R5)		// cache hint
   197  	DCBT	(R6)
   198  	CMP	R8,$32		// optimize >= 32
   199  	MOVD	R8,R9		// setup8a expects the byte count in R9
   200  	BLT	setup8a		// 8 byte moves only
   202  setup32a:
   203  	SRADCC	$5,R8,R9	// number of 32 byte chunks
   204  	MOVD	R9,CTR		// CTR = 32 byte chunk count
   205  loop32a:
        	// Loads for the next pair are issued before the branches on the
        	// current pair.
   206  	MOVD	0(R5),R9	// doublewords to compare
   207  	MOVD	0(R6),R10	// get 4 doublewords
   208  	MOVD	8(R5),R14
   209  	MOVD	8(R6),R15
   210  	CMPU	R9,R10		// bytes equal?
   211  	BLT	less		// found to be less
   212  	BGT	greater		// found to be greater
   213  	MOVD	16(R5),R9	// get next pair of doublewords
   214  	MOVD	16(R6),R10
   215  	CMPU	R14,R15		// bytes match?
   216  	BLT	less		// found less
   217  	BGT	greater		// found greater
   218  	MOVD	24(R5),R14	// get next pair of doublewords
   219  	MOVD	24(R6),R15
   220  	CMPU	R9,R10		// bytes match?
   221  	BLT	less		// found to be less
   222  	BGT	greater		// found to be greater
   223  	ADD	$32,R5		// bump up to next 32
   224  	ADD	$32,R6
   225  	CMPU	R14,R15		// bytes match?
   226  	BC	8,2,loop32a	// dec CTR; loop while CTR <> 0 and last pair equal
   227  	BLT	less		// with BE, byte ordering is
   228  	BGT	greater		// good for compare
   229  	ANDCC	$24,R8,R9	// Any 8 byte chunks?
   230  	BEQ	leftover	// and result is 0
   231  setup8a:
   232  	SRADCC	$3,R9,R9	// get the 8 byte count
   233  	BEQ	leftover	// shifted value is 0
   234  	MOVD	R9,CTR		// loop count for doublewords
   235  loop8:
   236  	MOVD	(R5),R9		// doublewords to compare
   237  	MOVD	(R6),R10
   238  	ADD	$8,R5
   239  	ADD	$8,R6
   240  	CMPU	R9,R10		// match?
   241  	BC	8,2,loop8	// bt ctr <> 0 && cr
   242  	BGT	greater
   243  	BLT	less
   244  leftover:
   245  	ANDCC	$7,R8,R9	// check for leftover bytes
   246  	MOVD	R9,CTR		// save the ctr
   247  	BNE	simple		// leftover bytes
   248  	BC	12,10,equal	// test CR2 for length comparison
   249  	BC	12,8,less	// 1st len < 2nd len, result less
   250  	BR	greater		// 1st len > 2nd len must be greater
   251  simplecheck:
   252  	CMP	R8,$0		// remaining compare length 0
   253  	BNE	simple		// do simple compare
   254  	BC	12,10,equal	// test CR2 for length comparison
   255  	BC 	12,8,less	// 1st len < 2nd len, result less
   256  	BR	greater		// 1st len > 2nd len must be greater
   257  simple:
        	// Byte-at-a-time loop; CTR holds the number of bytes left.
   258  	MOVBZ	0(R5),R9	// get byte from 1st operand
   259  	ADD	$1,R5
   260  	MOVBZ	0(R6),R10	// get byte from 2nd operand
   261  	ADD	$1,R6
   262  	CMPU	R9,R10
   263  	BC	8,2,simple	// bc ctr <> 0 && cr
   264  	BGT	greater		// 1st > 2nd
   265  	BLT	less		// 1st < 2nd
   266  	BC	12,10,equal	// test CR2 for length comparison
   267  	BC	12,9,greater	// 1st len > 2nd len
        	// Fall through: 1st len < 2nd len, result less.
   268  less:
   269  	MOVD	$-1,R3
   270  	MOVD    R3,(R7)		// return value if A < B
   271  	RET
   272  equal:
   273  	MOVD    $0,(R7)		// return value if A == B
   274  	RET
   275  greater:
   276  	MOVD	$1,R3
   277  	MOVD	R3,(R7)		// return value if A > B
   278  	RET

View as plain text