Black Lives Matter. Support the Equal Justice Initiative.

Source file src/internal/bytealg/bytealg.go

Documentation: internal/bytealg

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bytealg
     6  
     7  import (
     8  	"internal/cpu"
     9  	"unsafe"
    10  )
    11  
    12  // Offsets into internal/cpu records for use in assembly.
    13  const (
    14  	offsetX86HasSSE2   = unsafe.Offsetof(cpu.X86.HasSSE2)
    15  	offsetX86HasSSE42  = unsafe.Offsetof(cpu.X86.HasSSE42)
    16  	offsetX86HasAVX2   = unsafe.Offsetof(cpu.X86.HasAVX2)
    17  	offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
    18  
    19  	offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)
    20  
    21  	offsetPPC64HasPOWER9 = unsafe.Offsetof(cpu.PPC64.IsPOWER9)
    22  )
    23  
    24  // MaxLen is the maximum length of the string to be searched for (argument b) in Index.
    25  // If MaxLen is not 0, make sure MaxLen >= 4.
    26  var MaxLen int
    27  
    28  // FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
    29  // IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
    30  // three of them without causing allocation?
    31  
    32  // PrimeRK is the prime base used in Rabin-Karp algorithm.
    33  const PrimeRK = 16777619
    34  
    35  // HashStrBytes returns the hash and the appropriate multiplicative
    36  // factor for use in Rabin-Karp algorithm.
    37  func HashStrBytes(sep []byte) (uint32, uint32) {
    38  	hash := uint32(0)
    39  	for i := 0; i < len(sep); i++ {
    40  		hash = hash*PrimeRK + uint32(sep[i])
    41  	}
    42  	var pow, sq uint32 = 1, PrimeRK
    43  	for i := len(sep); i > 0; i >>= 1 {
    44  		if i&1 != 0 {
    45  			pow *= sq
    46  		}
    47  		sq *= sq
    48  	}
    49  	return hash, pow
    50  }
    51  
    52  // HashStr returns the hash and the appropriate multiplicative
    53  // factor for use in Rabin-Karp algorithm.
    54  func HashStr(sep string) (uint32, uint32) {
    55  	hash := uint32(0)
    56  	for i := 0; i < len(sep); i++ {
    57  		hash = hash*PrimeRK + uint32(sep[i])
    58  	}
    59  	var pow, sq uint32 = 1, PrimeRK
    60  	for i := len(sep); i > 0; i >>= 1 {
    61  		if i&1 != 0 {
    62  			pow *= sq
    63  		}
    64  		sq *= sq
    65  	}
    66  	return hash, pow
    67  }
    68  
    69  // HashStrRevBytes returns the hash of the reverse of sep and the
    70  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    71  func HashStrRevBytes(sep []byte) (uint32, uint32) {
    72  	hash := uint32(0)
    73  	for i := len(sep) - 1; i >= 0; i-- {
    74  		hash = hash*PrimeRK + uint32(sep[i])
    75  	}
    76  	var pow, sq uint32 = 1, PrimeRK
    77  	for i := len(sep); i > 0; i >>= 1 {
    78  		if i&1 != 0 {
    79  			pow *= sq
    80  		}
    81  		sq *= sq
    82  	}
    83  	return hash, pow
    84  }
    85  
    86  // HashStrRev returns the hash of the reverse of sep and the
    87  // appropriate multiplicative factor for use in Rabin-Karp algorithm.
    88  func HashStrRev(sep string) (uint32, uint32) {
    89  	hash := uint32(0)
    90  	for i := len(sep) - 1; i >= 0; i-- {
    91  		hash = hash*PrimeRK + uint32(sep[i])
    92  	}
    93  	var pow, sq uint32 = 1, PrimeRK
    94  	for i := len(sep); i > 0; i >>= 1 {
    95  		if i&1 != 0 {
    96  			pow *= sq
    97  		}
    98  		sq *= sq
    99  	}
   100  	return hash, pow
   101  }
   102  
   103  // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
   104  // first occurrence of substr in s, or -1 if not present.
   105  func IndexRabinKarpBytes(s, sep []byte) int {
   106  	// Rabin-Karp search
   107  	hashsep, pow := HashStrBytes(sep)
   108  	n := len(sep)
   109  	var h uint32
   110  	for i := 0; i < n; i++ {
   111  		h = h*PrimeRK + uint32(s[i])
   112  	}
   113  	if h == hashsep && Equal(s[:n], sep) {
   114  		return 0
   115  	}
   116  	for i := n; i < len(s); {
   117  		h *= PrimeRK
   118  		h += uint32(s[i])
   119  		h -= pow * uint32(s[i-n])
   120  		i++
   121  		if h == hashsep && Equal(s[i-n:i], sep) {
   122  			return i - n
   123  		}
   124  	}
   125  	return -1
   126  }
   127  
   128  // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
   129  // first occurrence of substr in s, or -1 if not present.
   130  func IndexRabinKarp(s, substr string) int {
   131  	// Rabin-Karp search
   132  	hashss, pow := HashStr(substr)
   133  	n := len(substr)
   134  	var h uint32
   135  	for i := 0; i < n; i++ {
   136  		h = h*PrimeRK + uint32(s[i])
   137  	}
   138  	if h == hashss && s[:n] == substr {
   139  		return 0
   140  	}
   141  	for i := n; i < len(s); {
   142  		h *= PrimeRK
   143  		h += uint32(s[i])
   144  		h -= pow * uint32(s[i-n])
   145  		i++
   146  		if h == hashss && s[i-n:i] == substr {
   147  			return i - n
   148  		}
   149  	}
   150  	return -1
   151  }
   152  

View as plain text