Black Lives Matter. Support the Equal Justice Initiative.

Source file src/regexp/find_test.go

Documentation: regexp

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"testing"
    11  )
    12  
// FindTest is one row of the shared test table: a pattern, a text to search,
// and the expected matches.
//
// For each pattern/text pair, what is the expected output of each function?
// We can derive the textual results from the indexed results, the non-submatch
// results from the submatched results, the single results from the 'all' results,
// and the byte results from the string results. Therefore the table includes
// only the FindAllStringSubmatchIndex result.
type FindTest struct {
	pat     string  // regular expression source, compiled with MustCompile by the tests
	text    string  // input the compiled pattern is run against
	matches [][]int // expected index results, one []int per match; nil means no match is expected
}
    23  
    24  func (t FindTest) String() string {
    25  	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
    26  }
    27  
// findTests is the table shared by every Test function in this file. Each
// entry gives a pattern, a text, and the expected matches constructed with
// build (or nil when no match is expected). The first argument to build is the
// number of matches; the remaining arguments are the start/end indices of each
// match followed by those of its submatches. The tests slice test.text with
// these indices, so they are byte offsets.
var findTests = []FindTest{
	{``, ``, build(1, 0, 0)},
	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
	{`a+`, "baaab", build(1, 1, 4)},
	{"abcd..", "abcdef", build(1, 0, 6)},
	{`a`, "a", build(1, 0, 1)},
	{`x`, "y", nil},
	{`b`, "abc", build(1, 1, 2)},
	{`.`, "a", build(1, 0, 1)},
	{`.*`, "abcdef", build(1, 0, 6)},
	{`^`, "abcde", build(1, 0, 0)},
	{`$`, "abcde", build(1, 5, 5)},
	{`^abcd$`, "abcd", build(1, 0, 4)},
	{`^bcd'`, "abcdef", nil},
	{`^abcd$`, "abcde", nil},
	// NOTE(review): the next entry repeats the `a+`/"baaab" case listed earlier in the table.
	{`a+`, "baaab", build(1, 1, 4)},
	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
	{`[a-z]+`, "abcd", build(1, 0, 4)},
	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
	// Non-ASCII text: the expected indices count bytes, not characters.
	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
	{`日本語+`, "日本語", build(1, 0, 9)},
	{`日本語+`, "日本語語語語", build(1, 0, 18)},
	// Patterns with capture groups: each match lists the whole-match indices
	// first, then one index pair per group.
	{`()`, "", build(1, 0, 0, 0, 0)},
	{`(a)`, "a", build(1, 0, 1, 0, 1)},
	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
	{`(.*)`, "", build(1, 0, 0, 0, 0)},
	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},

	// A -1, -1 pair is checked by the submatch helpers as a nil (bytes) or
	// empty (string) submatch.
	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
	{`[.]`, ".", build(1, 0, 1)},
	{`/$`, "/abc/", build(1, 4, 5)},
	{`/$`, "/abc", nil},

	// multiple matches
	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},

	// fixed bugs
	{`ab$`, "cab", build(1, 1, 3)},
	{`axxb$`, "axxcb", nil},
	{`data`, "daXY data", build(1, 5, 9)},
	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
	{`zx+`, "zzx", build(1, 1, 3)},
	{`ab$`, "abcab", build(1, 3, 5)},
	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
	{`(?:.|(?:.a))`, "", nil},
	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
	{`(?-s)(?:(?:^).)`, "\n", nil},
	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
	{`(?:(?:^).)`, "\n", nil},
	{`\b`, "x", build(2, 0, 0, 1, 1)},
	{`\b`, "xx", build(2, 0, 0, 2, 2)},
	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
	{`\B`, "x", nil},
	{`\B`, "xx", build(1, 1, 1)},
	{`\B`, "x y", nil},
	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
	{`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)},

	// RE2 tests
	{`[^\S\s]`, "abcd", nil},
	{`[^\S[:space:]]`, "abcd", nil},
	{`[^\D\d]`, "abcd", nil},
	{`[^\D[:digit:]]`, "abcd", nil},
	{`(?i)\W`, "x", nil},
	{`(?i)\W`, "k", nil},
	{`(?i)\W`, "s", nil},

	// can backslash-escape any punctuation
	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	{"\\`", "`", build(1, 0, 1)},
	{"[\\`]+", "`", build(1, 0, 1)},

	// long set of matches (longer than startSize)
	{
		".",
		"qwertyuiopasdfghjklzxcvbnm1234567890",
		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
	},
}
   129  
   130  // build is a helper to construct a [][]int by extracting n sequences from x.
   131  // This represents n matches with len(x)/n submatches each.
   132  func build(n int, x ...int) [][]int {
   133  	ret := make([][]int, n)
   134  	runLength := len(x) / n
   135  	j := 0
   136  	for i := range ret {
   137  		ret[i] = make([]int, runLength)
   138  		copy(ret[i], x[j:])
   139  		j += runLength
   140  		if j > len(x) {
   141  			panic("invalid build entry")
   142  		}
   143  	}
   144  	return ret
   145  }
   146  
   147  // First the simple cases.
   148  
   149  func TestFind(t *testing.T) {
   150  	for _, test := range findTests {
   151  		re := MustCompile(test.pat)
   152  		if re.String() != test.pat {
   153  			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
   154  		}
   155  		result := re.Find([]byte(test.text))
   156  		switch {
   157  		case len(test.matches) == 0 && len(result) == 0:
   158  			// ok
   159  		case test.matches == nil && result != nil:
   160  			t.Errorf("expected no match; got one: %s", test)
   161  		case test.matches != nil && result == nil:
   162  			t.Errorf("expected match; got none: %s", test)
   163  		case test.matches != nil && result != nil:
   164  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   165  			if len(result) != cap(result) {
   166  				t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
   167  			}
   168  			if expect != string(result) {
   169  				t.Errorf("expected %q got %q: %s", expect, result, test)
   170  			}
   171  		}
   172  	}
   173  }
   174  
   175  func TestFindString(t *testing.T) {
   176  	for _, test := range findTests {
   177  		result := MustCompile(test.pat).FindString(test.text)
   178  		switch {
   179  		case len(test.matches) == 0 && len(result) == 0:
   180  			// ok
   181  		case test.matches == nil && result != "":
   182  			t.Errorf("expected no match; got one: %s", test)
   183  		case test.matches != nil && result == "":
   184  			// Tricky because an empty result has two meanings: no match or empty match.
   185  			if test.matches[0][0] != test.matches[0][1] {
   186  				t.Errorf("expected match; got none: %s", test)
   187  			}
   188  		case test.matches != nil && result != "":
   189  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   190  			if expect != result {
   191  				t.Errorf("expected %q got %q: %s", expect, result, test)
   192  			}
   193  		}
   194  	}
   195  }
   196  
   197  func testFindIndex(test *FindTest, result []int, t *testing.T) {
   198  	switch {
   199  	case len(test.matches) == 0 && len(result) == 0:
   200  		// ok
   201  	case test.matches == nil && result != nil:
   202  		t.Errorf("expected no match; got one: %s", test)
   203  	case test.matches != nil && result == nil:
   204  		t.Errorf("expected match; got none: %s", test)
   205  	case test.matches != nil && result != nil:
   206  		expect := test.matches[0]
   207  		if expect[0] != result[0] || expect[1] != result[1] {
   208  			t.Errorf("expected %v got %v: %s", expect, result, test)
   209  		}
   210  	}
   211  }
   212  
   213  func TestFindIndex(t *testing.T) {
   214  	for _, test := range findTests {
   215  		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
   216  	}
   217  }
   218  
   219  func TestFindStringIndex(t *testing.T) {
   220  	for _, test := range findTests {
   221  		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
   222  	}
   223  }
   224  
   225  func TestFindReaderIndex(t *testing.T) {
   226  	for _, test := range findTests {
   227  		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
   228  	}
   229  }
   230  
   231  // Now come the simple All cases.
   232  
   233  func TestFindAll(t *testing.T) {
   234  	for _, test := range findTests {
   235  		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
   236  		switch {
   237  		case test.matches == nil && result == nil:
   238  			// ok
   239  		case test.matches == nil && result != nil:
   240  			t.Errorf("expected no match; got one: %s", test)
   241  		case test.matches != nil && result == nil:
   242  			t.Fatalf("expected match; got none: %s", test)
   243  		case test.matches != nil && result != nil:
   244  			if len(test.matches) != len(result) {
   245  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   246  				continue
   247  			}
   248  			for k, e := range test.matches {
   249  				got := result[k]
   250  				if len(got) != cap(got) {
   251  					t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
   252  				}
   253  				expect := test.text[e[0]:e[1]]
   254  				if expect != string(got) {
   255  					t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
   256  				}
   257  			}
   258  		}
   259  	}
   260  }
   261  
   262  func TestFindAllString(t *testing.T) {
   263  	for _, test := range findTests {
   264  		result := MustCompile(test.pat).FindAllString(test.text, -1)
   265  		switch {
   266  		case test.matches == nil && result == nil:
   267  			// ok
   268  		case test.matches == nil && result != nil:
   269  			t.Errorf("expected no match; got one: %s", test)
   270  		case test.matches != nil && result == nil:
   271  			t.Errorf("expected match; got none: %s", test)
   272  		case test.matches != nil && result != nil:
   273  			if len(test.matches) != len(result) {
   274  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   275  				continue
   276  			}
   277  			for k, e := range test.matches {
   278  				expect := test.text[e[0]:e[1]]
   279  				if expect != result[k] {
   280  					t.Errorf("expected %q got %q: %s", expect, result, test)
   281  				}
   282  			}
   283  		}
   284  	}
   285  }
   286  
   287  func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
   288  	switch {
   289  	case test.matches == nil && result == nil:
   290  		// ok
   291  	case test.matches == nil && result != nil:
   292  		t.Errorf("expected no match; got one: %s", test)
   293  	case test.matches != nil && result == nil:
   294  		t.Errorf("expected match; got none: %s", test)
   295  	case test.matches != nil && result != nil:
   296  		if len(test.matches) != len(result) {
   297  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   298  			return
   299  		}
   300  		for k, e := range test.matches {
   301  			if e[0] != result[k][0] || e[1] != result[k][1] {
   302  				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
   303  			}
   304  		}
   305  	}
   306  }
   307  
   308  func TestFindAllIndex(t *testing.T) {
   309  	for _, test := range findTests {
   310  		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
   311  	}
   312  }
   313  
   314  func TestFindAllStringIndex(t *testing.T) {
   315  	for _, test := range findTests {
   316  		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
   317  	}
   318  }
   319  
   320  // Now come the Submatch cases.
   321  
   322  func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
   323  	if len(submatches) != len(result)*2 {
   324  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   325  		return
   326  	}
   327  	for k := 0; k < len(submatches); k += 2 {
   328  		if submatches[k] == -1 {
   329  			if result[k/2] != nil {
   330  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   331  			}
   332  			continue
   333  		}
   334  		got := result[k/2]
   335  		if len(got) != cap(got) {
   336  			t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
   337  			return
   338  		}
   339  		expect := test.text[submatches[k]:submatches[k+1]]
   340  		if expect != string(got) {
   341  			t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
   342  			return
   343  		}
   344  	}
   345  }
   346  
   347  func TestFindSubmatch(t *testing.T) {
   348  	for _, test := range findTests {
   349  		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
   350  		switch {
   351  		case test.matches == nil && result == nil:
   352  			// ok
   353  		case test.matches == nil && result != nil:
   354  			t.Errorf("expected no match; got one: %s", test)
   355  		case test.matches != nil && result == nil:
   356  			t.Errorf("expected match; got none: %s", test)
   357  		case test.matches != nil && result != nil:
   358  			testSubmatchBytes(&test, 0, test.matches[0], result, t)
   359  		}
   360  	}
   361  }
   362  
   363  func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
   364  	if len(submatches) != len(result)*2 {
   365  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   366  		return
   367  	}
   368  	for k := 0; k < len(submatches); k += 2 {
   369  		if submatches[k] == -1 {
   370  			if result[k/2] != "" {
   371  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   372  			}
   373  			continue
   374  		}
   375  		expect := test.text[submatches[k]:submatches[k+1]]
   376  		if expect != result[k/2] {
   377  			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
   378  			return
   379  		}
   380  	}
   381  }
   382  
   383  func TestFindStringSubmatch(t *testing.T) {
   384  	for _, test := range findTests {
   385  		result := MustCompile(test.pat).FindStringSubmatch(test.text)
   386  		switch {
   387  		case test.matches == nil && result == nil:
   388  			// ok
   389  		case test.matches == nil && result != nil:
   390  			t.Errorf("expected no match; got one: %s", test)
   391  		case test.matches != nil && result == nil:
   392  			t.Errorf("expected match; got none: %s", test)
   393  		case test.matches != nil && result != nil:
   394  			testSubmatchString(&test, 0, test.matches[0], result, t)
   395  		}
   396  	}
   397  }
   398  
   399  func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
   400  	if len(expect) != len(result) {
   401  		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
   402  		return
   403  	}
   404  	for k, e := range expect {
   405  		if e != result[k] {
   406  			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
   407  		}
   408  	}
   409  }
   410  
   411  func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
   412  	switch {
   413  	case test.matches == nil && result == nil:
   414  		// ok
   415  	case test.matches == nil && result != nil:
   416  		t.Errorf("expected no match; got one: %s", test)
   417  	case test.matches != nil && result == nil:
   418  		t.Errorf("expected match; got none: %s", test)
   419  	case test.matches != nil && result != nil:
   420  		testSubmatchIndices(test, 0, test.matches[0], result, t)
   421  	}
   422  }
   423  
   424  func TestFindSubmatchIndex(t *testing.T) {
   425  	for _, test := range findTests {
   426  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
   427  	}
   428  }
   429  
   430  func TestFindStringSubmatchIndex(t *testing.T) {
   431  	for _, test := range findTests {
   432  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
   433  	}
   434  }
   435  
   436  func TestFindReaderSubmatchIndex(t *testing.T) {
   437  	for _, test := range findTests {
   438  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
   439  	}
   440  }
   441  
   442  // Now come the monster AllSubmatch cases.
   443  
   444  func TestFindAllSubmatch(t *testing.T) {
   445  	for _, test := range findTests {
   446  		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
   447  		switch {
   448  		case test.matches == nil && result == nil:
   449  			// ok
   450  		case test.matches == nil && result != nil:
   451  			t.Errorf("expected no match; got one: %s", test)
   452  		case test.matches != nil && result == nil:
   453  			t.Errorf("expected match; got none: %s", test)
   454  		case len(test.matches) != len(result):
   455  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   456  		case test.matches != nil && result != nil:
   457  			for k, match := range test.matches {
   458  				testSubmatchBytes(&test, k, match, result[k], t)
   459  			}
   460  		}
   461  	}
   462  }
   463  
   464  func TestFindAllStringSubmatch(t *testing.T) {
   465  	for _, test := range findTests {
   466  		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
   467  		switch {
   468  		case test.matches == nil && result == nil:
   469  			// ok
   470  		case test.matches == nil && result != nil:
   471  			t.Errorf("expected no match; got one: %s", test)
   472  		case test.matches != nil && result == nil:
   473  			t.Errorf("expected match; got none: %s", test)
   474  		case len(test.matches) != len(result):
   475  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   476  		case test.matches != nil && result != nil:
   477  			for k, match := range test.matches {
   478  				testSubmatchString(&test, k, match, result[k], t)
   479  			}
   480  		}
   481  	}
   482  }
   483  
   484  func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
   485  	switch {
   486  	case test.matches == nil && result == nil:
   487  		// ok
   488  	case test.matches == nil && result != nil:
   489  		t.Errorf("expected no match; got one: %s", test)
   490  	case test.matches != nil && result == nil:
   491  		t.Errorf("expected match; got none: %s", test)
   492  	case len(test.matches) != len(result):
   493  		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   494  	case test.matches != nil && result != nil:
   495  		for k, match := range test.matches {
   496  			testSubmatchIndices(test, k, match, result[k], t)
   497  		}
   498  	}
   499  }
   500  
   501  func TestFindAllSubmatchIndex(t *testing.T) {
   502  	for _, test := range findTests {
   503  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
   504  	}
   505  }
   506  
   507  func TestFindAllStringSubmatchIndex(t *testing.T) {
   508  	for _, test := range findTests {
   509  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
   510  	}
   511  }
   512  

View as plain text