1
2
3
4
5
6
7
8 package elliptic
9
10 import (
11 "crypto/subtle"
12 "internal/cpu"
13 "math/big"
14 "unsafe"
15 )
16
const (
	// Byte offsets of the HasVX and HasVXE feature flags inside cpu.S390X.
	// Nothing in this Go file reads these constants; presumably they are
	// consumed by the accompanying assembly — confirm there.
	// NOTE(review): the second constant is named "VE1" but refers to HasVXE.
	offsetS390xHasVX  = unsafe.Offsetof(cpu.S390X.HasVX)
	offsetS390xHasVE1 = unsafe.Offsetof(cpu.S390X.HasVXE)
)
21
// p256CurveFast is the Curve implementation that initP256Arch installs when
// cpu.S390X.HasVX is set. It embeds the curve's *CurveParams.
type p256CurveFast struct {
	*CurveParams
}
25
// p256Point holds a curve point as three 32-byte coordinates.
// p256PointToAffine recovers the affine point by multiplying x by z^-2 and
// y by z^-3, so the coordinates are presumably Jacobian. Values are filled
// from big-endian byte slices (see fromBig) and multiplied by rr on entry,
// which presumably puts them in Montgomery form — confirm against the
// assembly.
type p256Point struct {
	x [32]byte
	y [32]byte
	z [32]byte
}
31
var (
	// p256 is the Curve selected by initP256Arch: p256CurveFast when the
	// CPU reports HasVX, otherwise p256Curve.
	p256 Curve
	// p256PreFast is the base-point table allocated and filled by
	// initTable and read by p256BaseMult. Only the x and y fields of each
	// entry are written; z is left at its zero value.
	p256PreFast *[37][64]p256Point
)
36
37
// The six functions below are declared without bodies, so their
// implementations live outside this file (presumably in the s390x assembly
// that accompanies it). Nothing in this Go file calls them.

// NOTE(review): by its name, picks the multiply implementation (VX or VMSL)
// on first use — confirm in the assembly.
func p256MulInternalTrampolineSetup()

// NOTE(review): by its name, picks the squaring implementation (VX or VMSL)
// on first use — confirm in the assembly.
func p256SqrInternalTrampolineSetup()

// NOTE(review): presumably the internal multiply for CPUs with only the
// base vector facility — confirm in the assembly.
func p256MulInternalVX()

// NOTE(review): presumably the internal multiply using the VMSL
// instruction — confirm in the assembly.
func p256MulInternalVMSL()

// NOTE(review): presumably the internal squaring for CPUs with only the
// base vector facility — confirm in the assembly.
func p256SqrInternalVX()

// NOTE(review): presumably the internal squaring using the VMSL
// instruction — confirm in the assembly.
func p256SqrInternalVMSL()
54
55 func initP256Arch() {
56 if cpu.S390X.HasVX {
57 p256 = p256CurveFast{p256Params}
58 initTable()
59 return
60 }
61
62
63 p256 = p256Curve{p256Params}
64 return
65 }
66
// Params returns the *CurveParams embedded in curve.
func (curve p256CurveFast) Params() *CurveParams {
	return curve.CurveParams
}
70
71
72
73
74
// p256SqrAsm is declared without a body, so it is implemented outside this
// file. It is only called through p256Sqr.
// NOTE(review): presumably writes the Montgomery square of in1 (mod the
// field prime) to res — confirm in the assembly.
func p256SqrAsm(res, in1 []byte)

// p256MulAsm is declared without a body, so it is implemented outside this
// file. Callers in this file pass 32-byte slices and often alias res with
// one of the inputs.
// NOTE(review): presumably writes the Montgomery product of in1 and in2
// (mod the field prime) to res — confirm in the assembly.
func p256MulAsm(res, in1, in2 []byte)
79
80
// p256Sqr forwards to p256SqrAsm with the same arguments.
func p256Sqr(res, in []byte) {
	p256SqrAsm(res, in)
}
84
85
86
87
// The functions in this group are declared without bodies, so they are
// implemented outside this file (presumably in assembly). The notes on each
// are inferred from how this file calls them and should be confirmed against
// the implementation.

// p256FromMont is applied in place to both coordinates by p256PointToAffine
// just before they are turned into big.Ints.
// NOTE(review): presumably converts a value out of Montgomery form.
func p256FromMont(res, in []byte)

// p256NegCond is called with the sign value returned by boothW5/boothW7.
// NOTE(review): presumably negates val when cond is 1 and leaves it alone
// when cond is 0.
func p256NegCond(val *p256Point, cond int)

// p256MovCond is used as a branch-free choice between two points.
// NOTE(review): call sites (e.g. CombinedMult) imply res = a when cond is
// non-zero and res = b when cond is zero.
func p256MovCond(res, a, b *p256Point, cond int)

// p256Select is called with the 16-entry precomp table of p256ScalarMult and
// an index produced by boothW5.
// NOTE(review): presumably a constant-time lookup where idx is 1-based and
// idx == 0 selects no entry.
func p256Select(point *p256Point, table []p256Point, idx int)

// p256SelectBase is called with one 64-entry row of p256PreFast and an index
// produced by boothW7.
// NOTE(review): presumably the same kind of lookup as p256Select, writing
// only the x and y coordinates.
func p256SelectBase(point *p256Point, table []p256Point, idx int)

// p256OrdMul is used by Inverse and p256OrdSqr, often with res aliasing an
// input.
// NOTE(review): presumably the Montgomery product of in1 and in2 modulo the
// group order N.
func p256OrdMul(res, in1, in2 []byte)
112
113
114 func p256OrdSqr(res, in []byte, n int) {
115 copy(res, in)
116 for i := 0; i < n; i += 1 {
117 p256OrdMul(res, res, res)
118 }
119 }
120
121
122
123
124
125
126
// The three functions below are declared without bodies, so they are
// implemented outside this file (presumably in assembly). The notes are
// inferred from call sites in this file and should be confirmed there.

// p256PointAddAffineAsm is called by p256BaseMult with P2 taken from the
// precomputed table, the Booth sign, the Booth digit sel, and zero, the OR of
// all earlier digits.
// NOTE(review): presumably P3 = P1 ± P2, with sel == 0 meaning "keep P1" and
// zero == 0 meaning "P1 is still the identity, take P2".
func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)

// p256PointAddAsm writes a point sum to P3. CombinedMult treats its int
// result as a flag that the two inputs were equal and substitutes a doubling
// in that case.
func p256PointAddAsm(P3, P1, P2 *p256Point) int

// p256PointDoubleAsm is called with P3 aliasing P1 as well as with distinct
// points.
// NOTE(review): presumably P3 = 2*P1.
func p256PointDoubleAsm(P3, P1 *p256Point)
136
137 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
138 if k.Cmp(p256Params.N) >= 0 {
139
140 reducedK := new(big.Int).Mod(k, p256Params.N)
141 k = reducedK
142 }
143
144
145
146 var table [15][32]byte
147
148 x := fromBig(k)
149
150
151
152
153
154
155 RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
156 0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
157
158 p256OrdMul(table[0][:], x, RR)
159
160
161
162 for i := 2; i < 16; i += 2 {
163 p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
164 p256OrdMul(table[i][:], table[i-1][:], table[0][:])
165 }
166
167 copy(x, table[14][:])
168
169 p256OrdSqr(x[0:32], x[0:32], 4)
170 p256OrdMul(x[0:32], x[0:32], table[14][:])
171 t := make([]byte, 32)
172 copy(t, x)
173
174 p256OrdSqr(x, x, 8)
175 p256OrdMul(x, x, t)
176 copy(t, x)
177
178 p256OrdSqr(x, x, 16)
179 p256OrdMul(x, x, t)
180 copy(t, x)
181
182 p256OrdSqr(x, x, 64)
183 p256OrdMul(x, x, t)
184 p256OrdSqr(x, x, 32)
185 p256OrdMul(x, x, t)
186
187
188 expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
189 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
190 for i := 0; i < 32; i++ {
191 p256OrdSqr(x, x, 4)
192 p256OrdMul(x, x, table[expLo[i]-1][:])
193 }
194
195
196
197 one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
198 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
199 p256OrdMul(x, x, one)
200
201 return new(big.Int).SetBytes(x)
202 }
203
204
// fromBig returns the magnitude of big as a 32-byte big-endian slice,
// left-padded with zeros. The sign is ignored (big.Int.Bytes returns the
// absolute value). It panics if the value needs more than 32 bytes.
func fromBig(big *big.Int) []byte {
	raw := big.Bytes()
	if len(raw) == 32 {
		// Already the right width; Bytes returned a fresh slice.
		return raw
	}
	padded := make([]byte, 32)
	copy(padded[32-len(raw):], raw)
	return padded
}
218
219
220
221 func p256GetMultiplier(in []byte) []byte {
222 n := new(big.Int).SetBytes(in)
223
224 if n.Cmp(p256Params.N) >= 0 {
225 n.Mod(n, p256Params.N)
226 }
227 return fromBig(n)
228 }
229
230
231
232
// rr is a 32-byte big-endian constant. ScalarMult and CombinedMult multiply
// freshly loaded coordinates by it with p256MulAsm before doing any point
// arithmetic.
// NOTE(review): presumably R*R mod p for R = 2^256 and the P-256 field prime
// p, so that the multiplication converts a value into Montgomery form —
// confirm.
var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}
235
236
// one is a 32-byte big-endian constant copied into the z coordinate of
// points that start out affine. It has the same bytes as the z coordinate of
// the base point in initTable.
// NOTE(review): the value equals 2^256 mod p for the P-256 field prime p, so
// it is presumably the Montgomery representation of 1 — confirm.
var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
239
240 func maybeReduceModP(in *big.Int) *big.Int {
241 if in.Cmp(p256Params.P) < 0 {
242 return in
243 }
244 return new(big.Int).Mod(in, p256Params.P)
245 }
246
247 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
248 var r1, r2 p256Point
249 scalarReduced := p256GetMultiplier(baseScalar)
250 r1IsInfinity := scalarIsZero(scalarReduced)
251 r1.p256BaseMult(scalarReduced)
252
253 copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
254 copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
255 copy(r2.z[:], one)
256 p256MulAsm(r2.x[:], r2.x[:], rr[:])
257 p256MulAsm(r2.y[:], r2.y[:], rr[:])
258
259 scalarReduced = p256GetMultiplier(scalar)
260 r2IsInfinity := scalarIsZero(scalarReduced)
261 r2.p256ScalarMult(p256GetMultiplier(scalar))
262
263 var sum, double p256Point
264 pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
265 p256PointDoubleAsm(&double, &r1)
266 p256MovCond(&sum, &double, &sum, pointsEqual)
267 p256MovCond(&sum, &r1, &sum, r2IsInfinity)
268 p256MovCond(&sum, &r2, &sum, r1IsInfinity)
269 return sum.p256PointToAffine()
270 }
271
272 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
273 var r p256Point
274 r.p256BaseMult(p256GetMultiplier(scalar))
275 return r.p256PointToAffine()
276 }
277
278 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
279 var r p256Point
280 copy(r.x[:], fromBig(maybeReduceModP(bigX)))
281 copy(r.y[:], fromBig(maybeReduceModP(bigY)))
282 copy(r.z[:], one)
283 p256MulAsm(r.x[:], r.x[:], rr[:])
284 p256MulAsm(r.y[:], r.y[:], rr[:])
285 r.p256ScalarMult(p256GetMultiplier(scalar))
286 return r.p256PointToAffine()
287 }
288
289
290
// scalarIsZero returns 1 if every byte of scalar is zero (including when
// scalar is empty) and 0 otherwise. All bytes are OR-ed together and the
// comparison uses subtle.ConstantTimeByteEq, so there is no data-dependent
// branch.
func scalarIsZero(scalar []byte) int {
	var acc byte
	for i := range scalar {
		acc |= scalar[i]
	}
	return subtle.ConstantTimeByteEq(acc, 0)
}
298
299 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
300 zInv := make([]byte, 32)
301 zInvSq := make([]byte, 32)
302
303 p256Inverse(zInv, p.z[:])
304 p256Sqr(zInvSq, zInv)
305 p256MulAsm(zInv, zInv, zInvSq)
306
307 p256MulAsm(zInvSq, p.x[:], zInvSq)
308 p256MulAsm(zInv, p.y[:], zInv)
309
310 p256FromMont(zInvSq, zInvSq)
311 p256FromMont(zInv, zInv)
312
313 return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
314 }
315
316
317 func p256Inverse(out, in []byte) {
318 var stack [6 * 32]byte
319 p2 := stack[32*0 : 32*0+32]
320 p4 := stack[32*1 : 32*1+32]
321 p8 := stack[32*2 : 32*2+32]
322 p16 := stack[32*3 : 32*3+32]
323 p32 := stack[32*4 : 32*4+32]
324
325 p256Sqr(out, in)
326 p256MulAsm(p2, out, in)
327
328 p256Sqr(out, p2)
329 p256Sqr(out, out)
330 p256MulAsm(p4, out, p2)
331
332 p256Sqr(out, p4)
333 p256Sqr(out, out)
334 p256Sqr(out, out)
335 p256Sqr(out, out)
336 p256MulAsm(p8, out, p4)
337
338 p256Sqr(out, p8)
339
340 for i := 0; i < 7; i++ {
341 p256Sqr(out, out)
342 }
343 p256MulAsm(p16, out, p8)
344
345 p256Sqr(out, p16)
346 for i := 0; i < 15; i++ {
347 p256Sqr(out, out)
348 }
349 p256MulAsm(p32, out, p16)
350
351 p256Sqr(out, p32)
352
353 for i := 0; i < 31; i++ {
354 p256Sqr(out, out)
355 }
356 p256MulAsm(out, out, in)
357
358 for i := 0; i < 32*4; i++ {
359 p256Sqr(out, out)
360 }
361 p256MulAsm(out, out, p32)
362
363 for i := 0; i < 32; i++ {
364 p256Sqr(out, out)
365 }
366 p256MulAsm(out, out, p32)
367
368 for i := 0; i < 16; i++ {
369 p256Sqr(out, out)
370 }
371 p256MulAsm(out, out, p16)
372
373 for i := 0; i < 8; i++ {
374 p256Sqr(out, out)
375 }
376 p256MulAsm(out, out, p8)
377
378 p256Sqr(out, out)
379 p256Sqr(out, out)
380 p256Sqr(out, out)
381 p256Sqr(out, out)
382 p256MulAsm(out, out, p4)
383
384 p256Sqr(out, out)
385 p256Sqr(out, out)
386 p256MulAsm(out, out, p2)
387
388 p256Sqr(out, out)
389 p256Sqr(out, out)
390 p256MulAsm(out, out, in)
391 }
392
// boothW5 recodes a 6-bit window (in < 64) into a digit magnitude in 0..16
// and a sign flag (1 when bit 5 of in is set, else 0). The computation is
// branch-free.
func boothW5(in uint) (int, int) {
	// For in < 64, mask is all ones when bit 5 is set and zero otherwise.
	mask := ^((in >> 5) - 1)
	flipped := (1<<6 - 1) - in
	digit := (flipped & mask) | (in &^ mask)
	digit = digit>>1 + digit&1 // halve, rounding up
	return int(digit), int(mask & 1)
}
400
// boothW7 recodes an 8-bit window (in < 256) into a digit magnitude in 0..64
// and a sign flag (1 when bit 7 of in is set, else 0). The computation is
// branch-free.
func boothW7(in uint) (int, int) {
	// For in < 256, mask is all ones when bit 7 is set and zero otherwise.
	mask := ^((in >> 7) - 1)
	flipped := (1<<8 - 1) - in
	digit := (flipped & mask) | (in &^ mask)
	digit = digit>>1 + digit&1 // halve, rounding up
	return int(digit), int(mask & 1)
}
408
409 func initTable() {
410 p256PreFast = new([37][64]p256Point)
411 basePoint := p256Point{
412 x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
413 0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c},
414 y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
415 0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a},
416 z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
417 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
418 }
419
420 t1 := new(p256Point)
421 t2 := new(p256Point)
422 *t2 = basePoint
423
424 zInv := make([]byte, 32)
425 zInvSq := make([]byte, 32)
426 for j := 0; j < 64; j++ {
427 *t1 = *t2
428 for i := 0; i < 37; i++ {
429
430 if i != 0 {
431 for k := 0; k < 7; k++ {
432 p256PointDoubleAsm(t1, t1)
433 }
434 }
435
436
437 p256Inverse(zInv, t1.z[:])
438 p256Sqr(zInvSq, zInv)
439 p256MulAsm(zInv, zInv, zInvSq)
440
441 p256MulAsm(t1.x[:], t1.x[:], zInvSq)
442 p256MulAsm(t1.y[:], t1.y[:], zInv)
443
444 copy(t1.z[:], basePoint.z[:])
445
446 copy(p256PreFast[i][j].x[:], t1.x[:])
447 copy(p256PreFast[i][j].y[:], t1.y[:])
448 }
449 if j == 0 {
450 p256PointDoubleAsm(t2, &basePoint)
451 } else {
452 p256PointAddAsm(t2, t2, &basePoint)
453 }
454 }
455 }
456
457 func (p *p256Point) p256BaseMult(scalar []byte) {
458 wvalue := (uint(scalar[31]) << 1) & 0xff
459 sel, sign := boothW7(uint(wvalue))
460 p256SelectBase(p, p256PreFast[0][:], sel)
461 p256NegCond(p, sign)
462
463 copy(p.z[:], one[:])
464 var t0 p256Point
465
466 copy(t0.z[:], one[:])
467
468 index := uint(6)
469 zero := sel
470
471 for i := 1; i < 37; i++ {
472 if index < 247 {
473 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
474 } else {
475 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
476 }
477 index += 7
478 sel, sign = boothW7(uint(wvalue))
479 p256SelectBase(&t0, p256PreFast[i][:], sel)
480 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
481 zero |= sel
482 }
483 }
484
485 func (p *p256Point) p256ScalarMult(scalar []byte) {
486
487
488 var precomp [16]p256Point
489 var t0, t1, t2, t3 p256Point
490
491
492 *&precomp[0] = *p
493
494 p256PointDoubleAsm(&t0, p)
495 p256PointDoubleAsm(&t1, &t0)
496 p256PointDoubleAsm(&t2, &t1)
497 p256PointDoubleAsm(&t3, &t2)
498 *&precomp[1] = t0
499 *&precomp[3] = t1
500 *&precomp[7] = t2
501 *&precomp[15] = t3
502
503 p256PointAddAsm(&t0, &t0, p)
504 p256PointAddAsm(&t1, &t1, p)
505 p256PointAddAsm(&t2, &t2, p)
506 *&precomp[2] = t0
507 *&precomp[4] = t1
508 *&precomp[8] = t2
509
510 p256PointDoubleAsm(&t0, &t0)
511 p256PointDoubleAsm(&t1, &t1)
512 *&precomp[5] = t0
513 *&precomp[9] = t1
514
515 p256PointAddAsm(&t2, &t0, p)
516 p256PointAddAsm(&t1, &t1, p)
517 *&precomp[6] = t2
518 *&precomp[10] = t1
519
520 p256PointDoubleAsm(&t0, &t0)
521 p256PointDoubleAsm(&t2, &t2)
522 *&precomp[11] = t0
523 *&precomp[13] = t2
524
525 p256PointAddAsm(&t0, &t0, p)
526 p256PointAddAsm(&t2, &t2, p)
527 *&precomp[12] = t0
528 *&precomp[14] = t2
529
530
531 index := uint(254)
532 var sel, sign int
533
534 wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
535 sel, _ = boothW5(uint(wvalue))
536 p256Select(p, precomp[:], sel)
537 zero := sel
538
539 for index > 4 {
540 index -= 5
541 p256PointDoubleAsm(p, p)
542 p256PointDoubleAsm(p, p)
543 p256PointDoubleAsm(p, p)
544 p256PointDoubleAsm(p, p)
545 p256PointDoubleAsm(p, p)
546
547 if index < 247 {
548 wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
549 } else {
550 wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
551 }
552
553 sel, sign = boothW5(uint(wvalue))
554
555 p256Select(&t0, precomp[:], sel)
556 p256NegCond(&t0, sign)
557 p256PointAddAsm(&t1, p, &t0)
558 p256MovCond(&t1, &t1, p, sel)
559 p256MovCond(p, &t1, &t0, zero)
560 zero |= sel
561 }
562
563 p256PointDoubleAsm(p, p)
564 p256PointDoubleAsm(p, p)
565 p256PointDoubleAsm(p, p)
566 p256PointDoubleAsm(p, p)
567 p256PointDoubleAsm(p, p)
568
569 wvalue = (uint(scalar[31]) << 1) & 0x3f
570 sel, sign = boothW5(uint(wvalue))
571
572 p256Select(&t0, precomp[:], sel)
573 p256NegCond(&t0, sign)
574 p256PointAddAsm(&t1, p, &t0)
575 p256MovCond(&t1, &t1, p, sel)
576 p256MovCond(p, &t1, &t0, zero)
577 }
578
View as plain text