1 | /*
|
---|
2 | * Copyright 2014-2022 The OpenSSL Project Authors. All Rights Reserved.
|
---|
3 | * Copyright (c) 2014, Intel Corporation. All Rights Reserved.
|
---|
4 | * Copyright (c) 2015, CloudFlare, Inc.
|
---|
5 | *
|
---|
6 | * Licensed under the Apache License 2.0 (the "License"). You may not use
|
---|
7 | * this file except in compliance with the License. You can obtain a copy
|
---|
8 | * in the file LICENSE in the source distribution or at
|
---|
9 | * https://www.openssl.org/source/license.html
|
---|
10 | *
|
---|
11 | * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1, 3)
|
---|
12 | * (1) Intel Corporation, Israel Development Center, Haifa, Israel
|
---|
13 | * (2) University of Haifa, Israel
|
---|
14 | * (3) CloudFlare, Inc.
|
---|
15 | *
|
---|
16 | * Reference:
|
---|
17 | * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
|
---|
18 | * 256 Bit Primes"
|
---|
19 | */
|
---|
20 |
|
---|
21 | /*
|
---|
22 | * ECDSA low level APIs are deprecated for public use, but still ok for
|
---|
23 | * internal use.
|
---|
24 | */
|
---|
25 | #include "internal/deprecated.h"
|
---|
26 |
|
---|
27 | #include <string.h>
|
---|
28 |
|
---|
29 | #include "internal/cryptlib.h"
|
---|
30 | #include "crypto/bn.h"
|
---|
31 | #include "ec_local.h"
|
---|
32 | #include "internal/refcount.h"
|
---|
33 |
|
---|
34 | #if BN_BITS2 != 64
|
---|
35 | # define TOBN(hi,lo) lo,hi
|
---|
36 | #else
|
---|
37 | # define TOBN(hi,lo) ((BN_ULONG)hi<<32|lo)
|
---|
38 | #endif
|
---|
39 |
|
---|
40 | #if defined(__GNUC__)
|
---|
41 | # define ALIGN32 __attribute((aligned(32)))
|
---|
42 | #elif defined(_MSC_VER)
|
---|
43 | # define ALIGN32 __declspec(align(32))
|
---|
44 | #else
|
---|
45 | # define ALIGN32
|
---|
46 | #endif
|
---|
47 |
|
---|
48 | #define ALIGNPTR(p,N) ((unsigned char *)p+N-(size_t)p%N)
|
---|
49 | #define P256_LIMBS (256/BN_BITS2)
|
---|
50 |
|
---|
51 | typedef unsigned short u16;
|
---|
52 |
|
---|
53 | typedef struct {
|
---|
54 | BN_ULONG X[P256_LIMBS];
|
---|
55 | BN_ULONG Y[P256_LIMBS];
|
---|
56 | BN_ULONG Z[P256_LIMBS];
|
---|
57 | } P256_POINT;
|
---|
58 |
|
---|
59 | typedef struct {
|
---|
60 | BN_ULONG X[P256_LIMBS];
|
---|
61 | BN_ULONG Y[P256_LIMBS];
|
---|
62 | } P256_POINT_AFFINE;
|
---|
63 |
|
---|
64 | typedef P256_POINT_AFFINE PRECOMP256_ROW[64];
|
---|
65 |
|
---|
66 | /* structure for precomputed multiples of the generator */
|
---|
67 | struct nistz256_pre_comp_st {
|
---|
68 | const EC_GROUP *group; /* Parent EC_GROUP object */
|
---|
69 | size_t w; /* Window size */
|
---|
70 | /*
|
---|
71 | * Constant time access to the X and Y coordinates of the pre-computed,
|
---|
72 | * generator multiplies, in the Montgomery domain. Pre-calculated
|
---|
73 | * multiplies are stored in affine form.
|
---|
74 | */
|
---|
75 | PRECOMP256_ROW *precomp;
|
---|
76 | void *precomp_storage;
|
---|
77 | CRYPTO_REF_COUNT references;
|
---|
78 | CRYPTO_RWLOCK *lock;
|
---|
79 | };
|
---|
80 |
|
---|
81 | /* Functions implemented in assembly */
|
---|
82 | /*
|
---|
83 | * Most of below mentioned functions *preserve* the property of inputs
|
---|
84 | * being fully reduced, i.e. being in [0, modulus) range. Simply put if
|
---|
85 | * inputs are fully reduced, then output is too. Note that reverse is
|
---|
86 | * not true, in sense that given partially reduced inputs output can be
|
---|
87 | * either, not unlikely reduced. And "most" in first sentence refers to
|
---|
88 | * the fact that given the calculations flow one can tolerate that
|
---|
89 | * addition, 1st function below, produces partially reduced result *if*
|
---|
90 | * multiplications by 2 and 3, which customarily use addition, fully
|
---|
91 | * reduce it. This effectively gives two options: a) addition produces
|
---|
92 | * fully reduced result [as long as inputs are, just like remaining
|
---|
93 | * functions]; b) addition is allowed to produce partially reduced
|
---|
94 | * result, but multiplications by 2 and 3 perform additional reduction
|
---|
95 | * step. Choice between the two can be platform-specific, but it was a)
|
---|
96 | * in all cases so far...
|
---|
97 | */
|
---|
98 | /* Modular add: res = a+b mod P */
|
---|
99 | void ecp_nistz256_add(BN_ULONG res[P256_LIMBS],
|
---|
100 | const BN_ULONG a[P256_LIMBS],
|
---|
101 | const BN_ULONG b[P256_LIMBS]);
|
---|
102 | /* Modular mul by 2: res = 2*a mod P */
|
---|
103 | void ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS],
|
---|
104 | const BN_ULONG a[P256_LIMBS]);
|
---|
105 | /* Modular mul by 3: res = 3*a mod P */
|
---|
106 | void ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS],
|
---|
107 | const BN_ULONG a[P256_LIMBS]);
|
---|
108 |
|
---|
109 | /* Modular div by 2: res = a/2 mod P */
|
---|
110 | void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS],
|
---|
111 | const BN_ULONG a[P256_LIMBS]);
|
---|
112 | /* Modular sub: res = a-b mod P */
|
---|
113 | void ecp_nistz256_sub(BN_ULONG res[P256_LIMBS],
|
---|
114 | const BN_ULONG a[P256_LIMBS],
|
---|
115 | const BN_ULONG b[P256_LIMBS]);
|
---|
116 | /* Modular neg: res = -a mod P */
|
---|
117 | void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);
|
---|
118 | /* Montgomery mul: res = a*b*2^-256 mod P */
|
---|
119 | void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS],
|
---|
120 | const BN_ULONG a[P256_LIMBS],
|
---|
121 | const BN_ULONG b[P256_LIMBS]);
|
---|
122 | /* Montgomery sqr: res = a*a*2^-256 mod P */
|
---|
123 | void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS],
|
---|
124 | const BN_ULONG a[P256_LIMBS]);
|
---|
125 | /* Convert a number from Montgomery domain, by multiplying with 1 */
|
---|
126 | void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],
|
---|
127 | const BN_ULONG in[P256_LIMBS]);
|
---|
128 | /* Convert a number to Montgomery domain, by multiplying with 2^512 mod P*/
|
---|
129 | void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS],
|
---|
130 | const BN_ULONG in[P256_LIMBS]);
|
---|
131 | /* Functions that perform constant time access to the precomputed tables */
|
---|
132 | void ecp_nistz256_scatter_w5(P256_POINT *val,
|
---|
133 | const P256_POINT *in_t, int idx);
|
---|
134 | void ecp_nistz256_gather_w5(P256_POINT *val,
|
---|
135 | const P256_POINT *in_t, int idx);
|
---|
136 | void ecp_nistz256_scatter_w7(P256_POINT_AFFINE *val,
|
---|
137 | const P256_POINT_AFFINE *in_t, int idx);
|
---|
138 | void ecp_nistz256_gather_w7(P256_POINT_AFFINE *val,
|
---|
139 | const P256_POINT_AFFINE *in_t, int idx);
|
---|
140 |
|
---|
141 | /* One converted into the Montgomery domain */
|
---|
142 | static const BN_ULONG ONE[P256_LIMBS] = {
|
---|
143 | TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000),
|
---|
144 | TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe)
|
---|
145 | };
|
---|
146 |
|
---|
147 | static NISTZ256_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group);
|
---|
148 |
|
---|
149 | /* Precomputed tables for the default generator */
|
---|
150 | extern const PRECOMP256_ROW ecp_nistz256_precomputed[37];
|
---|
151 |
|
---|
152 | /* Recode window to a signed digit, see ecp_nistputil.c for details */
|
---|
153 | static unsigned int _booth_recode_w5(unsigned int in)
|
---|
154 | {
|
---|
155 | unsigned int s, d;
|
---|
156 |
|
---|
157 | s = ~((in >> 5) - 1);
|
---|
158 | d = (1 << 6) - in - 1;
|
---|
159 | d = (d & s) | (in & ~s);
|
---|
160 | d = (d >> 1) + (d & 1);
|
---|
161 |
|
---|
162 | return (d << 1) + (s & 1);
|
---|
163 | }
|
---|
164 |
|
---|
165 | static unsigned int _booth_recode_w7(unsigned int in)
|
---|
166 | {
|
---|
167 | unsigned int s, d;
|
---|
168 |
|
---|
169 | s = ~((in >> 7) - 1);
|
---|
170 | d = (1 << 8) - in - 1;
|
---|
171 | d = (d & s) | (in & ~s);
|
---|
172 | d = (d >> 1) + (d & 1);
|
---|
173 |
|
---|
174 | return (d << 1) + (s & 1);
|
---|
175 | }
|
---|
176 |
|
---|
177 | static void copy_conditional(BN_ULONG dst[P256_LIMBS],
|
---|
178 | const BN_ULONG src[P256_LIMBS], BN_ULONG move)
|
---|
179 | {
|
---|
180 | BN_ULONG mask1 = 0-move;
|
---|
181 | BN_ULONG mask2 = ~mask1;
|
---|
182 |
|
---|
183 | dst[0] = (src[0] & mask1) ^ (dst[0] & mask2);
|
---|
184 | dst[1] = (src[1] & mask1) ^ (dst[1] & mask2);
|
---|
185 | dst[2] = (src[2] & mask1) ^ (dst[2] & mask2);
|
---|
186 | dst[3] = (src[3] & mask1) ^ (dst[3] & mask2);
|
---|
187 | if (P256_LIMBS == 8) {
|
---|
188 | dst[4] = (src[4] & mask1) ^ (dst[4] & mask2);
|
---|
189 | dst[5] = (src[5] & mask1) ^ (dst[5] & mask2);
|
---|
190 | dst[6] = (src[6] & mask1) ^ (dst[6] & mask2);
|
---|
191 | dst[7] = (src[7] & mask1) ^ (dst[7] & mask2);
|
---|
192 | }
|
---|
193 | }
|
---|
194 |
|
---|
195 | static BN_ULONG is_zero(BN_ULONG in)
|
---|
196 | {
|
---|
197 | in |= (0 - in);
|
---|
198 | in = ~in;
|
---|
199 | in >>= BN_BITS2 - 1;
|
---|
200 | return in;
|
---|
201 | }
|
---|
202 |
|
---|
203 | static BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS],
|
---|
204 | const BN_ULONG b[P256_LIMBS])
|
---|
205 | {
|
---|
206 | BN_ULONG res;
|
---|
207 |
|
---|
208 | res = a[0] ^ b[0];
|
---|
209 | res |= a[1] ^ b[1];
|
---|
210 | res |= a[2] ^ b[2];
|
---|
211 | res |= a[3] ^ b[3];
|
---|
212 | if (P256_LIMBS == 8) {
|
---|
213 | res |= a[4] ^ b[4];
|
---|
214 | res |= a[5] ^ b[5];
|
---|
215 | res |= a[6] ^ b[6];
|
---|
216 | res |= a[7] ^ b[7];
|
---|
217 | }
|
---|
218 |
|
---|
219 | return is_zero(res);
|
---|
220 | }
|
---|
221 |
|
---|
222 | static BN_ULONG is_one(const BIGNUM *z)
|
---|
223 | {
|
---|
224 | BN_ULONG res = 0;
|
---|
225 | BN_ULONG *a = bn_get_words(z);
|
---|
226 |
|
---|
227 | if (bn_get_top(z) == (P256_LIMBS - P256_LIMBS / 8)) {
|
---|
228 | res = a[0] ^ ONE[0];
|
---|
229 | res |= a[1] ^ ONE[1];
|
---|
230 | res |= a[2] ^ ONE[2];
|
---|
231 | res |= a[3] ^ ONE[3];
|
---|
232 | if (P256_LIMBS == 8) {
|
---|
233 | res |= a[4] ^ ONE[4];
|
---|
234 | res |= a[5] ^ ONE[5];
|
---|
235 | res |= a[6] ^ ONE[6];
|
---|
236 | /*
|
---|
237 | * no check for a[7] (being zero) on 32-bit platforms,
|
---|
238 | * because value of "one" takes only 7 limbs.
|
---|
239 | */
|
---|
240 | }
|
---|
241 | res = is_zero(res);
|
---|
242 | }
|
---|
243 |
|
---|
244 | return res;
|
---|
245 | }
|
---|
246 |
|
---|
247 | /*
|
---|
248 | * For reference, this macro is used only when new ecp_nistz256 assembly
|
---|
249 | * module is being developed. For example, configure with
|
---|
250 | * -DECP_NISTZ256_REFERENCE_IMPLEMENTATION and implement only functions
|
---|
251 | * performing simplest arithmetic operations on 256-bit vectors. Then
|
---|
252 | * work on implementation of higher-level functions performing point
|
---|
253 | * operations. Then remove ECP_NISTZ256_REFERENCE_IMPLEMENTATION
|
---|
254 | * and never define it again. (The correct macro denoting presence of
|
---|
255 | * ecp_nistz256 module is ECP_NISTZ256_ASM.)
|
---|
256 | */
|
---|
257 | #ifndef ECP_NISTZ256_REFERENCE_IMPLEMENTATION
|
---|
258 | void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);
|
---|
259 | void ecp_nistz256_point_add(P256_POINT *r,
|
---|
260 | const P256_POINT *a, const P256_POINT *b);
|
---|
261 | void ecp_nistz256_point_add_affine(P256_POINT *r,
|
---|
262 | const P256_POINT *a,
|
---|
263 | const P256_POINT_AFFINE *b);
|
---|
264 | #else
|
---|
265 | /* Point double: r = 2*a */
|
---|
266 | static void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a)
|
---|
267 | {
|
---|
268 | BN_ULONG S[P256_LIMBS];
|
---|
269 | BN_ULONG M[P256_LIMBS];
|
---|
270 | BN_ULONG Zsqr[P256_LIMBS];
|
---|
271 | BN_ULONG tmp0[P256_LIMBS];
|
---|
272 |
|
---|
273 | const BN_ULONG *in_x = a->X;
|
---|
274 | const BN_ULONG *in_y = a->Y;
|
---|
275 | const BN_ULONG *in_z = a->Z;
|
---|
276 |
|
---|
277 | BN_ULONG *res_x = r->X;
|
---|
278 | BN_ULONG *res_y = r->Y;
|
---|
279 | BN_ULONG *res_z = r->Z;
|
---|
280 |
|
---|
281 | ecp_nistz256_mul_by_2(S, in_y);
|
---|
282 |
|
---|
283 | ecp_nistz256_sqr_mont(Zsqr, in_z);
|
---|
284 |
|
---|
285 | ecp_nistz256_sqr_mont(S, S);
|
---|
286 |
|
---|
287 | ecp_nistz256_mul_mont(res_z, in_z, in_y);
|
---|
288 | ecp_nistz256_mul_by_2(res_z, res_z);
|
---|
289 |
|
---|
290 | ecp_nistz256_add(M, in_x, Zsqr);
|
---|
291 | ecp_nistz256_sub(Zsqr, in_x, Zsqr);
|
---|
292 |
|
---|
293 | ecp_nistz256_sqr_mont(res_y, S);
|
---|
294 | ecp_nistz256_div_by_2(res_y, res_y);
|
---|
295 |
|
---|
296 | ecp_nistz256_mul_mont(M, M, Zsqr);
|
---|
297 | ecp_nistz256_mul_by_3(M, M);
|
---|
298 |
|
---|
299 | ecp_nistz256_mul_mont(S, S, in_x);
|
---|
300 | ecp_nistz256_mul_by_2(tmp0, S);
|
---|
301 |
|
---|
302 | ecp_nistz256_sqr_mont(res_x, M);
|
---|
303 |
|
---|
304 | ecp_nistz256_sub(res_x, res_x, tmp0);
|
---|
305 | ecp_nistz256_sub(S, S, res_x);
|
---|
306 |
|
---|
307 | ecp_nistz256_mul_mont(S, S, M);
|
---|
308 | ecp_nistz256_sub(res_y, S, res_y);
|
---|
309 | }
|
---|
310 |
|
---|
311 | /* Point addition: r = a+b */
|
---|
312 | static void ecp_nistz256_point_add(P256_POINT *r,
|
---|
313 | const P256_POINT *a, const P256_POINT *b)
|
---|
314 | {
|
---|
315 | BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS];
|
---|
316 | BN_ULONG U1[P256_LIMBS], S1[P256_LIMBS];
|
---|
317 | BN_ULONG Z1sqr[P256_LIMBS];
|
---|
318 | BN_ULONG Z2sqr[P256_LIMBS];
|
---|
319 | BN_ULONG H[P256_LIMBS], R[P256_LIMBS];
|
---|
320 | BN_ULONG Hsqr[P256_LIMBS];
|
---|
321 | BN_ULONG Rsqr[P256_LIMBS];
|
---|
322 | BN_ULONG Hcub[P256_LIMBS];
|
---|
323 |
|
---|
324 | BN_ULONG res_x[P256_LIMBS];
|
---|
325 | BN_ULONG res_y[P256_LIMBS];
|
---|
326 | BN_ULONG res_z[P256_LIMBS];
|
---|
327 |
|
---|
328 | BN_ULONG in1infty, in2infty;
|
---|
329 |
|
---|
330 | const BN_ULONG *in1_x = a->X;
|
---|
331 | const BN_ULONG *in1_y = a->Y;
|
---|
332 | const BN_ULONG *in1_z = a->Z;
|
---|
333 |
|
---|
334 | const BN_ULONG *in2_x = b->X;
|
---|
335 | const BN_ULONG *in2_y = b->Y;
|
---|
336 | const BN_ULONG *in2_z = b->Z;
|
---|
337 |
|
---|
338 | /*
|
---|
339 | * Infinity in encoded as (,,0)
|
---|
340 | */
|
---|
341 | in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
|
---|
342 | if (P256_LIMBS == 8)
|
---|
343 | in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
|
---|
344 |
|
---|
345 | in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]);
|
---|
346 | if (P256_LIMBS == 8)
|
---|
347 | in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]);
|
---|
348 |
|
---|
349 | in1infty = is_zero(in1infty);
|
---|
350 | in2infty = is_zero(in2infty);
|
---|
351 |
|
---|
352 | ecp_nistz256_sqr_mont(Z2sqr, in2_z); /* Z2^2 */
|
---|
353 | ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */
|
---|
354 |
|
---|
355 | ecp_nistz256_mul_mont(S1, Z2sqr, in2_z); /* S1 = Z2^3 */
|
---|
356 | ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */
|
---|
357 |
|
---|
358 | ecp_nistz256_mul_mont(S1, S1, in1_y); /* S1 = Y1*Z2^3 */
|
---|
359 | ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */
|
---|
360 | ecp_nistz256_sub(R, S2, S1); /* R = S2 - S1 */
|
---|
361 |
|
---|
362 | ecp_nistz256_mul_mont(U1, in1_x, Z2sqr); /* U1 = X1*Z2^2 */
|
---|
363 | ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */
|
---|
364 | ecp_nistz256_sub(H, U2, U1); /* H = U2 - U1 */
|
---|
365 |
|
---|
366 | /*
|
---|
367 | * The formulae are incorrect if the points are equal so we check for
|
---|
368 | * this and do doubling if this happens.
|
---|
369 | *
|
---|
370 | * Points here are in Jacobian projective coordinates (Xi, Yi, Zi)
|
---|
371 | * that are bound to the affine coordinates (xi, yi) by the following
|
---|
372 | * equations:
|
---|
373 | * - xi = Xi / (Zi)^2
|
---|
374 | * - y1 = Yi / (Zi)^3
|
---|
375 | *
|
---|
376 | * For the sake of optimization, the algorithm operates over
|
---|
377 | * intermediate variables U1, U2 and S1, S2 that are derived from
|
---|
378 | * the projective coordinates:
|
---|
379 | * - U1 = X1 * (Z2)^2 ; U2 = X2 * (Z1)^2
|
---|
380 | * - S1 = Y1 * (Z2)^3 ; S2 = Y2 * (Z1)^3
|
---|
381 | *
|
---|
382 | * It is easy to prove that is_equal(U1, U2) implies that the affine
|
---|
383 | * x-coordinates are equal, or either point is at infinity.
|
---|
384 | * Likewise is_equal(S1, S2) implies that the affine y-coordinates are
|
---|
385 | * equal, or either point is at infinity.
|
---|
386 | *
|
---|
387 | * The special case of either point being the point at infinity (Z1 or Z2
|
---|
388 | * is zero), is handled separately later on in this function, so we avoid
|
---|
389 | * jumping to point_double here in those special cases.
|
---|
390 | *
|
---|
391 | * When both points are inverse of each other, we know that the affine
|
---|
392 | * x-coordinates are equal, and the y-coordinates have different sign.
|
---|
393 | * Therefore since U1 = U2, we know H = 0, and therefore Z3 = H*Z1*Z2
|
---|
394 | * will equal 0, thus the result is infinity, if we simply let this
|
---|
395 | * function continue normally.
|
---|
396 | *
|
---|
397 | * We use bitwise operations to avoid potential side-channels introduced by
|
---|
398 | * the short-circuiting behaviour of boolean operators.
|
---|
399 | */
|
---|
400 | if (is_equal(U1, U2) & ~in1infty & ~in2infty & is_equal(S1, S2)) {
|
---|
401 | /*
|
---|
402 | * This is obviously not constant-time but it should never happen during
|
---|
403 | * single point multiplication, so there is no timing leak for ECDH or
|
---|
404 | * ECDSA signing.
|
---|
405 | */
|
---|
406 | ecp_nistz256_point_double(r, a);
|
---|
407 | return;
|
---|
408 | }
|
---|
409 |
|
---|
410 | ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */
|
---|
411 | ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */
|
---|
412 | ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */
|
---|
413 | ecp_nistz256_mul_mont(res_z, res_z, in2_z); /* Z3 = H*Z1*Z2 */
|
---|
414 | ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */
|
---|
415 |
|
---|
416 | ecp_nistz256_mul_mont(U2, U1, Hsqr); /* U1*H^2 */
|
---|
417 | ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */
|
---|
418 |
|
---|
419 | ecp_nistz256_sub(res_x, Rsqr, Hsqr);
|
---|
420 | ecp_nistz256_sub(res_x, res_x, Hcub);
|
---|
421 |
|
---|
422 | ecp_nistz256_sub(res_y, U2, res_x);
|
---|
423 |
|
---|
424 | ecp_nistz256_mul_mont(S2, S1, Hcub);
|
---|
425 | ecp_nistz256_mul_mont(res_y, R, res_y);
|
---|
426 | ecp_nistz256_sub(res_y, res_y, S2);
|
---|
427 |
|
---|
428 | copy_conditional(res_x, in2_x, in1infty);
|
---|
429 | copy_conditional(res_y, in2_y, in1infty);
|
---|
430 | copy_conditional(res_z, in2_z, in1infty);
|
---|
431 |
|
---|
432 | copy_conditional(res_x, in1_x, in2infty);
|
---|
433 | copy_conditional(res_y, in1_y, in2infty);
|
---|
434 | copy_conditional(res_z, in1_z, in2infty);
|
---|
435 |
|
---|
436 | memcpy(r->X, res_x, sizeof(res_x));
|
---|
437 | memcpy(r->Y, res_y, sizeof(res_y));
|
---|
438 | memcpy(r->Z, res_z, sizeof(res_z));
|
---|
439 | }
|
---|
440 |
|
---|
441 | /* Point addition when b is known to be affine: r = a+b */
|
---|
442 | static void ecp_nistz256_point_add_affine(P256_POINT *r,
|
---|
443 | const P256_POINT *a,
|
---|
444 | const P256_POINT_AFFINE *b)
|
---|
445 | {
|
---|
446 | BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS];
|
---|
447 | BN_ULONG Z1sqr[P256_LIMBS];
|
---|
448 | BN_ULONG H[P256_LIMBS], R[P256_LIMBS];
|
---|
449 | BN_ULONG Hsqr[P256_LIMBS];
|
---|
450 | BN_ULONG Rsqr[P256_LIMBS];
|
---|
451 | BN_ULONG Hcub[P256_LIMBS];
|
---|
452 |
|
---|
453 | BN_ULONG res_x[P256_LIMBS];
|
---|
454 | BN_ULONG res_y[P256_LIMBS];
|
---|
455 | BN_ULONG res_z[P256_LIMBS];
|
---|
456 |
|
---|
457 | BN_ULONG in1infty, in2infty;
|
---|
458 |
|
---|
459 | const BN_ULONG *in1_x = a->X;
|
---|
460 | const BN_ULONG *in1_y = a->Y;
|
---|
461 | const BN_ULONG *in1_z = a->Z;
|
---|
462 |
|
---|
463 | const BN_ULONG *in2_x = b->X;
|
---|
464 | const BN_ULONG *in2_y = b->Y;
|
---|
465 |
|
---|
466 | /*
|
---|
467 | * Infinity in encoded as (,,0)
|
---|
468 | */
|
---|
469 | in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
|
---|
470 | if (P256_LIMBS == 8)
|
---|
471 | in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
|
---|
472 |
|
---|
473 | /*
|
---|
474 | * In affine representation we encode infinity as (0,0), which is
|
---|
475 | * not on the curve, so it is OK
|
---|
476 | */
|
---|
477 | in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
|
---|
478 | in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
|
---|
479 | if (P256_LIMBS == 8)
|
---|
480 | in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
|
---|
481 | in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
|
---|
482 |
|
---|
483 | in1infty = is_zero(in1infty);
|
---|
484 | in2infty = is_zero(in2infty);
|
---|
485 |
|
---|
486 | ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */
|
---|
487 |
|
---|
488 | ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */
|
---|
489 | ecp_nistz256_sub(H, U2, in1_x); /* H = U2 - U1 */
|
---|
490 |
|
---|
491 | ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */
|
---|
492 |
|
---|
493 | ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */
|
---|
494 |
|
---|
495 | ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */
|
---|
496 | ecp_nistz256_sub(R, S2, in1_y); /* R = S2 - S1 */
|
---|
497 |
|
---|
498 | ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */
|
---|
499 | ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */
|
---|
500 | ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */
|
---|
501 |
|
---|
502 | ecp_nistz256_mul_mont(U2, in1_x, Hsqr); /* U1*H^2 */
|
---|
503 | ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */
|
---|
504 |
|
---|
505 | ecp_nistz256_sub(res_x, Rsqr, Hsqr);
|
---|
506 | ecp_nistz256_sub(res_x, res_x, Hcub);
|
---|
507 | ecp_nistz256_sub(H, U2, res_x);
|
---|
508 |
|
---|
509 | ecp_nistz256_mul_mont(S2, in1_y, Hcub);
|
---|
510 | ecp_nistz256_mul_mont(H, H, R);
|
---|
511 | ecp_nistz256_sub(res_y, H, S2);
|
---|
512 |
|
---|
513 | copy_conditional(res_x, in2_x, in1infty);
|
---|
514 | copy_conditional(res_x, in1_x, in2infty);
|
---|
515 |
|
---|
516 | copy_conditional(res_y, in2_y, in1infty);
|
---|
517 | copy_conditional(res_y, in1_y, in2infty);
|
---|
518 |
|
---|
519 | copy_conditional(res_z, ONE, in1infty);
|
---|
520 | copy_conditional(res_z, in1_z, in2infty);
|
---|
521 |
|
---|
522 | memcpy(r->X, res_x, sizeof(res_x));
|
---|
523 | memcpy(r->Y, res_y, sizeof(res_y));
|
---|
524 | memcpy(r->Z, res_z, sizeof(res_z));
|
---|
525 | }
|
---|
526 | #endif
|
---|
527 |
|
---|
528 | /* r = in^-1 mod p */
|
---|
529 | static void ecp_nistz256_mod_inverse(BN_ULONG r[P256_LIMBS],
|
---|
530 | const BN_ULONG in[P256_LIMBS])
|
---|
531 | {
|
---|
532 | /*
|
---|
533 | * The poly is ffffffff 00000001 00000000 00000000 00000000 ffffffff
|
---|
534 | * ffffffff ffffffff We use FLT and used poly-2 as exponent
|
---|
535 | */
|
---|
536 | BN_ULONG p2[P256_LIMBS];
|
---|
537 | BN_ULONG p4[P256_LIMBS];
|
---|
538 | BN_ULONG p8[P256_LIMBS];
|
---|
539 | BN_ULONG p16[P256_LIMBS];
|
---|
540 | BN_ULONG p32[P256_LIMBS];
|
---|
541 | BN_ULONG res[P256_LIMBS];
|
---|
542 | int i;
|
---|
543 |
|
---|
544 | ecp_nistz256_sqr_mont(res, in);
|
---|
545 | ecp_nistz256_mul_mont(p2, res, in); /* 3*p */
|
---|
546 |
|
---|
547 | ecp_nistz256_sqr_mont(res, p2);
|
---|
548 | ecp_nistz256_sqr_mont(res, res);
|
---|
549 | ecp_nistz256_mul_mont(p4, res, p2); /* f*p */
|
---|
550 |
|
---|
551 | ecp_nistz256_sqr_mont(res, p4);
|
---|
552 | ecp_nistz256_sqr_mont(res, res);
|
---|
553 | ecp_nistz256_sqr_mont(res, res);
|
---|
554 | ecp_nistz256_sqr_mont(res, res);
|
---|
555 | ecp_nistz256_mul_mont(p8, res, p4); /* ff*p */
|
---|
556 |
|
---|
557 | ecp_nistz256_sqr_mont(res, p8);
|
---|
558 | for (i = 0; i < 7; i++)
|
---|
559 | ecp_nistz256_sqr_mont(res, res);
|
---|
560 | ecp_nistz256_mul_mont(p16, res, p8); /* ffff*p */
|
---|
561 |
|
---|
562 | ecp_nistz256_sqr_mont(res, p16);
|
---|
563 | for (i = 0; i < 15; i++)
|
---|
564 | ecp_nistz256_sqr_mont(res, res);
|
---|
565 | ecp_nistz256_mul_mont(p32, res, p16); /* ffffffff*p */
|
---|
566 |
|
---|
567 | ecp_nistz256_sqr_mont(res, p32);
|
---|
568 | for (i = 0; i < 31; i++)
|
---|
569 | ecp_nistz256_sqr_mont(res, res);
|
---|
570 | ecp_nistz256_mul_mont(res, res, in);
|
---|
571 |
|
---|
572 | for (i = 0; i < 32 * 4; i++)
|
---|
573 | ecp_nistz256_sqr_mont(res, res);
|
---|
574 | ecp_nistz256_mul_mont(res, res, p32);
|
---|
575 |
|
---|
576 | for (i = 0; i < 32; i++)
|
---|
577 | ecp_nistz256_sqr_mont(res, res);
|
---|
578 | ecp_nistz256_mul_mont(res, res, p32);
|
---|
579 |
|
---|
580 | for (i = 0; i < 16; i++)
|
---|
581 | ecp_nistz256_sqr_mont(res, res);
|
---|
582 | ecp_nistz256_mul_mont(res, res, p16);
|
---|
583 |
|
---|
584 | for (i = 0; i < 8; i++)
|
---|
585 | ecp_nistz256_sqr_mont(res, res);
|
---|
586 | ecp_nistz256_mul_mont(res, res, p8);
|
---|
587 |
|
---|
588 | ecp_nistz256_sqr_mont(res, res);
|
---|
589 | ecp_nistz256_sqr_mont(res, res);
|
---|
590 | ecp_nistz256_sqr_mont(res, res);
|
---|
591 | ecp_nistz256_sqr_mont(res, res);
|
---|
592 | ecp_nistz256_mul_mont(res, res, p4);
|
---|
593 |
|
---|
594 | ecp_nistz256_sqr_mont(res, res);
|
---|
595 | ecp_nistz256_sqr_mont(res, res);
|
---|
596 | ecp_nistz256_mul_mont(res, res, p2);
|
---|
597 |
|
---|
598 | ecp_nistz256_sqr_mont(res, res);
|
---|
599 | ecp_nistz256_sqr_mont(res, res);
|
---|
600 | ecp_nistz256_mul_mont(res, res, in);
|
---|
601 |
|
---|
602 | memcpy(r, res, sizeof(res));
|
---|
603 | }
|
---|
604 |
|
---|
605 | /*
|
---|
606 | * ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and
|
---|
607 | * returns one if it fits. Otherwise it returns zero.
|
---|
608 | */
|
---|
609 | __owur static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS],
|
---|
610 | const BIGNUM *in)
|
---|
611 | {
|
---|
612 | return bn_copy_words(out, in, P256_LIMBS);
|
---|
613 | }
|
---|
614 |
|
---|
615 | /* r = sum(scalar[i]*point[i]) */
|
---|
616 | __owur static int ecp_nistz256_windowed_mul(const EC_GROUP *group,
|
---|
617 | P256_POINT *r,
|
---|
618 | const BIGNUM **scalar,
|
---|
619 | const EC_POINT **point,
|
---|
620 | size_t num, BN_CTX *ctx)
|
---|
621 | {
|
---|
622 | size_t i;
|
---|
623 | int j, ret = 0;
|
---|
624 | unsigned int idx;
|
---|
625 | unsigned char (*p_str)[33] = NULL;
|
---|
626 | const unsigned int window_size = 5;
|
---|
627 | const unsigned int mask = (1 << (window_size + 1)) - 1;
|
---|
628 | unsigned int wvalue;
|
---|
629 | P256_POINT *temp; /* place for 5 temporary points */
|
---|
630 | const BIGNUM **scalars = NULL;
|
---|
631 | P256_POINT (*table)[16] = NULL;
|
---|
632 | void *table_storage = NULL;
|
---|
633 |
|
---|
634 | if ((num * 16 + 6) > OPENSSL_MALLOC_MAX_NELEMS(P256_POINT)
|
---|
635 | || (table_storage =
|
---|
636 | OPENSSL_malloc((num * 16 + 5) * sizeof(P256_POINT) + 64)) == NULL
|
---|
637 | || (p_str =
|
---|
638 | OPENSSL_malloc(num * 33 * sizeof(unsigned char))) == NULL
|
---|
639 | || (scalars = OPENSSL_malloc(num * sizeof(BIGNUM *))) == NULL) {
|
---|
640 | ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
|
---|
641 | goto err;
|
---|
642 | }
|
---|
643 |
|
---|
644 | table = (void *)ALIGNPTR(table_storage, 64);
|
---|
645 | temp = (P256_POINT *)(table + num);
|
---|
646 |
|
---|
647 | for (i = 0; i < num; i++) {
|
---|
648 | P256_POINT *row = table[i];
|
---|
649 |
|
---|
650 | /* This is an unusual input, we don't guarantee constant-timeness. */
|
---|
651 | if ((BN_num_bits(scalar[i]) > 256) || BN_is_negative(scalar[i])) {
|
---|
652 | BIGNUM *mod;
|
---|
653 |
|
---|
654 | if ((mod = BN_CTX_get(ctx)) == NULL)
|
---|
655 | goto err;
|
---|
656 | if (!BN_nnmod(mod, scalar[i], group->order, ctx)) {
|
---|
657 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
658 | goto err;
|
---|
659 | }
|
---|
660 | scalars[i] = mod;
|
---|
661 | } else
|
---|
662 | scalars[i] = scalar[i];
|
---|
663 |
|
---|
664 | for (j = 0; j < bn_get_top(scalars[i]) * BN_BYTES; j += BN_BYTES) {
|
---|
665 | BN_ULONG d = bn_get_words(scalars[i])[j / BN_BYTES];
|
---|
666 |
|
---|
667 | p_str[i][j + 0] = (unsigned char)d;
|
---|
668 | p_str[i][j + 1] = (unsigned char)(d >> 8);
|
---|
669 | p_str[i][j + 2] = (unsigned char)(d >> 16);
|
---|
670 | p_str[i][j + 3] = (unsigned char)(d >>= 24);
|
---|
671 | if (BN_BYTES == 8) {
|
---|
672 | d >>= 8;
|
---|
673 | p_str[i][j + 4] = (unsigned char)d;
|
---|
674 | p_str[i][j + 5] = (unsigned char)(d >> 8);
|
---|
675 | p_str[i][j + 6] = (unsigned char)(d >> 16);
|
---|
676 | p_str[i][j + 7] = (unsigned char)(d >> 24);
|
---|
677 | }
|
---|
678 | }
|
---|
679 | for (; j < 33; j++)
|
---|
680 | p_str[i][j] = 0;
|
---|
681 |
|
---|
682 | if (!ecp_nistz256_bignum_to_field_elem(temp[0].X, point[i]->X)
|
---|
683 | || !ecp_nistz256_bignum_to_field_elem(temp[0].Y, point[i]->Y)
|
---|
684 | || !ecp_nistz256_bignum_to_field_elem(temp[0].Z, point[i]->Z)) {
|
---|
685 | ERR_raise(ERR_LIB_EC, EC_R_COORDINATES_OUT_OF_RANGE);
|
---|
686 | goto err;
|
---|
687 | }
|
---|
688 |
|
---|
689 | /*
|
---|
690 | * row[0] is implicitly (0,0,0) (the point at infinity), therefore it
|
---|
691 | * is not stored. All other values are actually stored with an offset
|
---|
692 | * of -1 in table.
|
---|
693 | */
|
---|
694 |
|
---|
695 | ecp_nistz256_scatter_w5 (row, &temp[0], 1);
|
---|
696 | ecp_nistz256_point_double(&temp[1], &temp[0]); /*1+1=2 */
|
---|
697 | ecp_nistz256_scatter_w5 (row, &temp[1], 2);
|
---|
698 | ecp_nistz256_point_add (&temp[2], &temp[1], &temp[0]); /*2+1=3 */
|
---|
699 | ecp_nistz256_scatter_w5 (row, &temp[2], 3);
|
---|
700 | ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*2=4 */
|
---|
701 | ecp_nistz256_scatter_w5 (row, &temp[1], 4);
|
---|
702 | ecp_nistz256_point_double(&temp[2], &temp[2]); /*2*3=6 */
|
---|
703 | ecp_nistz256_scatter_w5 (row, &temp[2], 6);
|
---|
704 | ecp_nistz256_point_add (&temp[3], &temp[1], &temp[0]); /*4+1=5 */
|
---|
705 | ecp_nistz256_scatter_w5 (row, &temp[3], 5);
|
---|
706 | ecp_nistz256_point_add (&temp[4], &temp[2], &temp[0]); /*6+1=7 */
|
---|
707 | ecp_nistz256_scatter_w5 (row, &temp[4], 7);
|
---|
708 | ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*4=8 */
|
---|
709 | ecp_nistz256_scatter_w5 (row, &temp[1], 8);
|
---|
710 | ecp_nistz256_point_double(&temp[2], &temp[2]); /*2*6=12 */
|
---|
711 | ecp_nistz256_scatter_w5 (row, &temp[2], 12);
|
---|
712 | ecp_nistz256_point_double(&temp[3], &temp[3]); /*2*5=10 */
|
---|
713 | ecp_nistz256_scatter_w5 (row, &temp[3], 10);
|
---|
714 | ecp_nistz256_point_double(&temp[4], &temp[4]); /*2*7=14 */
|
---|
715 | ecp_nistz256_scatter_w5 (row, &temp[4], 14);
|
---|
716 | ecp_nistz256_point_add (&temp[2], &temp[2], &temp[0]); /*12+1=13*/
|
---|
717 | ecp_nistz256_scatter_w5 (row, &temp[2], 13);
|
---|
718 | ecp_nistz256_point_add (&temp[3], &temp[3], &temp[0]); /*10+1=11*/
|
---|
719 | ecp_nistz256_scatter_w5 (row, &temp[3], 11);
|
---|
720 | ecp_nistz256_point_add (&temp[4], &temp[4], &temp[0]); /*14+1=15*/
|
---|
721 | ecp_nistz256_scatter_w5 (row, &temp[4], 15);
|
---|
722 | ecp_nistz256_point_add (&temp[2], &temp[1], &temp[0]); /*8+1=9 */
|
---|
723 | ecp_nistz256_scatter_w5 (row, &temp[2], 9);
|
---|
724 | ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*8=16 */
|
---|
725 | ecp_nistz256_scatter_w5 (row, &temp[1], 16);
|
---|
726 | }
|
---|
727 |
|
---|
728 | idx = 255;
|
---|
729 |
|
---|
730 | wvalue = p_str[0][(idx - 1) / 8];
|
---|
731 | wvalue = (wvalue >> ((idx - 1) % 8)) & mask;
|
---|
732 |
|
---|
733 | /*
|
---|
734 | * We gather to temp[0], because we know it's position relative
|
---|
735 | * to table
|
---|
736 | */
|
---|
737 | ecp_nistz256_gather_w5(&temp[0], table[0], _booth_recode_w5(wvalue) >> 1);
|
---|
738 | memcpy(r, &temp[0], sizeof(temp[0]));
|
---|
739 |
|
---|
740 | while (idx >= 5) {
|
---|
741 | for (i = (idx == 255 ? 1 : 0); i < num; i++) {
|
---|
742 | unsigned int off = (idx - 1) / 8;
|
---|
743 |
|
---|
744 | wvalue = p_str[i][off] | p_str[i][off + 1] << 8;
|
---|
745 | wvalue = (wvalue >> ((idx - 1) % 8)) & mask;
|
---|
746 |
|
---|
747 | wvalue = _booth_recode_w5(wvalue);
|
---|
748 |
|
---|
749 | ecp_nistz256_gather_w5(&temp[0], table[i], wvalue >> 1);
|
---|
750 |
|
---|
751 | ecp_nistz256_neg(temp[1].Y, temp[0].Y);
|
---|
752 | copy_conditional(temp[0].Y, temp[1].Y, (wvalue & 1));
|
---|
753 |
|
---|
754 | ecp_nistz256_point_add(r, r, &temp[0]);
|
---|
755 | }
|
---|
756 |
|
---|
757 | idx -= window_size;
|
---|
758 |
|
---|
759 | ecp_nistz256_point_double(r, r);
|
---|
760 | ecp_nistz256_point_double(r, r);
|
---|
761 | ecp_nistz256_point_double(r, r);
|
---|
762 | ecp_nistz256_point_double(r, r);
|
---|
763 | ecp_nistz256_point_double(r, r);
|
---|
764 | }
|
---|
765 |
|
---|
766 | /* Final window */
|
---|
767 | for (i = 0; i < num; i++) {
|
---|
768 | wvalue = p_str[i][0];
|
---|
769 | wvalue = (wvalue << 1) & mask;
|
---|
770 |
|
---|
771 | wvalue = _booth_recode_w5(wvalue);
|
---|
772 |
|
---|
773 | ecp_nistz256_gather_w5(&temp[0], table[i], wvalue >> 1);
|
---|
774 |
|
---|
775 | ecp_nistz256_neg(temp[1].Y, temp[0].Y);
|
---|
776 | copy_conditional(temp[0].Y, temp[1].Y, wvalue & 1);
|
---|
777 |
|
---|
778 | ecp_nistz256_point_add(r, r, &temp[0]);
|
---|
779 | }
|
---|
780 |
|
---|
781 | ret = 1;
|
---|
782 | err:
|
---|
783 | OPENSSL_free(table_storage);
|
---|
784 | OPENSSL_free(p_str);
|
---|
785 | OPENSSL_free(scalars);
|
---|
786 | return ret;
|
---|
787 | }
|
---|
788 |
|
---|
789 | /* Coordinates of G, for which we have precomputed tables */
|
---|
790 | static const BN_ULONG def_xG[P256_LIMBS] = {
|
---|
791 | TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601),
|
---|
792 | TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6)
|
---|
793 | };
|
---|
794 |
|
---|
795 | static const BN_ULONG def_yG[P256_LIMBS] = {
|
---|
796 | TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c),
|
---|
797 | TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85)
|
---|
798 | };
|
---|
799 |
|
---|
800 | /*
|
---|
801 | * ecp_nistz256_is_affine_G returns one if |generator| is the standard, P-256
|
---|
802 | * generator.
|
---|
803 | */
|
---|
804 | static int ecp_nistz256_is_affine_G(const EC_POINT *generator)
|
---|
805 | {
|
---|
806 | return (bn_get_top(generator->X) == P256_LIMBS) &&
|
---|
807 | (bn_get_top(generator->Y) == P256_LIMBS) &&
|
---|
808 | is_equal(bn_get_words(generator->X), def_xG) &&
|
---|
809 | is_equal(bn_get_words(generator->Y), def_yG) &&
|
---|
810 | is_one(generator->Z);
|
---|
811 | }
|
---|
812 |
|
---|
813 | __owur static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx)
|
---|
814 | {
|
---|
815 | /*
|
---|
816 | * We precompute a table for a Booth encoded exponent (wNAF) based
|
---|
817 | * computation. Each table holds 64 values for safe access, with an
|
---|
818 | * implicit value of infinity at index zero. We use window of size 7, and
|
---|
819 | * therefore require ceil(256/7) = 37 tables.
|
---|
820 | */
|
---|
821 | const BIGNUM *order;
|
---|
822 | EC_POINT *P = NULL, *T = NULL;
|
---|
823 | const EC_POINT *generator;
|
---|
824 | NISTZ256_PRE_COMP *pre_comp;
|
---|
825 | BN_CTX *new_ctx = NULL;
|
---|
826 | int i, j, k, ret = 0;
|
---|
827 | size_t w;
|
---|
828 |
|
---|
829 | PRECOMP256_ROW *preComputedTable = NULL;
|
---|
830 | unsigned char *precomp_storage = NULL;
|
---|
831 |
|
---|
832 | /* if there is an old NISTZ256_PRE_COMP object, throw it away */
|
---|
833 | EC_pre_comp_free(group);
|
---|
834 | generator = EC_GROUP_get0_generator(group);
|
---|
835 | if (generator == NULL) {
|
---|
836 | ERR_raise(ERR_LIB_EC, EC_R_UNDEFINED_GENERATOR);
|
---|
837 | return 0;
|
---|
838 | }
|
---|
839 |
|
---|
840 | if (ecp_nistz256_is_affine_G(generator)) {
|
---|
841 | /*
|
---|
842 | * No need to calculate tables for the standard generator because we
|
---|
843 | * have them statically.
|
---|
844 | */
|
---|
845 | return 1;
|
---|
846 | }
|
---|
847 |
|
---|
848 | if ((pre_comp = ecp_nistz256_pre_comp_new(group)) == NULL)
|
---|
849 | return 0;
|
---|
850 |
|
---|
851 | if (ctx == NULL) {
|
---|
852 | ctx = new_ctx = BN_CTX_new_ex(group->libctx);
|
---|
853 | if (ctx == NULL)
|
---|
854 | goto err;
|
---|
855 | }
|
---|
856 |
|
---|
857 | BN_CTX_start(ctx);
|
---|
858 |
|
---|
859 | order = EC_GROUP_get0_order(group);
|
---|
860 | if (order == NULL)
|
---|
861 | goto err;
|
---|
862 |
|
---|
863 | if (BN_is_zero(order)) {
|
---|
864 | ERR_raise(ERR_LIB_EC, EC_R_UNKNOWN_ORDER);
|
---|
865 | goto err;
|
---|
866 | }
|
---|
867 |
|
---|
868 | w = 7;
|
---|
869 |
|
---|
870 | if ((precomp_storage =
|
---|
871 | OPENSSL_malloc(37 * 64 * sizeof(P256_POINT_AFFINE) + 64)) == NULL) {
|
---|
872 | ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
|
---|
873 | goto err;
|
---|
874 | }
|
---|
875 |
|
---|
876 | preComputedTable = (void *)ALIGNPTR(precomp_storage, 64);
|
---|
877 |
|
---|
878 | P = EC_POINT_new(group);
|
---|
879 | T = EC_POINT_new(group);
|
---|
880 | if (P == NULL || T == NULL)
|
---|
881 | goto err;
|
---|
882 |
|
---|
883 | /*
|
---|
884 | * The zero entry is implicitly infinity, and we skip it, storing other
|
---|
885 | * values with -1 offset.
|
---|
886 | */
|
---|
887 | if (!EC_POINT_copy(T, generator))
|
---|
888 | goto err;
|
---|
889 |
|
---|
890 | for (k = 0; k < 64; k++) {
|
---|
891 | if (!EC_POINT_copy(P, T))
|
---|
892 | goto err;
|
---|
893 | for (j = 0; j < 37; j++) {
|
---|
894 | P256_POINT_AFFINE temp;
|
---|
895 | /*
|
---|
896 | * It would be faster to use EC_POINTs_make_affine and
|
---|
897 | * make multiple points affine at the same time.
|
---|
898 | */
|
---|
899 | if (group->meth->make_affine == NULL
|
---|
900 | || !group->meth->make_affine(group, P, ctx))
|
---|
901 | goto err;
|
---|
902 | if (!ecp_nistz256_bignum_to_field_elem(temp.X, P->X) ||
|
---|
903 | !ecp_nistz256_bignum_to_field_elem(temp.Y, P->Y)) {
|
---|
904 | ERR_raise(ERR_LIB_EC, EC_R_COORDINATES_OUT_OF_RANGE);
|
---|
905 | goto err;
|
---|
906 | }
|
---|
907 | ecp_nistz256_scatter_w7(preComputedTable[j], &temp, k);
|
---|
908 | for (i = 0; i < 7; i++) {
|
---|
909 | if (!EC_POINT_dbl(group, P, P, ctx))
|
---|
910 | goto err;
|
---|
911 | }
|
---|
912 | }
|
---|
913 | if (!EC_POINT_add(group, T, T, generator, ctx))
|
---|
914 | goto err;
|
---|
915 | }
|
---|
916 |
|
---|
917 | pre_comp->group = group;
|
---|
918 | pre_comp->w = w;
|
---|
919 | pre_comp->precomp = preComputedTable;
|
---|
920 | pre_comp->precomp_storage = precomp_storage;
|
---|
921 | precomp_storage = NULL;
|
---|
922 | SETPRECOMP(group, nistz256, pre_comp);
|
---|
923 | pre_comp = NULL;
|
---|
924 | ret = 1;
|
---|
925 |
|
---|
926 | err:
|
---|
927 | BN_CTX_end(ctx);
|
---|
928 | BN_CTX_free(new_ctx);
|
---|
929 |
|
---|
930 | EC_nistz256_pre_comp_free(pre_comp);
|
---|
931 | OPENSSL_free(precomp_storage);
|
---|
932 | EC_POINT_free(P);
|
---|
933 | EC_POINT_free(T);
|
---|
934 | return ret;
|
---|
935 | }
|
---|
936 |
|
---|
937 | __owur static int ecp_nistz256_set_from_affine(EC_POINT *out, const EC_GROUP *group,
|
---|
938 | const P256_POINT_AFFINE *in,
|
---|
939 | BN_CTX *ctx)
|
---|
940 | {
|
---|
941 | int ret = 0;
|
---|
942 |
|
---|
943 | if ((ret = bn_set_words(out->X, in->X, P256_LIMBS))
|
---|
944 | && (ret = bn_set_words(out->Y, in->Y, P256_LIMBS))
|
---|
945 | && (ret = bn_set_words(out->Z, ONE, P256_LIMBS)))
|
---|
946 | out->Z_is_one = 1;
|
---|
947 |
|
---|
948 | return ret;
|
---|
949 | }
|
---|
950 |
|
---|
951 | /* r = scalar*G + sum(scalars[i]*points[i]) */
|
---|
952 | __owur static int ecp_nistz256_points_mul(const EC_GROUP *group,
|
---|
953 | EC_POINT *r,
|
---|
954 | const BIGNUM *scalar,
|
---|
955 | size_t num,
|
---|
956 | const EC_POINT *points[],
|
---|
957 | const BIGNUM *scalars[], BN_CTX *ctx)
|
---|
958 | {
|
---|
959 | int i = 0, ret = 0, no_precomp_for_generator = 0, p_is_infinity = 0;
|
---|
960 | unsigned char p_str[33] = { 0 };
|
---|
961 | const PRECOMP256_ROW *preComputedTable = NULL;
|
---|
962 | const NISTZ256_PRE_COMP *pre_comp = NULL;
|
---|
963 | const EC_POINT *generator = NULL;
|
---|
964 | const BIGNUM **new_scalars = NULL;
|
---|
965 | const EC_POINT **new_points = NULL;
|
---|
966 | unsigned int idx = 0;
|
---|
967 | const unsigned int window_size = 7;
|
---|
968 | const unsigned int mask = (1 << (window_size + 1)) - 1;
|
---|
969 | unsigned int wvalue;
|
---|
970 | ALIGN32 union {
|
---|
971 | P256_POINT p;
|
---|
972 | P256_POINT_AFFINE a;
|
---|
973 | } t, p;
|
---|
974 | BIGNUM *tmp_scalar;
|
---|
975 |
|
---|
976 | if ((num + 1) == 0 || (num + 1) > OPENSSL_MALLOC_MAX_NELEMS(void *)) {
|
---|
977 | ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
|
---|
978 | return 0;
|
---|
979 | }
|
---|
980 |
|
---|
981 | memset(&p, 0, sizeof(p));
|
---|
982 | BN_CTX_start(ctx);
|
---|
983 |
|
---|
984 | if (scalar) {
|
---|
985 | generator = EC_GROUP_get0_generator(group);
|
---|
986 | if (generator == NULL) {
|
---|
987 | ERR_raise(ERR_LIB_EC, EC_R_UNDEFINED_GENERATOR);
|
---|
988 | goto err;
|
---|
989 | }
|
---|
990 |
|
---|
991 | /* look if we can use precomputed multiples of generator */
|
---|
992 | pre_comp = group->pre_comp.nistz256;
|
---|
993 |
|
---|
994 | if (pre_comp) {
|
---|
995 | /*
|
---|
996 | * If there is a precomputed table for the generator, check that
|
---|
997 | * it was generated with the same generator.
|
---|
998 | */
|
---|
999 | EC_POINT *pre_comp_generator = EC_POINT_new(group);
|
---|
1000 | if (pre_comp_generator == NULL)
|
---|
1001 | goto err;
|
---|
1002 |
|
---|
1003 | ecp_nistz256_gather_w7(&p.a, pre_comp->precomp[0], 1);
|
---|
1004 | if (!ecp_nistz256_set_from_affine(pre_comp_generator,
|
---|
1005 | group, &p.a, ctx)) {
|
---|
1006 | EC_POINT_free(pre_comp_generator);
|
---|
1007 | goto err;
|
---|
1008 | }
|
---|
1009 |
|
---|
1010 | if (0 == EC_POINT_cmp(group, generator, pre_comp_generator, ctx))
|
---|
1011 | preComputedTable = (const PRECOMP256_ROW *)pre_comp->precomp;
|
---|
1012 |
|
---|
1013 | EC_POINT_free(pre_comp_generator);
|
---|
1014 | }
|
---|
1015 |
|
---|
1016 | if (preComputedTable == NULL && ecp_nistz256_is_affine_G(generator)) {
|
---|
1017 | /*
|
---|
1018 | * If there is no precomputed data, but the generator is the
|
---|
1019 | * default, a hardcoded table of precomputed data is used. This
|
---|
1020 | * is because applications, such as Apache, do not use
|
---|
1021 | * EC_KEY_precompute_mult.
|
---|
1022 | */
|
---|
1023 | preComputedTable = ecp_nistz256_precomputed;
|
---|
1024 | }
|
---|
1025 |
|
---|
1026 | if (preComputedTable) {
|
---|
1027 | BN_ULONG infty;
|
---|
1028 |
|
---|
1029 | if ((BN_num_bits(scalar) > 256)
|
---|
1030 | || BN_is_negative(scalar)) {
|
---|
1031 | if ((tmp_scalar = BN_CTX_get(ctx)) == NULL)
|
---|
1032 | goto err;
|
---|
1033 |
|
---|
1034 | if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
|
---|
1035 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1036 | goto err;
|
---|
1037 | }
|
---|
1038 | scalar = tmp_scalar;
|
---|
1039 | }
|
---|
1040 |
|
---|
1041 | for (i = 0; i < bn_get_top(scalar) * BN_BYTES; i += BN_BYTES) {
|
---|
1042 | BN_ULONG d = bn_get_words(scalar)[i / BN_BYTES];
|
---|
1043 |
|
---|
1044 | p_str[i + 0] = (unsigned char)d;
|
---|
1045 | p_str[i + 1] = (unsigned char)(d >> 8);
|
---|
1046 | p_str[i + 2] = (unsigned char)(d >> 16);
|
---|
1047 | p_str[i + 3] = (unsigned char)(d >>= 24);
|
---|
1048 | if (BN_BYTES == 8) {
|
---|
1049 | d >>= 8;
|
---|
1050 | p_str[i + 4] = (unsigned char)d;
|
---|
1051 | p_str[i + 5] = (unsigned char)(d >> 8);
|
---|
1052 | p_str[i + 6] = (unsigned char)(d >> 16);
|
---|
1053 | p_str[i + 7] = (unsigned char)(d >> 24);
|
---|
1054 | }
|
---|
1055 | }
|
---|
1056 |
|
---|
1057 | for (; i < 33; i++)
|
---|
1058 | p_str[i] = 0;
|
---|
1059 |
|
---|
1060 | /* First window */
|
---|
1061 | wvalue = (p_str[0] << 1) & mask;
|
---|
1062 | idx += window_size;
|
---|
1063 |
|
---|
1064 | wvalue = _booth_recode_w7(wvalue);
|
---|
1065 |
|
---|
1066 | ecp_nistz256_gather_w7(&p.a, preComputedTable[0],
|
---|
1067 | wvalue >> 1);
|
---|
1068 |
|
---|
1069 | ecp_nistz256_neg(p.p.Z, p.p.Y);
|
---|
1070 | copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
|
---|
1071 |
|
---|
1072 | /*
|
---|
1073 | * Since affine infinity is encoded as (0,0) and
|
---|
1074 | * Jacobian is (,,0), we need to harmonize them
|
---|
1075 | * by assigning "one" or zero to Z.
|
---|
1076 | */
|
---|
1077 | infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] |
|
---|
1078 | p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]);
|
---|
1079 | #if !defined(_MSC_VER) || P256_LIMBS != 4 /* vbox: VC++ 2010 complains about out of bounds accesses here in debug builds. */
|
---|
1080 | if (P256_LIMBS == 8)
|
---|
1081 | infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] |
|
---|
1082 | p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]);
|
---|
1083 | #endif
|
---|
1084 |
|
---|
1085 | infty = 0 - is_zero(infty);
|
---|
1086 | infty = ~infty;
|
---|
1087 |
|
---|
1088 | p.p.Z[0] = ONE[0] & infty;
|
---|
1089 | p.p.Z[1] = ONE[1] & infty;
|
---|
1090 | p.p.Z[2] = ONE[2] & infty;
|
---|
1091 | p.p.Z[3] = ONE[3] & infty;
|
---|
1092 | #if !defined(_MSC_VER) || P256_LIMBS != 4 /* vbox: VC++ 2010 complains about out of bounds accesses here in debug builds. */
|
---|
1093 | if (P256_LIMBS == 8) {
|
---|
1094 | p.p.Z[4] = ONE[4] & infty;
|
---|
1095 | p.p.Z[5] = ONE[5] & infty;
|
---|
1096 | p.p.Z[6] = ONE[6] & infty;
|
---|
1097 | p.p.Z[7] = ONE[7] & infty;
|
---|
1098 | }
|
---|
1099 | #endif
|
---|
1100 |
|
---|
1101 | for (i = 1; i < 37; i++) {
|
---|
1102 | unsigned int off = (idx - 1) / 8;
|
---|
1103 | wvalue = p_str[off] | p_str[off + 1] << 8;
|
---|
1104 | wvalue = (wvalue >> ((idx - 1) % 8)) & mask;
|
---|
1105 | idx += window_size;
|
---|
1106 |
|
---|
1107 | wvalue = _booth_recode_w7(wvalue);
|
---|
1108 |
|
---|
1109 | ecp_nistz256_gather_w7(&t.a,
|
---|
1110 | preComputedTable[i], wvalue >> 1);
|
---|
1111 |
|
---|
1112 | ecp_nistz256_neg(t.p.Z, t.a.Y);
|
---|
1113 | copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
|
---|
1114 |
|
---|
1115 | ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
|
---|
1116 | }
|
---|
1117 | } else {
|
---|
1118 | p_is_infinity = 1;
|
---|
1119 | no_precomp_for_generator = 1;
|
---|
1120 | }
|
---|
1121 | } else
|
---|
1122 | p_is_infinity = 1;
|
---|
1123 |
|
---|
1124 | if (no_precomp_for_generator) {
|
---|
1125 | /*
|
---|
1126 | * Without a precomputed table for the generator, it has to be
|
---|
1127 | * handled like a normal point.
|
---|
1128 | */
|
---|
1129 | new_scalars = OPENSSL_malloc((num + 1) * sizeof(BIGNUM *));
|
---|
1130 | if (new_scalars == NULL) {
|
---|
1131 | ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
|
---|
1132 | goto err;
|
---|
1133 | }
|
---|
1134 |
|
---|
1135 | new_points = OPENSSL_malloc((num + 1) * sizeof(EC_POINT *));
|
---|
1136 | if (new_points == NULL) {
|
---|
1137 | ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
|
---|
1138 | goto err;
|
---|
1139 | }
|
---|
1140 |
|
---|
1141 | memcpy(new_scalars, scalars, num * sizeof(BIGNUM *));
|
---|
1142 | new_scalars[num] = scalar;
|
---|
1143 | memcpy(new_points, points, num * sizeof(EC_POINT *));
|
---|
1144 | new_points[num] = generator;
|
---|
1145 |
|
---|
1146 | scalars = new_scalars;
|
---|
1147 | points = new_points;
|
---|
1148 | num++;
|
---|
1149 | }
|
---|
1150 |
|
---|
1151 | if (num) {
|
---|
1152 | P256_POINT *out = &t.p;
|
---|
1153 | if (p_is_infinity)
|
---|
1154 | out = &p.p;
|
---|
1155 |
|
---|
1156 | if (!ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx))
|
---|
1157 | goto err;
|
---|
1158 |
|
---|
1159 | if (!p_is_infinity)
|
---|
1160 | ecp_nistz256_point_add(&p.p, &p.p, out);
|
---|
1161 | }
|
---|
1162 |
|
---|
1163 | /* Not constant-time, but we're only operating on the public output. */
|
---|
1164 | if (!bn_set_words(r->X, p.p.X, P256_LIMBS) ||
|
---|
1165 | !bn_set_words(r->Y, p.p.Y, P256_LIMBS) ||
|
---|
1166 | !bn_set_words(r->Z, p.p.Z, P256_LIMBS)) {
|
---|
1167 | goto err;
|
---|
1168 | }
|
---|
1169 | r->Z_is_one = is_one(r->Z) & 1;
|
---|
1170 |
|
---|
1171 | ret = 1;
|
---|
1172 |
|
---|
1173 | err:
|
---|
1174 | BN_CTX_end(ctx);
|
---|
1175 | OPENSSL_free(new_points);
|
---|
1176 | OPENSSL_free(new_scalars);
|
---|
1177 | return ret;
|
---|
1178 | }
|
---|
1179 |
|
---|
1180 | __owur static int ecp_nistz256_get_affine(const EC_GROUP *group,
|
---|
1181 | const EC_POINT *point,
|
---|
1182 | BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
|
---|
1183 | {
|
---|
1184 | BN_ULONG z_inv2[P256_LIMBS];
|
---|
1185 | BN_ULONG z_inv3[P256_LIMBS];
|
---|
1186 | BN_ULONG x_aff[P256_LIMBS];
|
---|
1187 | BN_ULONG y_aff[P256_LIMBS];
|
---|
1188 | BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS];
|
---|
1189 | BN_ULONG x_ret[P256_LIMBS], y_ret[P256_LIMBS];
|
---|
1190 |
|
---|
1191 | if (EC_POINT_is_at_infinity(group, point)) {
|
---|
1192 | ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
|
---|
1193 | return 0;
|
---|
1194 | }
|
---|
1195 |
|
---|
1196 | if (!ecp_nistz256_bignum_to_field_elem(point_x, point->X) ||
|
---|
1197 | !ecp_nistz256_bignum_to_field_elem(point_y, point->Y) ||
|
---|
1198 | !ecp_nistz256_bignum_to_field_elem(point_z, point->Z)) {
|
---|
1199 | ERR_raise(ERR_LIB_EC, EC_R_COORDINATES_OUT_OF_RANGE);
|
---|
1200 | return 0;
|
---|
1201 | }
|
---|
1202 |
|
---|
1203 | ecp_nistz256_mod_inverse(z_inv3, point_z);
|
---|
1204 | ecp_nistz256_sqr_mont(z_inv2, z_inv3);
|
---|
1205 | ecp_nistz256_mul_mont(x_aff, z_inv2, point_x);
|
---|
1206 |
|
---|
1207 | if (x != NULL) {
|
---|
1208 | ecp_nistz256_from_mont(x_ret, x_aff);
|
---|
1209 | if (!bn_set_words(x, x_ret, P256_LIMBS))
|
---|
1210 | return 0;
|
---|
1211 | }
|
---|
1212 |
|
---|
1213 | if (y != NULL) {
|
---|
1214 | ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2);
|
---|
1215 | ecp_nistz256_mul_mont(y_aff, z_inv3, point_y);
|
---|
1216 | ecp_nistz256_from_mont(y_ret, y_aff);
|
---|
1217 | if (!bn_set_words(y, y_ret, P256_LIMBS))
|
---|
1218 | return 0;
|
---|
1219 | }
|
---|
1220 |
|
---|
1221 | return 1;
|
---|
1222 | }
|
---|
1223 |
|
---|
1224 | static NISTZ256_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group)
|
---|
1225 | {
|
---|
1226 | NISTZ256_PRE_COMP *ret = NULL;
|
---|
1227 |
|
---|
1228 | if (!group)
|
---|
1229 | return NULL;
|
---|
1230 |
|
---|
1231 | ret = OPENSSL_zalloc(sizeof(*ret));
|
---|
1232 |
|
---|
1233 | if (ret == NULL) {
|
---|
1234 | ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
|
---|
1235 | return ret;
|
---|
1236 | }
|
---|
1237 |
|
---|
1238 | ret->group = group;
|
---|
1239 | ret->w = 6; /* default */
|
---|
1240 | ret->references = 1;
|
---|
1241 |
|
---|
1242 | ret->lock = CRYPTO_THREAD_lock_new();
|
---|
1243 | if (ret->lock == NULL) {
|
---|
1244 | ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE);
|
---|
1245 | OPENSSL_free(ret);
|
---|
1246 | return NULL;
|
---|
1247 | }
|
---|
1248 | return ret;
|
---|
1249 | }
|
---|
1250 |
|
---|
1251 | NISTZ256_PRE_COMP *EC_nistz256_pre_comp_dup(NISTZ256_PRE_COMP *p)
|
---|
1252 | {
|
---|
1253 | int i;
|
---|
1254 | if (p != NULL)
|
---|
1255 | CRYPTO_UP_REF(&p->references, &i, p->lock);
|
---|
1256 | return p;
|
---|
1257 | }
|
---|
1258 |
|
---|
1259 | void EC_nistz256_pre_comp_free(NISTZ256_PRE_COMP *pre)
|
---|
1260 | {
|
---|
1261 | int i;
|
---|
1262 |
|
---|
1263 | if (pre == NULL)
|
---|
1264 | return;
|
---|
1265 |
|
---|
1266 | CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
|
---|
1267 | REF_PRINT_COUNT("EC_nistz256", pre);
|
---|
1268 | if (i > 0)
|
---|
1269 | return;
|
---|
1270 | REF_ASSERT_ISNT(i < 0);
|
---|
1271 |
|
---|
1272 | OPENSSL_free(pre->precomp_storage);
|
---|
1273 | CRYPTO_THREAD_lock_free(pre->lock);
|
---|
1274 | OPENSSL_free(pre);
|
---|
1275 | }
|
---|
1276 |
|
---|
1277 |
|
---|
1278 | static int ecp_nistz256_window_have_precompute_mult(const EC_GROUP *group)
|
---|
1279 | {
|
---|
1280 | /* There is a hard-coded table for the default generator. */
|
---|
1281 | const EC_POINT *generator = EC_GROUP_get0_generator(group);
|
---|
1282 |
|
---|
1283 | if (generator != NULL && ecp_nistz256_is_affine_G(generator)) {
|
---|
1284 | /* There is a hard-coded table for the default generator. */
|
---|
1285 | return 1;
|
---|
1286 | }
|
---|
1287 |
|
---|
1288 | return HAVEPRECOMP(group, nistz256);
|
---|
1289 | }
|
---|
1290 |
|
---|
1291 | #if defined(__x86_64) || defined(__x86_64__) || \
|
---|
1292 | defined(_M_AMD64) || defined(_M_X64) || \
|
---|
1293 | defined(__powerpc64__) || defined(_ARCH_PP64) || \
|
---|
1294 | defined(__aarch64__)
|
---|
1295 | /*
|
---|
1296 | * Montgomery mul modulo Order(P): res = a*b*2^-256 mod Order(P)
|
---|
1297 | */
|
---|
1298 | void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS],
|
---|
1299 | const BN_ULONG a[P256_LIMBS],
|
---|
1300 | const BN_ULONG b[P256_LIMBS]);
|
---|
1301 | void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
|
---|
1302 | const BN_ULONG a[P256_LIMBS],
|
---|
1303 | BN_ULONG rep);
|
---|
1304 |
|
---|
1305 | static int ecp_nistz256_inv_mod_ord(const EC_GROUP *group, BIGNUM *r,
|
---|
1306 | const BIGNUM *x, BN_CTX *ctx)
|
---|
1307 | {
|
---|
1308 | /* RR = 2^512 mod ord(p256) */
|
---|
1309 | static const BN_ULONG RR[P256_LIMBS] = {
|
---|
1310 | TOBN(0x83244c95,0xbe79eea2), TOBN(0x4699799c,0x49bd6fa6),
|
---|
1311 | TOBN(0x2845b239,0x2b6bec59), TOBN(0x66e12d94,0xf3d95620)
|
---|
1312 | };
|
---|
1313 | /* The constant 1 (unlike ONE that is one in Montgomery representation) */
|
---|
1314 | static const BN_ULONG one[P256_LIMBS] = {
|
---|
1315 | TOBN(0,1), TOBN(0,0), TOBN(0,0), TOBN(0,0)
|
---|
1316 | };
|
---|
1317 | /*
|
---|
1318 | * We don't use entry 0 in the table, so we omit it and address
|
---|
1319 | * with -1 offset.
|
---|
1320 | */
|
---|
1321 | BN_ULONG table[15][P256_LIMBS];
|
---|
1322 | BN_ULONG out[P256_LIMBS], t[P256_LIMBS];
|
---|
1323 | int i, ret = 0;
|
---|
1324 | enum {
|
---|
1325 | i_1 = 0, i_10, i_11, i_101, i_111, i_1010, i_1111,
|
---|
1326 | i_10101, i_101010, i_101111, i_x6, i_x8, i_x16, i_x32
|
---|
1327 | };
|
---|
1328 |
|
---|
1329 | /*
|
---|
1330 | * Catch allocation failure early.
|
---|
1331 | */
|
---|
1332 | if (bn_wexpand(r, P256_LIMBS) == NULL) {
|
---|
1333 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1334 | goto err;
|
---|
1335 | }
|
---|
1336 |
|
---|
1337 | if ((BN_num_bits(x) > 256) || BN_is_negative(x)) {
|
---|
1338 | BIGNUM *tmp;
|
---|
1339 |
|
---|
1340 | if ((tmp = BN_CTX_get(ctx)) == NULL
|
---|
1341 | || !BN_nnmod(tmp, x, group->order, ctx)) {
|
---|
1342 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1343 | goto err;
|
---|
1344 | }
|
---|
1345 | x = tmp;
|
---|
1346 | }
|
---|
1347 |
|
---|
1348 | if (!ecp_nistz256_bignum_to_field_elem(t, x)) {
|
---|
1349 | ERR_raise(ERR_LIB_EC, EC_R_COORDINATES_OUT_OF_RANGE);
|
---|
1350 | goto err;
|
---|
1351 | }
|
---|
1352 |
|
---|
1353 | ecp_nistz256_ord_mul_mont(table[0], t, RR);
|
---|
1354 | #if 0
|
---|
1355 | /*
|
---|
1356 | * Original sparse-then-fixed-window algorithm, retained for reference.
|
---|
1357 | */
|
---|
1358 | for (i = 2; i < 16; i += 2) {
|
---|
1359 | ecp_nistz256_ord_sqr_mont(table[i-1], table[i/2-1], 1);
|
---|
1360 | ecp_nistz256_ord_mul_mont(table[i], table[i-1], table[0]);
|
---|
1361 | }
|
---|
1362 |
|
---|
1363 | /*
|
---|
1364 | * The top 128bit of the exponent are highly redudndant, so we
|
---|
1365 | * perform an optimized flow
|
---|
1366 | */
|
---|
1367 | ecp_nistz256_ord_sqr_mont(t, table[15-1], 4); /* f0 */
|
---|
1368 | ecp_nistz256_ord_mul_mont(t, t, table[15-1]); /* ff */
|
---|
1369 |
|
---|
1370 | ecp_nistz256_ord_sqr_mont(out, t, 8); /* ff00 */
|
---|
1371 | ecp_nistz256_ord_mul_mont(out, out, t); /* ffff */
|
---|
1372 |
|
---|
1373 | ecp_nistz256_ord_sqr_mont(t, out, 16); /* ffff0000 */
|
---|
1374 | ecp_nistz256_ord_mul_mont(t, t, out); /* ffffffff */
|
---|
1375 |
|
---|
1376 | ecp_nistz256_ord_sqr_mont(out, t, 64); /* ffffffff0000000000000000 */
|
---|
1377 | ecp_nistz256_ord_mul_mont(out, out, t); /* ffffffff00000000ffffffff */
|
---|
1378 |
|
---|
1379 | ecp_nistz256_ord_sqr_mont(out, out, 32); /* ffffffff00000000ffffffff00000000 */
|
---|
1380 | ecp_nistz256_ord_mul_mont(out, out, t); /* ffffffff00000000ffffffffffffffff */
|
---|
1381 |
|
---|
1382 | /*
|
---|
1383 | * The bottom 128 bit of the exponent are processed with fixed 4-bit window
|
---|
1384 | */
|
---|
1385 | for(i = 0; i < 32; i++) {
|
---|
1386 | /* expLo - the low 128 bits of the exponent we use (ord(p256) - 2),
|
---|
1387 | * split into nibbles */
|
---|
1388 | static const unsigned char expLo[32] = {
|
---|
1389 | 0xb,0xc,0xe,0x6,0xf,0xa,0xa,0xd,0xa,0x7,0x1,0x7,0x9,0xe,0x8,0x4,
|
---|
1390 | 0xf,0x3,0xb,0x9,0xc,0xa,0xc,0x2,0xf,0xc,0x6,0x3,0x2,0x5,0x4,0xf
|
---|
1391 | };
|
---|
1392 |
|
---|
1393 | ecp_nistz256_ord_sqr_mont(out, out, 4);
|
---|
1394 | /* The exponent is public, no need in constant-time access */
|
---|
1395 | ecp_nistz256_ord_mul_mont(out, out, table[expLo[i]-1]);
|
---|
1396 | }
|
---|
1397 | #else
|
---|
1398 | /*
|
---|
1399 | * https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
|
---|
1400 | *
|
---|
1401 | * Even though this code path spares 12 squarings, 4.5%, and 13
|
---|
1402 | * multiplications, 25%, on grand scale sign operation is not that
|
---|
1403 | * much faster, not more that 2%...
|
---|
1404 | */
|
---|
1405 |
|
---|
1406 | /* pre-calculate powers */
|
---|
1407 | ecp_nistz256_ord_sqr_mont(table[i_10], table[i_1], 1);
|
---|
1408 |
|
---|
1409 | ecp_nistz256_ord_mul_mont(table[i_11], table[i_1], table[i_10]);
|
---|
1410 |
|
---|
1411 | ecp_nistz256_ord_mul_mont(table[i_101], table[i_11], table[i_10]);
|
---|
1412 |
|
---|
1413 | ecp_nistz256_ord_mul_mont(table[i_111], table[i_101], table[i_10]);
|
---|
1414 |
|
---|
1415 | ecp_nistz256_ord_sqr_mont(table[i_1010], table[i_101], 1);
|
---|
1416 |
|
---|
1417 | ecp_nistz256_ord_mul_mont(table[i_1111], table[i_1010], table[i_101]);
|
---|
1418 |
|
---|
1419 | ecp_nistz256_ord_sqr_mont(table[i_10101], table[i_1010], 1);
|
---|
1420 | ecp_nistz256_ord_mul_mont(table[i_10101], table[i_10101], table[i_1]);
|
---|
1421 |
|
---|
1422 | ecp_nistz256_ord_sqr_mont(table[i_101010], table[i_10101], 1);
|
---|
1423 |
|
---|
1424 | ecp_nistz256_ord_mul_mont(table[i_101111], table[i_101010], table[i_101]);
|
---|
1425 |
|
---|
1426 | ecp_nistz256_ord_mul_mont(table[i_x6], table[i_101010], table[i_10101]);
|
---|
1427 |
|
---|
1428 | ecp_nistz256_ord_sqr_mont(table[i_x8], table[i_x6], 2);
|
---|
1429 | ecp_nistz256_ord_mul_mont(table[i_x8], table[i_x8], table[i_11]);
|
---|
1430 |
|
---|
1431 | ecp_nistz256_ord_sqr_mont(table[i_x16], table[i_x8], 8);
|
---|
1432 | ecp_nistz256_ord_mul_mont(table[i_x16], table[i_x16], table[i_x8]);
|
---|
1433 |
|
---|
1434 | ecp_nistz256_ord_sqr_mont(table[i_x32], table[i_x16], 16);
|
---|
1435 | ecp_nistz256_ord_mul_mont(table[i_x32], table[i_x32], table[i_x16]);
|
---|
1436 |
|
---|
1437 | /* calculations */
|
---|
1438 | ecp_nistz256_ord_sqr_mont(out, table[i_x32], 64);
|
---|
1439 | ecp_nistz256_ord_mul_mont(out, out, table[i_x32]);
|
---|
1440 |
|
---|
1441 | for (i = 0; i < 27; i++) {
|
---|
1442 | static const struct { unsigned char p, i; } chain[27] = {
|
---|
1443 | { 32, i_x32 }, { 6, i_101111 }, { 5, i_111 },
|
---|
1444 | { 4, i_11 }, { 5, i_1111 }, { 5, i_10101 },
|
---|
1445 | { 4, i_101 }, { 3, i_101 }, { 3, i_101 },
|
---|
1446 | { 5, i_111 }, { 9, i_101111 }, { 6, i_1111 },
|
---|
1447 | { 2, i_1 }, { 5, i_1 }, { 6, i_1111 },
|
---|
1448 | { 5, i_111 }, { 4, i_111 }, { 5, i_111 },
|
---|
1449 | { 5, i_101 }, { 3, i_11 }, { 10, i_101111 },
|
---|
1450 | { 2, i_11 }, { 5, i_11 }, { 5, i_11 },
|
---|
1451 | { 3, i_1 }, { 7, i_10101 }, { 6, i_1111 }
|
---|
1452 | };
|
---|
1453 |
|
---|
1454 | ecp_nistz256_ord_sqr_mont(out, out, chain[i].p);
|
---|
1455 | ecp_nistz256_ord_mul_mont(out, out, table[chain[i].i]);
|
---|
1456 | }
|
---|
1457 | #endif
|
---|
1458 | ecp_nistz256_ord_mul_mont(out, out, one);
|
---|
1459 |
|
---|
1460 | /*
|
---|
1461 | * Can't fail, but check return code to be consistent anyway.
|
---|
1462 | */
|
---|
1463 | if (!bn_set_words(r, out, P256_LIMBS))
|
---|
1464 | goto err;
|
---|
1465 |
|
---|
1466 | ret = 1;
|
---|
1467 | err:
|
---|
1468 | return ret;
|
---|
1469 | }
|
---|
1470 | #else
|
---|
1471 | # define ecp_nistz256_inv_mod_ord NULL
|
---|
1472 | #endif
|
---|
1473 |
|
---|
1474 | const EC_METHOD *EC_GFp_nistz256_method(void)
|
---|
1475 | {
|
---|
1476 | static const EC_METHOD ret = {
|
---|
1477 | EC_FLAGS_DEFAULT_OCT,
|
---|
1478 | NID_X9_62_prime_field,
|
---|
1479 | ossl_ec_GFp_mont_group_init,
|
---|
1480 | ossl_ec_GFp_mont_group_finish,
|
---|
1481 | ossl_ec_GFp_mont_group_clear_finish,
|
---|
1482 | ossl_ec_GFp_mont_group_copy,
|
---|
1483 | ossl_ec_GFp_mont_group_set_curve,
|
---|
1484 | ossl_ec_GFp_simple_group_get_curve,
|
---|
1485 | ossl_ec_GFp_simple_group_get_degree,
|
---|
1486 | ossl_ec_group_simple_order_bits,
|
---|
1487 | ossl_ec_GFp_simple_group_check_discriminant,
|
---|
1488 | ossl_ec_GFp_simple_point_init,
|
---|
1489 | ossl_ec_GFp_simple_point_finish,
|
---|
1490 | ossl_ec_GFp_simple_point_clear_finish,
|
---|
1491 | ossl_ec_GFp_simple_point_copy,
|
---|
1492 | ossl_ec_GFp_simple_point_set_to_infinity,
|
---|
1493 | ossl_ec_GFp_simple_point_set_affine_coordinates,
|
---|
1494 | ecp_nistz256_get_affine,
|
---|
1495 | 0, 0, 0,
|
---|
1496 | ossl_ec_GFp_simple_add,
|
---|
1497 | ossl_ec_GFp_simple_dbl,
|
---|
1498 | ossl_ec_GFp_simple_invert,
|
---|
1499 | ossl_ec_GFp_simple_is_at_infinity,
|
---|
1500 | ossl_ec_GFp_simple_is_on_curve,
|
---|
1501 | ossl_ec_GFp_simple_cmp,
|
---|
1502 | ossl_ec_GFp_simple_make_affine,
|
---|
1503 | ossl_ec_GFp_simple_points_make_affine,
|
---|
1504 | ecp_nistz256_points_mul, /* mul */
|
---|
1505 | ecp_nistz256_mult_precompute, /* precompute_mult */
|
---|
1506 | ecp_nistz256_window_have_precompute_mult, /* have_precompute_mult */
|
---|
1507 | ossl_ec_GFp_mont_field_mul,
|
---|
1508 | ossl_ec_GFp_mont_field_sqr,
|
---|
1509 | 0, /* field_div */
|
---|
1510 | ossl_ec_GFp_mont_field_inv,
|
---|
1511 | ossl_ec_GFp_mont_field_encode,
|
---|
1512 | ossl_ec_GFp_mont_field_decode,
|
---|
1513 | ossl_ec_GFp_mont_field_set_to_one,
|
---|
1514 | ossl_ec_key_simple_priv2oct,
|
---|
1515 | ossl_ec_key_simple_oct2priv,
|
---|
1516 | 0, /* set private */
|
---|
1517 | ossl_ec_key_simple_generate_key,
|
---|
1518 | ossl_ec_key_simple_check_key,
|
---|
1519 | ossl_ec_key_simple_generate_public_key,
|
---|
1520 | 0, /* keycopy */
|
---|
1521 | 0, /* keyfinish */
|
---|
1522 | ossl_ecdh_simple_compute_key,
|
---|
1523 | ossl_ecdsa_simple_sign_setup,
|
---|
1524 | ossl_ecdsa_simple_sign_sig,
|
---|
1525 | ossl_ecdsa_simple_verify_sig,
|
---|
1526 | ecp_nistz256_inv_mod_ord, /* can be #define-d NULL */
|
---|
1527 | 0, /* blind_coordinates */
|
---|
1528 | 0, /* ladder_pre */
|
---|
1529 | 0, /* ladder_step */
|
---|
1530 | 0 /* ladder_post */
|
---|
1531 | };
|
---|
1532 |
|
---|
1533 | return &ret;
|
---|
1534 | }
|
---|