Print this page
6799218 RSA using Solaris Kernel Crypto framework lagging behind OpenSSL
5016936 bignumimpl:big_mul: potential memory leak
6810280 panic from bignum module: vmem_xalloc(): size == 0
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/common/bignum/bignumimpl.c
+++ new/usr/src/common/bignum/bignumimpl.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
↓ open down ↓ |
11 lines elided |
↑ open up ↑ |
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 - * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
22 + * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 -#pragma ident "%Z%%M% %I% %E% SMI"
27 -
28 -#define big_div_pos_fast big_div_pos
29 -
30 -#include "bignum.h"
31 -
32 26 /*
33 27 * Configuration guide
34 28 * -------------------
35 29 *
36 30 * There are 4 preprocessor symbols used to configure the bignum
37 31 * implementation. This file contains no logic to configure based on
38 32 * processor; we leave that to the Makefiles to specify.
39 33 *
40 34 * USE_FLOATING_POINT
41 35 * Meaning: There is support for a fast floating-point implementation of
42 36 * Montgomery multiply.
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
43 37 *
44 38 * PSR_MUL
45 39 * Meaning: There are processor-specific versions of the low level
46 40 * functions to implement big_mul. Those functions are: big_mul_set_vec,
47 41 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support
48 42 * for all 4 functions. You cannot pick and choose which subset of these
49 43 * functions to support; that would lead to a rat's nest of #ifdefs.
50 44 *
51 45 * HWCAP
52 46 * Meaning: Call multiply support functions through a function pointer.
53 - * On x86, there are multiple implementations for differnt hardware
47 + * On x86, there are multiple implementations for different hardware
54 48 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when
55 49 * a function is first used. So, the support functions are called through
56 50 * a function pointer. There is no need for that on Sparc, because there
57 51 * is only one implementation; support functions are called directly.
58 52 * Later, if there were some new VIS instruction, or something, and a
59 53 * run-time test were needed, rather than variant kernel modules and
60 54 * libraries, then HWCAP would be defined for Sparc, as well.
61 55 *
62 56 * UMUL64
63 57 * Meaning: It is safe to use generic C code that assumes the existence
64 58 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined,
65 59 * then the generic code for big_mul_add_vec() must necessarily be very slow,
66 60 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
67 61 *
68 62 */
69 63
70 64
65 +#include <sys/types.h>
66 +#include "bignum.h"
67 +
71 68 #ifdef _KERNEL
72 69 #include <sys/ddi.h>
73 70 #include <sys/mdesc.h>
74 71 #include <sys/crypto/common.h>
75 72
76 -#include <sys/types.h>
77 73 #include <sys/kmem.h>
78 74 #include <sys/param.h>
79 75 #include <sys/sunddi.h>
80 76
77 +#else
78 +#include <stdlib.h>
79 +#include <stdio.h>
80 +#include <assert.h>
81 +#define ASSERT assert
82 +#endif /* _KERNEL */
83 +
84 +#ifdef _LP64 /* truncate 64-bit size_t to 32-bits */
85 +#define UI32(ui) ((uint32_t)ui)
86 +#else /* size_t already 32-bits */
87 +#define UI32(ui) (ui)
88 +#endif
89 +
90 +
91 +#ifdef _KERNEL
81 92 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP)
82 93 #define big_free(ptr, size) kmem_free(ptr, size)
83 94
84 95 void *
85 96 big_realloc(void *from, size_t oldsize, size_t newsize)
86 97 {
87 98 void *rv;
88 99
89 100 rv = kmem_alloc(newsize, KM_NOSLEEP);
90 101 if (rv != NULL)
91 102 bcopy(from, rv, oldsize);
92 103 kmem_free(from, oldsize);
93 104 return (rv);
94 105 }
95 106
96 107 #else /* _KERNEL */
97 108
98 -#include <stdlib.h>
99 -#include <stdio.h>
100 -#include <assert.h>
101 -#define ASSERT assert
102 -
103 109 #ifndef MALLOC_DEBUG
104 110
105 111 #define big_malloc(size) malloc(size)
106 112 #define big_free(ptr, size) free(ptr)
107 113
108 114 #else
109 115
110 116 void
111 117 big_free(void *ptr, size_t size)
112 118 {
113 119 printf("freed %d bytes at %p\n", size, ptr);
114 120 free(ptr);
115 121 }
116 122
117 123 void *
118 124 big_malloc(size_t size)
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
119 125 {
120 126 void *rv;
121 127 rv = malloc(size);
122 128 printf("malloced %d bytes, addr:%p\n", size, rv);
123 129 return (rv);
124 130 }
125 131 #endif /* MALLOC_DEBUG */
126 132
127 133 #define big_realloc(x, y, z) realloc((x), (z))
128 134
135 +
136 +/*
137 + * printbignum()
138 + * Print a BIGNUM type to stdout.
139 + */
129 140 void
130 141 printbignum(char *aname, BIGNUM *a)
131 142 {
132 143 int i;
133 144
134 145 (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
135 146 for (i = a->len - 1; i >= 0; i--) {
136 147 #ifdef BIGNUM_CHUNK_32
137 148 (void) printf("%08x ", a->value[i]);
138 - if ((i % 8 == 0) && (i != 0)) {
149 + if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
139 150 (void) printf("\n");
140 151 }
141 152 #else
142 153 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
143 154 (uint32_t)((a->value[i]) & 0xffffffff));
144 - if ((i % 4 == 0) && (i != 0)) {
155 + if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
145 156 (void) printf("\n");
146 157 }
147 158 #endif
148 159 }
149 160 (void) printf("\n");
150 161 }
151 162
152 163 #endif /* _KERNEL */
153 164
154 165
155 -/* size in BIG_CHUNK_SIZE-bit words */
166 +/*
167 + * big_init()
168 + * Initialize and allocate memory for a BIGNUM type.
169 + *
170 + * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
171 + *
172 + * Note: call big_finish() to free memory allocated by big_init().
173 + *
174 + * Input:
175 + * number Uninitialized memory for BIGNUM
176 + * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
177 + *
178 + * Output:
179 + * number Initialized BIGNUM
180 + *
181 + * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
182 + */
156 183 BIG_ERR_CODE
157 184 big_init(BIGNUM *number, int size)
158 185 {
159 - number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
186 + number->value = big_malloc(BIGNUM_WORDSIZE * size);
160 187 if (number->value == NULL) {
161 188 return (BIG_NO_MEM);
162 189 }
163 190 number->size = size;
164 191 number->len = 0;
165 192 number->sign = 1;
166 193 number->malloced = 1;
167 194 return (BIG_OK);
168 195 }
169 196
170 -/* size in BIG_CHUNK_SIZE-bit words */
197 +
198 +/*
199 + * big_init1()
200 + * Initialize and, if needed, allocate memory for a BIGNUM type.
201 + * Use the buffer passed, buf, if any, instad of allocating memory
202 + * if it's at least "size" bytes.
203 + *
204 + * Note: call big_finish() to free memory allocated by big_init().
205 + *
206 + * Input:
207 + * number Uninitialized memory for BIGNUM
208 + * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
209 + * buf Buffer for storing a BIGNUM.
210 + * If NULL, big_init1() will allocate a buffer
211 + * bufsize Size, in BIG_CHUNK_SIZE_bit words, of buf
212 + *
213 + * Output:
214 + * number Initialized BIGNUM
215 + *
216 + * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
217 + */
171 218 BIG_ERR_CODE
172 219 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
173 220 {
174 221 if ((buf == NULL) || (size > bufsize)) {
175 - number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
222 + number->value = big_malloc(BIGNUM_WORDSIZE * size);
176 223 if (number->value == NULL) {
177 224 return (BIG_NO_MEM);
178 225 }
179 226 number->size = size;
180 227 number->malloced = 1;
181 228 } else {
182 229 number->value = buf;
183 230 number->size = bufsize;
184 231 number->malloced = 0;
185 232 }
186 - number->len = 0;
187 - number->sign = 1;
233 + number->len = 0;
234 + number->sign = 1;
188 235
189 236 return (BIG_OK);
190 237 }
191 238
239 +
240 +/*
241 + * big_finish()
242 + * Free memory, if any, allocated by big_init() or big_init1().
243 + */
192 244 void
193 245 big_finish(BIGNUM *number)
194 246 {
195 247 if (number->malloced == 1) {
196 - big_free(number->value,
197 - sizeof (BIG_CHUNK_TYPE) * number->size);
248 + big_free(number->value, BIGNUM_WORDSIZE * number->size);
198 249 number->malloced = 0;
199 250 }
200 251 }
201 252
202 253
203 254 /*
204 - * bn->size should be at least
205 - * (len + sizeof (BIG_CHUNK_TYPE) - 1) / sizeof (BIG_CHUNK_TYPE) bytes
255 + * bn->size should be at least
256 + * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes
206 257 * converts from byte-big-endian format to bignum format (words in
207 258 * little endian order, but bytes within the words big endian)
208 259 */
209 260 void
210 261 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
211 262 {
212 - int i, j, offs;
263 + int i, j;
264 + uint32_t offs;
265 + const uint32_t slen = UI32(len);
213 266 BIG_CHUNK_TYPE word;
214 267 uchar_t *knwordp;
215 268
216 -#ifdef _LP64
217 - offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
218 - bn->len = (uint32_t)len / sizeof (BIG_CHUNK_TYPE);
269 + if (slen == 0) {
270 + bn->len = 1;
271 + bn->value[0] = 0;
272 + return;
273 + }
219 274
220 - for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
221 -#else /* !_LP64 */
222 - offs = len % sizeof (BIG_CHUNK_TYPE);
223 - bn->len = len / sizeof (BIG_CHUNK_TYPE);
224 - for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
225 -#endif /* _LP64 */
226 - knwordp = &(kn[len - sizeof (BIG_CHUNK_TYPE) * (i + 1)]);
275 + offs = slen % BIGNUM_WORDSIZE;
276 + bn->len = slen / BIGNUM_WORDSIZE;
277 +
278 + for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
279 + knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
227 280 word = knwordp[0];
228 - for (j = 1; j < sizeof (BIG_CHUNK_TYPE); j++) {
229 - word = (word << 8)+ knwordp[j];
281 + for (j = 1; j < BIGNUM_WORDSIZE; j++) {
282 + word = (word << BITSINBYTE) + knwordp[j];
230 283 }
231 284 bn->value[i] = word;
232 285 }
233 286 if (offs > 0) {
234 287 word = kn[0];
235 - for (i = 1; i < offs; i++) word = (word << 8) + kn[i];
288 + for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
236 289 bn->value[bn->len++] = word;
237 290 }
238 - while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) {
291 + while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
239 292 bn->len --;
240 293 }
241 294 }
242 295
296 +
243 297 /*
244 298 * copies the least significant len bytes if
245 - * len < bn->len * sizeof (BIG_CHUNK_TYPE)
299 + * len < bn->len * BIGNUM_WORDSIZE
246 300 * converts from bignum format to byte-big-endian format.
247 301 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
248 302 */
249 303 void
250 304 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
251 305 {
252 - int i, j, offs;
306 + int i, j;
307 + uint32_t offs;
308 + const uint32_t slen = UI32(len);
253 309 BIG_CHUNK_TYPE word;
254 310
255 - if (len < sizeof (BIG_CHUNK_TYPE) * bn->len) {
256 -#ifdef _LP64
257 - for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
258 -#else /* !_LP64 */
259 - for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
260 -#endif /* _LP64 */
311 + if (len < BIGNUM_WORDSIZE * bn->len) {
312 + for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
261 313 word = bn->value[i];
262 - for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
263 - kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
314 + for (j = 0; j < BIGNUM_WORDSIZE; j++) {
315 + kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
264 316 word & 0xff;
265 - word = word >> 8;
317 + word = word >> BITSINBYTE;
266 318 }
267 319 }
268 -#ifdef _LP64
269 - offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
270 -#else /* !_LP64 */
271 - offs = len % sizeof (BIG_CHUNK_TYPE);
272 -#endif /* _LP64 */
320 + offs = slen % BIGNUM_WORDSIZE;
273 321 if (offs > 0) {
274 - word = bn->value[len / sizeof (BIG_CHUNK_TYPE)];
275 -#ifdef _LP64
276 - for (i = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
277 - i > 0; i --) {
278 -#else /* !_LP64 */
279 - for (i = len % sizeof (BIG_CHUNK_TYPE);
280 - i > 0; i --) {
281 -#endif /* _LP64 */
322 + word = bn->value[slen / BIGNUM_WORDSIZE];
323 + for (i = slen % BIGNUM_WORDSIZE; i > 0; i --) {
282 324 kn[i - 1] = word & 0xff;
283 - word = word >> 8;
325 + word = word >> BITSINBYTE;
284 326 }
285 327 }
286 328 } else {
287 329 for (i = 0; i < bn->len; i++) {
288 330 word = bn->value[i];
289 - for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
290 - kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
331 + for (j = 0; j < BIGNUM_WORDSIZE; j++) {
332 + kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
291 333 word & 0xff;
292 - word = word >> 8;
334 + word = word >> BITSINBYTE;
293 335 }
294 336 }
295 -#ifdef _LP64
296 - for (i = 0;
297 - i < (uint32_t)len - sizeof (BIG_CHUNK_TYPE) * bn->len;
298 - i++) {
299 -#else /* !_LP64 */
300 - for (i = 0; i < len - sizeof (BIG_CHUNK_TYPE) * bn->len; i++) {
301 -#endif /* _LP64 */
337 + for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {
302 338 kn[i] = 0;
303 339 }
304 340 }
305 341 }
306 342
307 343
308 344 int
309 345 big_bitlength(BIGNUM *a)
310 346 {
311 347 int l = 0, b = 0;
312 348 BIG_CHUNK_TYPE c;
313 349
314 350 l = a->len - 1;
315 351 while ((l > 0) && (a->value[l] == 0)) {
316 352 l--;
317 353 }
318 - b = sizeof (BIG_CHUNK_TYPE) * BITSINBYTE;
354 + b = BIG_CHUNK_SIZE;
319 355 c = a->value[l];
320 356 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
321 357 c = c << 1;
322 358 b--;
323 359 }
324 360
325 - return (l * sizeof (BIG_CHUNK_TYPE) * BITSINBYTE + b);
361 + return (l * BIG_CHUNK_SIZE + b);
326 362 }
327 363
328 364
329 365 BIG_ERR_CODE
330 366 big_copy(BIGNUM *dest, BIGNUM *src)
331 367 {
332 368 BIG_CHUNK_TYPE *newptr;
333 369 int i, len;
334 370
335 371 len = src->len;
336 372 while ((len > 1) && (src->value[len - 1] == 0)) {
337 373 len--;
338 374 }
339 375 src->len = len;
340 376 if (dest->size < len) {
341 377 if (dest->malloced == 1) {
342 378 newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
343 - sizeof (BIG_CHUNK_TYPE) * dest->size,
344 - sizeof (BIG_CHUNK_TYPE) * len);
379 + BIGNUM_WORDSIZE * dest->size,
380 + BIGNUM_WORDSIZE * len);
345 381 } else {
346 382 newptr = (BIG_CHUNK_TYPE *)
347 - big_malloc(sizeof (BIG_CHUNK_TYPE) * len);
383 + big_malloc(BIGNUM_WORDSIZE * len);
348 384 if (newptr != NULL) {
349 385 dest->malloced = 1;
350 386 }
351 387 }
352 388 if (newptr == NULL) {
353 389 return (BIG_NO_MEM);
354 390 }
355 391 dest->value = newptr;
356 392 dest->size = len;
357 393 }
358 394 dest->len = len;
359 395 dest->sign = src->sign;
360 396 for (i = 0; i < len; i++) {
361 397 dest->value[i] = src->value[i];
362 398 }
363 399
364 400 return (BIG_OK);
365 401 }
366 402
367 403
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
368 404 BIG_ERR_CODE
369 405 big_extend(BIGNUM *number, int size)
370 406 {
371 407 BIG_CHUNK_TYPE *newptr;
372 408 int i;
373 409
374 410 if (number->size >= size)
375 411 return (BIG_OK);
376 412 if (number->malloced) {
377 413 number->value = big_realloc(number->value,
378 - sizeof (BIG_CHUNK_TYPE) * number->size,
379 - sizeof (BIG_CHUNK_TYPE) * size);
414 + BIGNUM_WORDSIZE * number->size,
415 + BIGNUM_WORDSIZE * size);
380 416 } else {
381 - newptr = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
417 + newptr = big_malloc(BIGNUM_WORDSIZE * size);
382 418 if (newptr != NULL) {
383 419 for (i = 0; i < number->size; i++) {
384 420 newptr[i] = number->value[i];
385 421 }
386 422 }
387 423 number->value = newptr;
388 424 }
389 425
390 426 if (number->value == NULL) {
391 427 return (BIG_NO_MEM);
392 428 }
393 429
394 430 number->size = size;
395 431 number->malloced = 1;
396 432 return (BIG_OK);
397 433 }
398 434
399 435
400 436 /* returns 1 if n == 0 */
401 437 int
402 438 big_is_zero(BIGNUM *n)
403 439 {
404 440 int i, result;
405 441
406 442 result = 1;
407 443 for (i = 0; i < n->len; i++) {
408 444 if (n->value[i] != 0) {
409 445 result = 0;
410 446 }
411 447 }
412 448 return (result);
413 449 }
414 450
415 451
416 452 BIG_ERR_CODE
417 453 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
418 454 {
419 455 int i, shorter, longer;
420 456 BIG_CHUNK_TYPE cy, ai;
421 457 BIG_CHUNK_TYPE *r, *a, *b, *c;
422 458 BIG_ERR_CODE err;
423 459 BIGNUM *longerarg;
424 460
425 461 if (aa->len > bb->len) {
426 462 shorter = bb->len;
427 463 longer = aa->len;
428 464 longerarg = aa;
429 465 } else {
430 466 shorter = aa->len;
431 467 longer = bb->len;
432 468 longerarg = bb;
433 469 }
434 470 if (result->size < longer + 1) {
435 471 err = big_extend(result, longer + 1);
436 472 if (err != BIG_OK) {
437 473 return (err);
438 474 }
439 475 }
440 476
441 477 r = result->value;
442 478 a = aa->value;
443 479 b = bb->value;
444 480 c = longerarg->value;
445 481 cy = 0;
446 482 for (i = 0; i < shorter; i++) {
447 483 ai = a[i];
448 484 r[i] = ai + b[i] + cy;
449 485 if (r[i] > ai) {
450 486 cy = 0;
451 487 } else if (r[i] < ai) {
452 488 cy = 1;
453 489 }
454 490 }
455 491 for (; i < longer; i++) {
456 492 ai = c[i];
457 493 r[i] = ai + cy;
458 494 if (r[i] >= ai) {
459 495 cy = 0;
460 496 }
461 497 }
462 498 if (cy == 1) {
463 499 r[i] = cy;
464 500 result->len = longer + 1;
465 501 } else {
466 502 result->len = longer;
467 503 }
468 504 result->sign = 1;
469 505 return (BIG_OK);
470 506 }
471 507
472 508
473 509 /* caller must make sure that result has at least len words allocated */
474 510 void
475 511 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
476 512 {
477 513 int i;
478 514 BIG_CHUNK_TYPE cy, ai;
479 515
480 516 cy = 1;
481 517 for (i = 0; i < len; i++) {
482 518 ai = a[i];
483 519 r[i] = ai + (~b[i]) + cy;
484 520 if (r[i] > ai) {
485 521 cy = 0;
486 522 } else if (r[i] < ai) {
487 523 cy = 1;
488 524 }
489 525 }
490 526 }
491 527
492 528
493 529 /* result=aa-bb it is assumed that aa>=bb */
494 530 BIG_ERR_CODE
495 531 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
496 532 {
497 533 int i, shorter;
498 534 BIG_CHUNK_TYPE cy = 1, ai;
499 535 BIG_CHUNK_TYPE *r, *a, *b;
500 536 BIG_ERR_CODE err = BIG_OK;
501 537
502 538 if (aa->len > bb->len) {
503 539 shorter = bb->len;
504 540 } else {
505 541 shorter = aa->len;
506 542 }
507 543 if (result->size < aa->len) {
508 544 err = big_extend(result, aa->len);
509 545 if (err != BIG_OK) {
510 546 return (err);
511 547 }
512 548 }
513 549
514 550 r = result->value;
515 551 a = aa->value;
516 552 b = bb->value;
517 553 result->len = aa->len;
518 554 cy = 1;
519 555 for (i = 0; i < shorter; i++) {
520 556 ai = a[i];
521 557 r[i] = ai + (~b[i]) + cy;
522 558 if (r[i] > ai) {
523 559 cy = 0;
524 560 } else if (r[i] < ai) {
525 561 cy = 1;
526 562 }
527 563 }
528 564 for (; i < aa->len; i++) {
529 565 ai = a[i];
530 566 r[i] = ai + (~0) + cy;
531 567 if (r[i] < ai) {
532 568 cy = 1;
533 569 }
534 570 }
535 571 result->sign = 1;
536 572
537 573 if (cy == 0) {
538 574 return (BIG_INVALID_ARGS);
539 575 } else {
540 576 return (BIG_OK);
541 577 }
542 578 }
543 579
544 580
545 581 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
546 582 int
547 583 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
548 584 {
549 585 int i;
550 586
551 587 if (aa->len > bb->len) {
552 588 for (i = aa->len - 1; i > bb->len - 1; i--) {
553 589 if (aa->value[i] > 0) {
↓ open down ↓ |
162 lines elided |
↑ open up ↑ |
554 590 return (1);
555 591 }
556 592 }
557 593 } else if (aa->len < bb->len) {
558 594 for (i = bb->len - 1; i > aa->len - 1; i--) {
559 595 if (bb->value[i] > 0) {
560 596 return (-1);
561 597 }
562 598 }
563 599 } else {
564 - i = aa->len-1;
600 + i = aa->len - 1;
565 601 }
566 602 for (; i >= 0; i--) {
567 603 if (aa->value[i] > bb->value[i]) {
568 604 return (1);
569 605 } else if (aa->value[i] < bb->value[i]) {
570 606 return (-1);
571 607 }
572 608 }
573 609
574 610 return (0);
575 611 }
576 612
577 613
578 614 BIG_ERR_CODE
579 615 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
580 616 {
581 617 BIG_ERR_CODE err;
582 618
583 619 if ((bb->sign == -1) && (aa->sign == 1)) {
584 620 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
585 621 return (err);
586 622 }
587 623 result->sign = 1;
588 624 } else if ((aa->sign == -1) && (bb->sign == 1)) {
589 625 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
590 626 return (err);
591 627 }
592 628 result->sign = -1;
593 629 } else if ((aa->sign == 1) && (bb->sign == 1)) {
594 630 if (big_cmp_abs(aa, bb) >= 0) {
595 631 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
596 632 return (err);
597 633 }
598 634 result->sign = 1;
599 635 } else {
600 636 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
601 637 return (err);
602 638 }
603 639 result->sign = -1;
604 640 }
605 641 } else {
606 642 if (big_cmp_abs(aa, bb) >= 0) {
607 643 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
608 644 return (err);
609 645 }
610 646 result->sign = -1;
611 647 } else {
612 648 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
613 649 return (err);
614 650 }
615 651 result->sign = 1;
616 652 }
617 653 }
618 654 return (BIG_OK);
619 655 }
620 656
621 657
622 658 BIG_ERR_CODE
623 659 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
624 660 {
625 661 BIG_ERR_CODE err;
626 662
627 663 if ((bb->sign == -1) && (aa->sign == -1)) {
628 664 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
629 665 return (err);
630 666 }
631 667 result->sign = -1;
632 668 } else if ((aa->sign == 1) && (bb->sign == 1)) {
633 669 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
634 670 return (err);
635 671 }
636 672 result->sign = 1;
637 673 } else if ((aa->sign == 1) && (bb->sign == -1)) {
638 674 if (big_cmp_abs(aa, bb) >= 0) {
639 675 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
640 676 return (err);
641 677 }
642 678 result->sign = 1;
643 679 } else {
644 680 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
645 681 return (err);
646 682 }
647 683 result->sign = -1;
648 684 }
649 685 } else {
650 686 if (big_cmp_abs(aa, bb) >= 0) {
651 687 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
652 688 return (err);
653 689 }
654 690 result->sign = -1;
655 691 } else {
656 692 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
657 693 return (err);
658 694 }
659 695 result->sign = 1;
660 696 }
661 697 }
662 698 return (BIG_OK);
663 699 }
664 700
665 701
666 702 /* result = aa/2 */
667 703 BIG_ERR_CODE
668 704 big_half_pos(BIGNUM *result, BIGNUM *aa)
669 705 {
670 706 BIG_ERR_CODE err;
671 707 int i;
672 708 BIG_CHUNK_TYPE cy, cy1;
673 709 BIG_CHUNK_TYPE *a, *r;
674 710
675 711 if (result->size < aa->len) {
676 712 err = big_extend(result, aa->len);
677 713 if (err != BIG_OK) {
678 714 return (err);
679 715 }
680 716 }
681 717
682 718 result->len = aa->len;
683 719 a = aa->value;
684 720 r = result->value;
685 721 cy = 0;
686 722 for (i = aa->len - 1; i >= 0; i--) {
687 723 cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
688 724 r[i] = (cy | (a[i] >> 1));
689 725 cy = cy1;
690 726 }
691 727 if (r[result->len - 1] == 0) {
692 728 result->len--;
693 729 }
694 730
695 731 return (BIG_OK);
696 732 }
697 733
698 734 /* result = aa*2 */
699 735 BIG_ERR_CODE
700 736 big_double(BIGNUM *result, BIGNUM *aa)
701 737 {
702 738 BIG_ERR_CODE err;
703 739 int i, rsize;
704 740 BIG_CHUNK_TYPE cy, cy1;
705 741 BIG_CHUNK_TYPE *a, *r;
706 742
707 743 if ((aa->len > 0) &&
708 744 ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
709 745 rsize = aa->len + 1;
710 746 } else {
711 747 rsize = aa->len;
712 748 }
713 749
714 750 if (result->size < rsize) {
715 751 err = big_extend(result, rsize);
716 752 if (err != BIG_OK)
717 753 return (err);
718 754 }
719 755
720 756 a = aa->value;
721 757 r = result->value;
722 758 if (rsize == aa->len + 1) {
723 759 r[rsize - 1] = 1;
724 760 }
725 761 cy = 0;
726 762 for (i = 0; i < aa->len; i++) {
727 763 cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
728 764 r[i] = (cy | (a[i] << 1));
729 765 cy = cy1;
730 766 }
731 767 result->len = rsize;
732 768 return (BIG_OK);
733 769 }
734 770
735 771
736 772 /*
737 773 * returns aa mod b, aa must be nonneg, b must be a max
738 774 * (BIG_CHUNK_SIZE / 2)-bit integer
739 775 */
740 776 static uint32_t
741 777 big_modhalf_pos(BIGNUM *aa, uint32_t b)
742 778 {
743 779 int i;
744 780 BIG_CHUNK_TYPE rem;
745 781
746 782 if (aa->len == 0) {
747 783 return (0);
748 784 }
749 785 rem = aa->value[aa->len - 1] % b;
750 786 for (i = aa->len - 2; i >= 0; i--) {
751 787 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
752 788 (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
753 789 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
754 790 (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
755 791 }
756 792
757 793 return ((uint32_t)rem);
758 794 }
759 795
760 796
761 797 /*
762 798 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
763 799 * result->size should be at least aa->len at entry
764 800 * aa, bb, and result should be positive
765 801 */
766 802 void
767 803 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
768 804 {
769 805 int i, lendiff;
770 806 BIGNUM res1, aa1;
771 807
772 808 lendiff = aa->len - bb->len;
773 809 res1.size = result->size - lendiff;
774 810 res1.malloced = 0;
775 811 res1.value = result->value + lendiff;
776 812 aa1.size = aa->size - lendiff;
777 813 aa1.value = aa->value + lendiff;
778 814 aa1.len = bb->len;
779 815 aa1.sign = 1;
780 816 (void) big_sub_pos(&res1, &aa1, bb);
781 817 if (result->value != aa->value) {
782 818 for (i = 0; i < lendiff; i++) {
783 819 result->value[i] = aa->value[i];
784 820 }
785 821 }
786 822 result->len = aa->len;
787 823 }
↓ open down ↓ |
213 lines elided |
↑ open up ↑ |
788 824
789 825
790 826 /*
791 827 * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
792 828 * (2^BIG_CHUNK_SIZE)^lendiff * |bb|
793 829 * aa->len should be >= bb->len
794 830 */
795 831 int
796 832 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
797 833 {
798 - int lendiff;
799 - BIGNUM aa1;
834 + int lendiff;
835 + BIGNUM aa1;
800 836
801 837 lendiff = aa->len - bb->len;
802 838 aa1.len = bb->len;
803 839 aa1.size = aa->size - lendiff;
804 840 aa1.malloced = 0;
805 841 aa1.value = aa->value + lendiff;
806 842 return (big_cmp_abs(&aa1, bb));
807 843 }
808 844
809 845
810 846 /*
811 847 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
812 848 * result should have enough space allocated.
813 849 */
814 850 static void
815 851 big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
816 852 {
817 853 int i;
818 854 BIG_CHUNK_TYPE t1, t2, ai, cy;
819 855 BIG_CHUNK_TYPE *a, *r;
820 856
821 857 a = aa->value;
822 858 r = result->value;
823 859 cy = 0;
824 860 for (i = 0; i < aa->len; i++) {
825 861 ai = a[i];
826 862 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
827 863 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
828 864 (t1 >> (BIG_CHUNK_SIZE / 2));
829 865 r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
830 866 (t2 << (BIG_CHUNK_SIZE / 2));
831 867 cy = t2 >> (BIG_CHUNK_SIZE / 2);
832 868 }
833 869 r[i] = cy;
834 870 result->len = aa->len + 1;
835 871 result->sign = aa->sign;
836 872 }
837 873
838 874
839 875 /*
840 876 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
841 877 * (BIG_CHUNK_SIZE / 2)-bit positive integer.
842 878 * result should have enough space allocated.
843 879 */
844 880 static void
845 881 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
846 882 {
847 883 int i;
848 884 BIG_CHUNK_TYPE t1, t2, ai, cy, ri;
849 885 BIG_CHUNK_TYPE *a, *r;
850 886
851 887 a = aa->value;
852 888 r = result->value;
853 889 cy = 0;
854 890 ri = 0;
855 891 for (i = 0; i < aa->len; i++) {
856 892 ai = a[i];
857 893 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
858 894 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
859 895 (t1 >> (BIG_CHUNK_SIZE / 2));
860 896 r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri;
861 897 ri = t2 & BIG_CHUNK_LOWHALFBITS;
862 898 cy = t2 >> (BIG_CHUNK_SIZE / 2);
863 899 }
864 900 r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri;
865 901 result->len = aa->len + 1;
866 902 result->sign = aa->sign;
867 903 }
868 904
869 905
870 906 /* it is assumed that result->size is big enough */
871 907 void
872 908 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
873 909 {
874 910 int i;
875 911 BIG_CHUNK_TYPE cy, ai;
876 912
877 913 if (offs == 0) {
878 914 if (result != aa) {
879 915 (void) big_copy(result, aa);
880 916 }
881 917 return;
882 918 }
883 919 cy = 0;
884 920 for (i = 0; i < aa->len; i++) {
885 921 ai = aa->value[i];
886 922 result->value[i] = (ai << offs) | cy;
887 923 cy = ai >> (BIG_CHUNK_SIZE - offs);
888 924 }
889 925 if (cy != 0) {
890 926 result->len = aa->len + 1;
891 927 result->value[result->len - 1] = cy;
892 928 } else {
893 929 result->len = aa->len;
894 930 }
895 931 result->sign = aa->sign;
896 932 }
897 933
898 934
899 935 /* it is assumed that result->size is big enough */
900 936 void
901 937 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
902 938 {
903 939 int i;
904 940 BIG_CHUNK_TYPE cy, ai;
↓ open down ↓ |
95 lines elided |
↑ open up ↑ |
905 941
906 942 if (offs == 0) {
907 943 if (result != aa) {
908 944 (void) big_copy(result, aa);
909 945 }
910 946 return;
911 947 }
912 948 cy = aa->value[0] >> offs;
913 949 for (i = 1; i < aa->len; i++) {
914 950 ai = aa->value[i];
915 - result->value[i-1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
951 + result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
916 952 cy = ai >> offs;
917 953 }
918 954 result->len = aa->len;
919 955 result->value[result->len - 1] = cy;
920 956 result->sign = aa->sign;
921 957 }
922 958
923 959
924 960 /*
925 961 * result = aa/bb remainder = aa mod bb
926 962 * it is assumed that aa and bb are positive
927 963 */
928 964 BIG_ERR_CODE
929 -big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
965 +big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
930 966 {
931 967 BIG_ERR_CODE err = BIG_OK;
932 968 int i, alen, blen, tlen, rlen, offs;
933 969 BIG_CHUNK_TYPE higha, highb, coeff;
934 970 BIG_CHUNK_TYPE *a, *b;
935 971 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
936 972 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
937 973 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
938 974 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE];
939 975 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE];
940 976 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE];
941 977
942 978 a = aa->value;
943 979 b = bb->value;
944 980 alen = aa->len;
945 981 blen = bb->len;
946 982 while ((alen > 1) && (a[alen - 1] == 0)) {
947 983 alen = alen - 1;
948 984 }
949 985 aa->len = alen;
950 986 while ((blen > 1) && (b[blen - 1] == 0)) {
951 987 blen = blen - 1;
952 988 }
953 989 bb->len = blen;
954 990 if ((blen == 1) && (b[0] == 0)) {
955 991 return (BIG_DIV_BY_0);
956 992 }
957 993
958 994 if (big_cmp_abs(aa, bb) < 0) {
959 995 if ((remainder != NULL) &&
960 996 ((err = big_copy(remainder, aa)) != BIG_OK)) {
961 997 return (err);
962 998 }
963 999 if (result != NULL) {
964 1000 result->len = 1;
965 1001 result->sign = 1;
966 1002 result->value[0] = 0;
967 1003 }
968 1004 return (BIG_OK);
969 1005 }
970 1006
971 1007 if ((err = big_init1(&bblow, blen + 1,
972 1008 bblowvalue, arraysize(bblowvalue))) != BIG_OK)
973 1009 return (err);
974 1010
975 1011 if ((err = big_init1(&bbhigh, blen + 1,
976 1012 bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
977 1013 goto ret1;
978 1014
979 1015 if ((err = big_init1(&tmp1, alen + 2,
980 1016 tmp1value, arraysize(tmp1value))) != BIG_OK)
981 1017 goto ret2;
982 1018
983 1019 if ((err = big_init1(&tmp2, blen + 2,
984 1020 tmp2value, arraysize(tmp2value))) != BIG_OK)
985 1021 goto ret3;
986 1022
987 1023 if ((err = big_init1(&tresult, alen - blen + 2,
988 1024 tresultvalue, arraysize(tresultvalue))) != BIG_OK)
989 1025 goto ret4;
990 1026
991 1027 offs = 0;
992 1028 highb = b[blen - 1];
993 1029 if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
994 1030 highb = highb >> (BIG_CHUNK_SIZE / 2);
995 1031 offs = (BIG_CHUNK_SIZE / 2);
996 1032 }
997 1033 while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
998 1034 highb = highb << 1;
999 1035 offs++;
1000 1036 }
1001 1037
1002 1038 big_shiftleft(&bblow, bb, offs);
1003 1039
1004 1040 if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
1005 1041 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1006 1042 } else {
1007 1043 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1008 1044 }
1009 1045 if (bbhigh.value[bbhigh.len - 1] == 0) {
1010 1046 bbhigh.len--;
1011 1047 } else {
1012 1048 bbhigh.value[bbhigh.len] = 0;
1013 1049 }
1014 1050
1015 1051 highb = bblow.value[bblow.len - 1];
1016 1052
1017 1053 big_shiftleft(&tmp1, aa, offs);
1018 1054 rlen = tmp1.len - bblow.len + 1;
1019 1055 tresult.len = rlen;
1020 1056
1021 1057 tmp1.len++;
1022 1058 tlen = tmp1.len;
1023 1059 tmp1.value[tmp1.len - 1] = 0;
1024 1060 for (i = 0; i < rlen; i++) {
1025 1061 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
1026 1062 (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
1027 1063 coeff = higha / (highb + 1);
1028 1064 big_mulhalf_high(&tmp2, &bblow, coeff);
1029 1065 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1030 1066 bbhigh.len++;
1031 1067 while (tmp1.value[tlen - 1] > 0) {
1032 1068 big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1033 1069 coeff++;
1034 1070 }
1035 1071 bbhigh.len--;
1036 1072 tlen--;
1037 1073 tmp1.len--;
1038 1074 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
1039 1075 big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1040 1076 coeff++;
1041 1077 }
1042 1078 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
1043 1079 higha = tmp1.value[tlen - 1];
1044 1080 coeff = higha / (highb + 1);
1045 1081 big_mulhalf_low(&tmp2, &bblow, coeff);
1046 1082 tmp2.len--;
1047 1083 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1048 1084 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
1049 1085 big_sub_pos_high(&tmp1, &tmp1, &bblow);
1050 1086 coeff++;
1051 1087 }
1052 1088 tresult.value[rlen - i - 1] =
1053 1089 tresult.value[rlen - i - 1] + coeff;
1054 1090 }
1055 1091
1056 1092 big_shiftright(&tmp1, &tmp1, offs);
1057 1093
1058 1094 err = BIG_OK;
1059 1095
1060 1096 if ((remainder != NULL) &&
1061 1097 ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1062 1098 goto ret;
1063 1099
1064 1100 if (result != NULL)
1065 1101 err = big_copy(result, &tresult);
1066 1102
1067 1103 ret:
1068 1104 big_finish(&tresult);
1069 1105 ret4:
↓ open down ↓ |
130 lines elided |
↑ open up ↑ |
1070 1106 big_finish(&tmp1);
1071 1107 ret3:
1072 1108 big_finish(&tmp2);
1073 1109 ret2:
1074 1110 big_finish(&bbhigh);
1075 1111 ret1:
1076 1112 big_finish(&bblow);
1077 1113 return (err);
1078 1114 }
1079 1115
1116 +
1080 1117 /*
1081 1118 * If there is no processor-specific integer implementation of
1082 1119 * the lower level multiply functions, then this code is provided
1083 1120 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1084 1121 * big_sqr_vec().
1085 1122 *
1086 1123 * There are two generic implementations. One that assumes that
1087 1124 * there is hardware and C compiler support for a 32 x 32 --> 64
1088 1125 * bit unsigned multiply, but otherwise is not specific to any
1089 1126 * processor, platform, or ISA.
1090 1127 *
1091 1128 * The other makes very few assumptions about hardware capabilities.
1092 1129 * It does not even assume that there is any implementation of a
1093 1130 * 32 x 32 --> 64 bit multiply that is accessible to C code and
1094 1131 * appropriate to use. It falls constructs 32 x 32 --> 64 bit
1095 1132 * multiplies from 16 x 16 --> 32 bit multiplies.
1096 1133 *
1097 1134 */
1098 1135
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
1099 1136 #if !defined(PSR_MUL)
1100 1137
1101 1138 #ifdef UMUL64
1102 1139
1103 1140 #if (BIG_CHUNK_SIZE == 32)
1104 1141
1105 1142 #define UNROLL8
1106 1143
1107 1144 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1108 1145 p = pf * d; \
1109 - pf = (uint64_t)a[R+1]; \
1146 + pf = (uint64_t)a[R + 1]; \
1110 1147 t = p + cy; \
1111 1148 r[R] = (uint32_t)t; \
1112 1149 cy = t >> 32
1113 1150
1114 1151 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1115 1152 p = pf * d; \
1116 1153 t = p + cy; \
1117 1154 r[R] = (uint32_t)t; \
1118 1155 cy = t >> 32
1119 1156
1120 1157 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1121 1158 t = (uint64_t)r[R]; \
1122 1159 p = pf * d; \
1123 - pf = (uint64_t)a[R+1]; \
1160 + pf = (uint64_t)a[R + 1]; \
1124 1161 t = p + t + cy; \
1125 1162 r[R] = (uint32_t)t; \
1126 1163 cy = t >> 32
1127 1164
1128 1165 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1129 1166 t = (uint64_t)r[R]; \
1130 1167 p = pf * d; \
1131 1168 t = p + t + cy; \
1132 1169 r[R] = (uint32_t)t; \
1133 1170 cy = t >> 32
1134 1171
1135 1172 #ifdef UNROLL8
1136 1173
1137 1174 #define UNROLL 8
1138 1175
1139 1176 /*
1140 1177 * r = a * b
1141 1178 * where r and a are vectors; b is a single 32-bit digit
1142 1179 */
1143 1180
1144 1181 uint32_t
1145 1182 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1146 1183 {
1147 1184 uint64_t d, pf, p, t, cy;
1148 1185
1149 1186 if (len == 0)
1150 1187 return (0);
1151 1188 cy = 0;
1152 1189 d = (uint64_t)b;
1153 1190 pf = (uint64_t)a[0];
1154 1191 while (len > UNROLL) {
1155 1192 MUL_SET_VEC_ROUND_PREFETCH(0);
1156 1193 MUL_SET_VEC_ROUND_PREFETCH(1);
1157 1194 MUL_SET_VEC_ROUND_PREFETCH(2);
1158 1195 MUL_SET_VEC_ROUND_PREFETCH(3);
1159 1196 MUL_SET_VEC_ROUND_PREFETCH(4);
1160 1197 MUL_SET_VEC_ROUND_PREFETCH(5);
1161 1198 MUL_SET_VEC_ROUND_PREFETCH(6);
1162 1199 MUL_SET_VEC_ROUND_PREFETCH(7);
1163 1200 r += UNROLL;
1164 1201 a += UNROLL;
1165 1202 len -= UNROLL;
1166 1203 }
1167 1204 if (len == UNROLL) {
1168 1205 MUL_SET_VEC_ROUND_PREFETCH(0);
1169 1206 MUL_SET_VEC_ROUND_PREFETCH(1);
1170 1207 MUL_SET_VEC_ROUND_PREFETCH(2);
1171 1208 MUL_SET_VEC_ROUND_PREFETCH(3);
1172 1209 MUL_SET_VEC_ROUND_PREFETCH(4);
1173 1210 MUL_SET_VEC_ROUND_PREFETCH(5);
1174 1211 MUL_SET_VEC_ROUND_PREFETCH(6);
1175 1212 MUL_SET_VEC_ROUND_NOPREFETCH(7);
1176 1213 return ((uint32_t)cy);
1177 1214 }
1178 1215 while (len > 1) {
1179 1216 MUL_SET_VEC_ROUND_PREFETCH(0);
1180 1217 ++r;
1181 1218 ++a;
1182 1219 --len;
1183 1220 }
1184 1221 if (len > 0) {
1185 1222 MUL_SET_VEC_ROUND_NOPREFETCH(0);
1186 1223 }
1187 1224 return ((uint32_t)cy);
1188 1225 }
1189 1226
1190 1227 /*
1191 1228 * r += a * b
1192 1229 * where r and a are vectors; b is a single 32-bit digit
1193 1230 */
1194 1231
1195 1232 uint32_t
1196 1233 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1197 1234 {
1198 1235 uint64_t d, pf, p, t, cy;
1199 1236
1200 1237 if (len == 0)
1201 1238 return (0);
1202 1239 cy = 0;
1203 1240 d = (uint64_t)b;
1204 1241 pf = (uint64_t)a[0];
1205 1242 while (len > 8) {
1206 1243 MUL_ADD_VEC_ROUND_PREFETCH(0);
1207 1244 MUL_ADD_VEC_ROUND_PREFETCH(1);
1208 1245 MUL_ADD_VEC_ROUND_PREFETCH(2);
1209 1246 MUL_ADD_VEC_ROUND_PREFETCH(3);
1210 1247 MUL_ADD_VEC_ROUND_PREFETCH(4);
1211 1248 MUL_ADD_VEC_ROUND_PREFETCH(5);
1212 1249 MUL_ADD_VEC_ROUND_PREFETCH(6);
1213 1250 MUL_ADD_VEC_ROUND_PREFETCH(7);
1214 1251 r += 8;
1215 1252 a += 8;
1216 1253 len -= 8;
1217 1254 }
1218 1255 if (len == 8) {
1219 1256 MUL_ADD_VEC_ROUND_PREFETCH(0);
1220 1257 MUL_ADD_VEC_ROUND_PREFETCH(1);
1221 1258 MUL_ADD_VEC_ROUND_PREFETCH(2);
1222 1259 MUL_ADD_VEC_ROUND_PREFETCH(3);
1223 1260 MUL_ADD_VEC_ROUND_PREFETCH(4);
1224 1261 MUL_ADD_VEC_ROUND_PREFETCH(5);
1225 1262 MUL_ADD_VEC_ROUND_PREFETCH(6);
1226 1263 MUL_ADD_VEC_ROUND_NOPREFETCH(7);
1227 1264 return ((uint32_t)cy);
1228 1265 }
1229 1266 while (len > 1) {
1230 1267 MUL_ADD_VEC_ROUND_PREFETCH(0);
1231 1268 ++r;
1232 1269 ++a;
1233 1270 --len;
1234 1271 }
↓ open down ↓ |
101 lines elided |
↑ open up ↑ |
1235 1272 if (len > 0) {
1236 1273 MUL_ADD_VEC_ROUND_NOPREFETCH(0);
1237 1274 }
1238 1275 return ((uint32_t)cy);
1239 1276 }
1240 1277 #endif /* UNROLL8 */
1241 1278
1242 1279 void
1243 1280 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1244 1281 {
1245 - uint32_t *tr, *ta;
1246 - int tlen, row, col;
1247 - uint64_t p, s, t, t2, cy;
1248 - uint32_t d;
1282 + uint32_t *tr, *ta;
1283 + int tlen, row, col;
1284 + uint64_t p, s, t, t2, cy;
1285 + uint32_t d;
1249 1286
1250 1287 tr = r + 1;
1251 1288 ta = a;
1252 1289 tlen = len - 1;
1253 1290 tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
1254 1291 while (--tlen > 0) {
1255 1292 tr += 2;
1256 1293 ++ta;
1257 1294 tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
1258 1295 }
1259 1296 s = (uint64_t)a[0];
1260 1297 s = s * s;
1261 1298 r[0] = (uint32_t)s;
1262 1299 cy = s >> 32;
1263 1300 p = ((uint64_t)r[1] << 1) + cy;
1264 1301 r[1] = (uint32_t)p;
1265 1302 cy = p >> 32;
1266 1303 row = 1;
1267 1304 col = 2;
1268 1305 while (row < len) {
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
1269 1306 s = (uint64_t)a[row];
1270 1307 s = s * s;
1271 1308 p = (uint64_t)r[col] << 1;
1272 1309 t = p + s;
1273 1310 d = (uint32_t)t;
1274 1311 t2 = (uint64_t)d + cy;
1275 1312 r[col] = (uint32_t)t2;
1276 1313 cy = (t >> 32) + (t2 >> 32);
1277 1314 if (row == len - 1)
1278 1315 break;
1279 - p = ((uint64_t)r[col+1] << 1) + cy;
1280 - r[col+1] = (uint32_t)p;
1316 + p = ((uint64_t)r[col + 1] << 1) + cy;
1317 + r[col + 1] = (uint32_t)p;
1281 1318 cy = p >> 32;
1282 1319 ++row;
1283 1320 col += 2;
1284 1321 }
1285 - r[col+1] = (uint32_t)cy;
1322 + r[col + 1] = (uint32_t)cy;
1286 1323 }
1287 1324
1288 1325 #else /* BIG_CHUNK_SIZE == 64 */
1289 1326
1290 1327 /*
1291 1328 * r = r + a * digit, r and a are vectors of length len
1292 1329 * returns the carry digit
1293 1330 */
1294 1331 BIG_CHUNK_TYPE
1295 1332 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1296 1333 BIG_CHUNK_TYPE digit)
1297 1334 {
1298 1335 BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
1299 1336 int i;
1300 1337
1301 1338 cy1 = 0;
1302 1339 dlow = digit & BIG_CHUNK_LOWHALFBITS;
1303 1340 dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1304 1341 for (i = 0; i < len; i++) {
1305 1342 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1306 1343 dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1307 1344 (r[i] & BIG_CHUNK_LOWHALFBITS);
1308 1345 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1309 1346 dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1310 1347 (r[i] >> (BIG_CHUNK_SIZE / 2));
1311 1348 r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
1312 1349 (cy1 << (BIG_CHUNK_SIZE / 2));
1313 1350 }
1314 1351 retcy = cy1 >> (BIG_CHUNK_SIZE / 2);
1315 1352
1316 1353 cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
1317 1354 for (i = 0; i < len - 1; i++) {
1318 1355 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1319 1356 dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1320 1357 (r[i] >> (BIG_CHUNK_SIZE / 2));
1321 1358 r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1322 1359 (cy << (BIG_CHUNK_SIZE / 2));
1323 1360 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1324 1361 dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1325 1362 (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
1326 1363 }
1327 1364 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1328 1365 dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
1329 1366 (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
1330 1367 r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1331 1368 (cy << (BIG_CHUNK_SIZE / 2));
1332 1369 retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
1333 1370 dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;
1334 1371
1335 1372 return (retcy);
1336 1373 }
1337 1374
1338 1375
1339 1376 /*
1340 1377 * r = a * digit, r and a are vectors of length len
1341 1378 * returns the carry digit
1342 1379 */
1343 1380 BIG_CHUNK_TYPE
1344 1381 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1345 1382 BIG_CHUNK_TYPE digit)
1346 1383 {
1347 1384 int i;
1348 1385
1349 1386 ASSERT(r != a);
1350 1387 for (i = 0; i < len; i++) {
1351 1388 r[i] = 0;
1352 1389 }
1353 1390 return (big_mul_add_vec(r, a, len, digit));
↓ open down ↓ |
58 lines elided |
↑ open up ↑ |
1354 1391 }
1355 1392
1356 1393 void
1357 1394 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1358 1395 {
1359 1396 int i;
1360 1397
1361 1398 ASSERT(r != a);
1362 1399 r[len] = big_mul_set_vec(r, a, len, a[0]);
1363 1400 for (i = 1; i < len; ++i)
1364 - r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1401 + r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1365 1402 }
1366 1403
1367 1404 #endif /* BIG_CHUNK_SIZE == 32/64 */
1368 1405
1406 +
1369 1407 #else /* ! UMUL64 */
1370 1408
1371 1409 #if (BIG_CHUNK_SIZE != 32)
1372 1410 #error Don't use 64-bit chunks without defining UMUL64
1373 1411 #endif
1374 1412
1375 1413
1376 1414 /*
1377 1415 * r = r + a * digit, r and a are vectors of length len
1378 1416 * returns the carry digit
1379 1417 */
1380 1418 uint32_t
1381 1419 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1382 1420 {
1383 1421 uint32_t cy, cy1, retcy, dlow, dhigh;
1384 1422 int i;
1385 1423
1386 1424 cy1 = 0;
1387 1425 dlow = digit & 0xffff;
1388 1426 dhigh = digit >> 16;
1389 1427 for (i = 0; i < len; i++) {
1390 1428 cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
1391 1429 cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
1392 1430 r[i] = (cy & 0xffff) | (cy1 << 16);
1393 1431 }
1394 1432 retcy = cy1 >> 16;
1395 1433
1396 1434 cy1 = r[0] & 0xffff;
1397 1435 for (i = 0; i < len - 1; i++) {
1398 1436 cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
1399 1437 r[i] = (cy1 & 0xffff) | (cy << 16);
1400 1438 cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
1401 1439 }
1402 1440 cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
1403 1441 r[len - 1] = (cy1 & 0xffff) | (cy << 16);
1404 1442 retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;
1405 1443
1406 1444 return (retcy);
1407 1445 }
1408 1446
1409 1447
1410 1448 /*
1411 1449 * r = a * digit, r and a are vectors of length len
1412 1450 * returns the carry digit
1413 1451 */
1414 1452 uint32_t
1415 1453 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1416 1454 {
1417 1455 int i;
1418 1456
1419 1457 ASSERT(r != a);
1420 1458 for (i = 0; i < len; i++) {
1421 1459 r[i] = 0;
1422 1460 }
1423 1461
1424 1462 return (big_mul_add_vec(r, a, len, digit));
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
1425 1463 }
1426 1464
1427 1465 void
1428 1466 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1429 1467 {
1430 1468 int i;
1431 1469
1432 1470 ASSERT(r != a);
1433 1471 r[len] = big_mul_set_vec(r, a, len, a[0]);
1434 1472 for (i = 1; i < len; ++i)
1435 - r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1473 + r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1436 1474 }
1437 1475
1438 1476 #endif /* UMUL64 */
1439 1477
1440 1478 void
1441 1479 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1442 1480 BIG_CHUNK_TYPE *b, int blen)
1443 1481 {
1444 1482 int i;
1445 1483
1446 1484 r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1447 1485 for (i = 1; i < blen; ++i)
1448 - r[alen + i] = big_mul_add_vec(r+i, a, alen, b[i]);
1486 + r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
1449 1487 }
1450 1488
1451 1489
1452 1490 #endif /* ! PSR_MUL */
1453 1491
1454 1492
1455 1493 /*
1456 1494 * result = aa * bb result->value should be big enough to hold the result
1457 1495 *
1458 1496 * Implementation: Standard grammar school algorithm
1459 1497 *
1460 1498 */
1461 1499 BIG_ERR_CODE
1462 1500 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1463 1501 {
1464 1502 BIGNUM tmp1;
1465 1503 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1466 1504 BIG_CHUNK_TYPE *r, *t, *a, *b;
1467 1505 BIG_ERR_CODE err;
1468 1506 int i, alen, blen, rsize, sign, diff;
1469 1507
1470 1508 if (aa == bb) {
1471 1509 diff = 0;
1472 1510 } else {
1473 1511 diff = big_cmp_abs(aa, bb);
1474 1512 if (diff < 0) {
1475 1513 BIGNUM *tt;
1476 1514 tt = aa;
1477 1515 aa = bb;
1478 1516 bb = tt;
1479 1517 }
1480 1518 }
1481 1519 a = aa->value;
1482 1520 b = bb->value;
1483 1521 alen = aa->len;
1484 1522 blen = bb->len;
↓ open down ↓ |
26 lines elided |
↑ open up ↑ |
1485 1523 while ((alen > 1) && (a[alen - 1] == 0)) {
1486 1524 alen--;
1487 1525 }
1488 1526 aa->len = alen;
1489 1527 while ((blen > 1) && (b[blen - 1] == 0)) {
1490 1528 blen--;
1491 1529 }
1492 1530 bb->len = blen;
1493 1531
1494 1532 rsize = alen + blen;
1533 + ASSERT(rsize > 0);
1495 1534 if (result->size < rsize) {
1496 1535 err = big_extend(result, rsize);
1497 1536 if (err != BIG_OK) {
1498 1537 return (err);
1499 1538 }
1500 1539 /* aa or bb might be an alias to result */
1501 1540 a = aa->value;
1502 1541 b = bb->value;
1503 1542 }
1504 1543 r = result->value;
1505 1544
1506 1545 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1507 1546 result->len = 1;
1508 1547 result->sign = 1;
1509 1548 r[0] = 0;
1510 1549 return (BIG_OK);
1511 1550 }
1512 1551 sign = aa->sign * bb->sign;
1513 1552 if ((alen == 1) && (a[0] == 1)) {
1514 1553 for (i = 0; i < blen; i++) {
1515 1554 r[i] = b[i];
1516 1555 }
1517 1556 result->len = blen;
1518 1557 result->sign = sign;
1519 1558 return (BIG_OK);
1520 1559 }
1521 1560 if ((blen == 1) && (b[0] == 1)) {
1522 1561 for (i = 0; i < alen; i++) {
1523 1562 r[i] = a[i];
1524 1563 }
1525 1564 result->len = alen;
1526 1565 result->sign = sign;
1527 1566 return (BIG_OK);
1528 1567 }
1529 1568
1530 1569 if ((err = big_init1(&tmp1, rsize,
1531 1570 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1532 1571 return (err);
1533 1572 }
1534 1573 (void) big_copy(&tmp1, aa);
1535 1574 t = tmp1.value;
1536 1575
1537 1576 for (i = 0; i < rsize; i++) {
1538 1577 t[i] = 0;
1539 1578 }
1540 1579
1541 1580 if (diff == 0 && alen > 2) {
↓ open down ↓ |
37 lines elided |
↑ open up ↑ |
1542 1581 BIG_SQR_VEC(t, a, alen);
1543 1582 } else if (blen > 0) {
1544 1583 BIG_MUL_VEC(t, a, alen, b, blen);
1545 1584 }
1546 1585
1547 1586 if (t[rsize - 1] == 0) {
1548 1587 tmp1.len = rsize - 1;
1549 1588 } else {
1550 1589 tmp1.len = rsize;
1551 1590 }
1552 - if ((err = big_copy(result, &tmp1)) != BIG_OK) {
1553 - return (err);
1554 - }
1591 +
1592 + err = big_copy(result, &tmp1);
1593 +
1555 1594 result->sign = sign;
1556 1595
1557 1596 big_finish(&tmp1);
1558 1597
1559 - return (BIG_OK);
1598 + return (err);
1560 1599 }
1561 1600
1562 1601
1563 1602 /*
1564 1603 * caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1
1565 1604 * and that ret is not n
1566 1605 */
1567 1606 BIG_ERR_CODE
1568 1607 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1569 1608 {
1570 1609 int i, j, nlen, needsubtract;
1571 1610 BIG_CHUNK_TYPE *nn, *rr;
1572 1611 BIG_CHUNK_TYPE digit, c;
1573 1612 BIG_ERR_CODE err;
1574 1613
1575 1614 nlen = n->len;
1576 1615 nn = n->value;
1577 1616
1578 1617 rr = ret->value;
1579 1618
1580 1619 if ((err = big_mul(ret, a, b)) != BIG_OK) {
1581 1620 return (err);
1582 1621 }
1583 1622
1584 1623 rr = ret->value;
1585 1624 for (i = ret->len; i < 2 * nlen + 1; i++) {
1586 1625 rr[i] = 0;
1587 1626 }
1588 1627 for (i = 0; i < nlen; i++) {
1589 1628 digit = rr[i];
1590 1629 digit = digit * n0;
1591 1630
1592 1631 c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit);
1593 1632 j = i + nlen;
1594 1633 rr[j] += c;
1595 1634 while (rr[j] < c) {
1596 1635 rr[j + 1] += 1;
1597 1636 j++;
1598 1637 c = 1;
1599 1638 }
1600 1639 }
1601 1640
1602 1641 needsubtract = 0;
1603 1642 if ((rr[2 * nlen] != 0))
1604 1643 needsubtract = 1;
1605 1644 else {
1606 1645 for (i = 2 * nlen - 1; i >= nlen; i--) {
1607 1646 if (rr[i] > nn[i - nlen]) {
1608 1647 needsubtract = 1;
1609 1648 break;
1610 1649 } else if (rr[i] < nn[i - nlen]) {
1611 1650 break;
↓ open down ↓ |
42 lines elided |
↑ open up ↑ |
1612 1651 }
1613 1652 }
1614 1653 }
1615 1654 if (needsubtract)
1616 1655 big_sub_vec(rr, rr + nlen, nn, nlen);
1617 1656 else {
1618 1657 for (i = 0; i < nlen; i++) {
1619 1658 rr[i] = rr[i + nlen];
1620 1659 }
1621 1660 }
1622 - for (i = nlen - 1; (i >= 0) && (rr[i] == 0); i--)
1661 +
1662 + /* Remove leading zeros, but keep at least 1 digit: */
1663 + for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
1623 1664 ;
1624 - ret->len = i+1;
1665 + ret->len = i + 1;
1625 1666
1626 1667 return (BIG_OK);
1627 1668 }
1628 1669
1629 1670
1630 1671 BIG_CHUNK_TYPE
1631 1672 big_n0(BIG_CHUNK_TYPE n)
1632 1673 {
1633 1674 int i;
1634 1675 BIG_CHUNK_TYPE result, tmp;
1635 1676
1636 1677 result = 0;
1637 1678 tmp = BIG_CHUNK_ALLBITS;
1638 1679 for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1639 1680 if ((tmp & 1) == 1) {
1640 1681 result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1641 1682 tmp = tmp - n;
1642 1683 } else {
1643 1684 result = (result >> 1);
1644 1685 }
1645 1686 tmp = tmp >> 1;
1646 1687 }
1647 1688
1648 1689 return (result);
1649 1690 }
1650 1691
1651 1692
1652 1693 int
1653 1694 big_numbits(BIGNUM *n)
1654 1695 {
1655 1696 int i, j;
1656 1697 BIG_CHUNK_TYPE t;
1657 1698
1658 1699 for (i = n->len - 1; i > 0; i--) {
1659 1700 if (n->value[i] != 0) {
1660 1701 break;
1661 1702 }
1662 1703 }
1663 1704 t = n->value[i];
1664 1705 for (j = BIG_CHUNK_SIZE; j > 0; j--) {
1665 1706 if ((t & BIG_CHUNK_HIGHBIT) == 0) {
1666 1707 t = t << 1;
1667 1708 } else {
1668 1709 return (BIG_CHUNK_SIZE * i + j);
1669 1710 }
1670 1711 }
1671 1712 return (0);
1672 1713 }
1673 1714
1674 1715
1675 1716 /* caller must make sure that a < n */
1676 1717 BIG_ERR_CODE
1677 1718 big_mont_rr(BIGNUM *result, BIGNUM *n)
1678 1719 {
1679 1720 BIGNUM rr;
1680 1721 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1681 1722 int len, i;
1682 1723 BIG_ERR_CODE err;
1683 1724
1684 1725 rr.malloced = 0;
1685 1726 len = n->len;
1686 1727
1687 1728 if ((err = big_init1(&rr, 2 * len + 1,
1688 1729 rrvalue, arraysize(rrvalue))) != BIG_OK) {
1689 1730 return (err);
1690 1731 }
1691 1732
1692 1733 for (i = 0; i < 2 * len; i++) {
1693 1734 rr.value[i] = 0;
1694 1735 }
1695 1736 rr.value[2 * len] = 1;
1696 1737 rr.len = 2 * len + 1;
1697 1738 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1698 1739 goto ret;
1699 1740 }
1700 1741 err = big_copy(result, &rr);
1701 1742 ret:
1702 1743 big_finish(&rr);
1703 1744 return (err);
1704 1745 }
1705 1746
1706 1747
1707 1748 /* caller must make sure that a < n */
1708 1749 BIG_ERR_CODE
1709 1750 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
1710 1751 BIGNUM *n_rr)
1711 1752 {
1712 1753 BIGNUM rr;
1713 1754 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1714 1755 int len, i;
1715 1756 BIG_ERR_CODE err;
1716 1757
1717 1758 rr.malloced = 0;
1718 1759 len = n->len;
1719 1760
1720 1761 if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
1721 1762 != BIG_OK) {
1722 1763 return (err);
1723 1764 }
1724 1765
1725 1766 if (n_rr == NULL) {
1726 1767 for (i = 0; i < 2 * len; i++) {
1727 1768 rr.value[i] = 0;
1728 1769 }
1729 1770 rr.value[2 * len] = 1;
1730 1771 rr.len = 2 * len + 1;
1731 1772 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1732 1773 goto ret;
1733 1774 }
1734 1775 n_rr = &rr;
1735 1776 }
1736 1777
1737 1778 if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
1738 1779 goto ret;
1739 1780 }
1740 1781 err = big_copy(result, &rr);
1741 1782
1742 1783 ret:
1743 1784 big_finish(&rr);
1744 1785 return (err);
1745 1786 }
1746 1787
1747 1788
1748 1789 #ifdef USE_FLOATING_POINT
1749 1790 #define big_modexp_ncp_float big_modexp_ncp_sw
1750 1791 #else
1751 1792 #define big_modexp_ncp_int big_modexp_ncp_sw
1752 1793 #endif
1753 1794
1754 1795 #define MAX_EXP_BIT_GROUP_SIZE 6
1755 1796 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1756 1797
↓ open down ↓ |
122 lines elided |
↑ open up ↑ |
1757 1798 /* ARGSUSED */
1758 1799 static BIG_ERR_CODE
1759 1800 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1760 1801 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1761 1802
1762 1803 {
1763 1804 BIGNUM apowers[APOWERS_MAX_SIZE];
1764 1805 BIGNUM tmp1;
1765 1806 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1766 1807 int i, j, k, l, m, p;
1767 - int bit, bitind, bitcount, groupbits, apowerssize;
1768 - int nbits;
1808 + uint32_t bit, bitind, bitcount, groupbits, apowerssize;
1809 + uint32_t nbits;
1769 1810 BIG_ERR_CODE err;
1770 1811
1771 1812 nbits = big_numbits(e);
1772 1813 if (nbits < 50) {
1773 1814 groupbits = 1;
1774 1815 apowerssize = 1;
1775 1816 } else {
1776 1817 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1777 1818 apowerssize = 1 << (groupbits - 1);
1778 1819 }
1779 1820
1780 1821
1781 1822 if ((err = big_init1(&tmp1, 2 * n->len + 1,
1782 1823 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1783 1824 return (err);
1784 1825 }
1785 1826
1786 - /* set the malloced bit to help cleanup */
1827 + /* clear the malloced bit to help cleanup */
1787 1828 for (i = 0; i < apowerssize; i++) {
1788 1829 apowers[i].malloced = 0;
1789 1830 }
1831 +
1790 1832 for (i = 0; i < apowerssize; i++) {
1791 1833 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1792 1834 BIG_OK) {
1793 1835 goto ret;
1794 1836 }
1795 1837 }
1796 1838
1797 1839 (void) big_copy(&(apowers[0]), ma);
1798 1840
1799 1841 if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1800 1842 goto ret;
1801 1843 }
1802 1844 (void) big_copy(ma, &tmp1);
1803 1845
1804 1846 for (i = 1; i < apowerssize; i++) {
1805 1847 if ((err = big_mont_mul(&tmp1, ma,
1806 - &(apowers[i-1]), n, n0)) != BIG_OK) {
1848 + &(apowers[i - 1]), n, n0)) != BIG_OK) {
1807 1849 goto ret;
1808 1850 }
1809 1851 (void) big_copy(&apowers[i], &tmp1);
1810 1852 }
1811 1853
1812 1854 bitind = nbits % BIG_CHUNK_SIZE;
1813 1855 k = 0;
1814 1856 l = 0;
1815 1857 p = 0;
1816 1858 bitcount = 0;
1817 1859 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1818 1860 for (j = bitind - 1; j >= 0; j--) {
1819 1861 bit = (e->value[i] >> j) & 1;
1820 1862 if ((bitcount == 0) && (bit == 0)) {
1821 1863 if ((err = big_mont_mul(tmp,
1822 1864 tmp, tmp, n, n0)) != BIG_OK) {
1823 1865 goto ret;
1824 1866 }
1825 1867 } else {
1826 1868 bitcount++;
1827 1869 p = p * 2 + bit;
1828 1870 if (bit == 1) {
1829 1871 k = k + l + 1;
1830 1872 l = 0;
1831 1873 } else {
1832 1874 l++;
1833 1875 }
1834 1876 if (bitcount == groupbits) {
1835 1877 for (m = 0; m < k; m++) {
1836 1878 if ((err = big_mont_mul(tmp,
1837 1879 tmp, tmp, n, n0)) !=
1838 1880 BIG_OK) {
1839 1881 goto ret;
1840 1882 }
1841 1883 }
1842 1884 if ((err = big_mont_mul(tmp, tmp,
1843 1885 &(apowers[p >> (l + 1)]),
1844 1886 n, n0)) != BIG_OK) {
1845 1887 goto ret;
1846 1888 }
1847 1889 for (m = 0; m < l; m++) {
1848 1890 if ((err = big_mont_mul(tmp,
1849 1891 tmp, tmp, n, n0)) !=
1850 1892 BIG_OK) {
1851 1893 goto ret;
1852 1894 }
1853 1895 }
1854 1896 k = 0;
1855 1897 l = 0;
1856 1898 p = 0;
1857 1899 bitcount = 0;
1858 1900 }
1859 1901 }
1860 1902 }
1861 1903 bitind = BIG_CHUNK_SIZE;
1862 1904 }
1863 1905
1864 1906 for (m = 0; m < k; m++) {
1865 1907 if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
1866 1908 goto ret;
1867 1909 }
1868 1910 }
1869 1911 if (p != 0) {
1870 1912 if ((err = big_mont_mul(tmp, tmp,
1871 1913 &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
1872 1914 goto ret;
1873 1915 }
1874 1916 }
1875 1917 for (m = 0; m < l; m++) {
1876 1918 if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
1877 1919 goto ret;
1878 1920 }
1879 1921 }
1880 1922
1881 1923 ret:
1882 1924 for (i = apowerssize - 1; i >= 0; i--) {
1883 1925 big_finish(&(apowers[i]));
1884 1926 }
1885 1927 big_finish(&tmp1);
1886 1928
1887 1929 return (err);
1888 1930 }
1889 1931
1890 1932
1891 1933 #ifdef USE_FLOATING_POINT
1892 1934
1893 1935 #ifdef _KERNEL
1894 1936
1895 1937 #include <sys/sysmacros.h>
1896 1938 #include <sys/regset.h>
1897 1939 #include <sys/fpu/fpusystm.h>
1898 1940
1899 1941 /* the alignment for block stores to save fp registers */
1900 1942 #define FPR_ALIGN (64)
1901 1943
1902 1944 extern void big_savefp(kfpu_t *);
1903 1945 extern void big_restorefp(kfpu_t *);
1904 1946
↓ open down ↓ |
88 lines elided |
↑ open up ↑ |
1905 1947 #endif /* _KERNEL */
1906 1948
1907 1949 /*
1908 1950 * This version makes use of floating point for performance
1909 1951 */
1910 1952 static BIG_ERR_CODE
1911 1953 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1912 1954 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1913 1955 {
1914 1956
1915 - int i, j, k, l, m, p, bit, bitind, bitcount, nlen;
1957 + int i, j, k, l, m, p;
1958 + uint32_t bit, bitind, bitcount, nlen;
1916 1959 double dn0;
1917 1960 double *dn, *dt, *d16r, *d32r;
1918 1961 uint32_t *nint, *prod;
1919 1962 double *apowers[APOWERS_MAX_SIZE];
1920 - int nbits, groupbits, apowerssize;
1963 + uint32_t nbits, groupbits, apowerssize;
1921 1964 BIG_ERR_CODE err = BIG_OK;
1922 1965
1923 1966 #ifdef _KERNEL
1924 1967 uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1925 1968 kfpu_t *fpu;
1926 1969
1927 1970 #ifdef DEBUG
1928 1971 if (!fpu_exists)
1929 1972 return (BIG_GENERAL_ERR);
1930 1973 #endif
1931 1974
1932 1975 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1933 1976 big_savefp(fpu);
1934 1977
1935 1978 #endif /* _KERNEL */
1936 1979
1937 1980 nbits = big_numbits(e);
1938 1981 if (nbits < 50) {
1939 1982 groupbits = 1;
1940 1983 apowerssize = 1;
1941 1984 } else {
1942 1985 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1943 1986 apowerssize = 1 << (groupbits - 1);
1944 1987 }
1945 1988
1946 1989 nlen = (BIG_CHUNK_SIZE / 32) * n->len;
1947 1990 dn0 = (double)(n0 & 0xffff);
1948 1991
1949 1992 dn = dt = d16r = d32r = NULL;
1950 1993 nint = prod = NULL;
1951 1994 for (i = 0; i < apowerssize; i++) {
1952 1995 apowers[i] = NULL;
1953 1996 }
1954 1997
1955 1998 if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
1956 1999 err = BIG_NO_MEM;
1957 2000 goto ret;
1958 2001 }
1959 2002 if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
1960 2003 err = BIG_NO_MEM;
1961 2004 goto ret;
1962 2005 }
1963 2006 if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
1964 2007 err = BIG_NO_MEM;
1965 2008 goto ret;
1966 2009 }
1967 2010 if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
1968 2011 err = BIG_NO_MEM;
1969 2012 goto ret;
1970 2013 }
1971 2014 if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
1972 2015 err = BIG_NO_MEM;
1973 2016 goto ret;
1974 2017 }
1975 2018 if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
1976 2019 err = BIG_NO_MEM;
1977 2020 goto ret;
1978 2021 }
1979 2022 for (i = 0; i < apowerssize; i++) {
1980 2023 if ((apowers[i] = big_malloc((2 * nlen + 1) *
1981 2024 sizeof (double))) == NULL) {
1982 2025 err = BIG_NO_MEM;
1983 2026 goto ret;
1984 2027 }
1985 2028 }
1986 2029
1987 2030 #if (BIG_CHUNK_SIZE == 32)
1988 2031 for (i = 0; i < ma->len; i++) {
1989 2032 nint[i] = ma->value[i];
1990 2033 }
1991 2034 for (; i < nlen; i++) {
1992 2035 nint[i] = 0;
1993 2036 }
1994 2037 #else
1995 2038 for (i = 0; i < ma->len; i++) {
1996 2039 nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
1997 2040 nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
1998 2041 }
1999 2042 for (i = ma->len * 2; i < nlen; i++) {
2000 2043 nint[i] = 0;
2001 2044 }
2002 2045 #endif
2003 2046 conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);
2004 2047
2005 2048 #if (BIG_CHUNK_SIZE == 32)
2006 2049 for (i = 0; i < n->len; i++) {
2007 2050 nint[i] = n->value[i];
2008 2051 }
2009 2052 for (; i < nlen; i++) {
2010 2053 nint[i] = 0;
2011 2054 }
2012 2055 #else
2013 2056 for (i = 0; i < n->len; i++) {
2014 2057 nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
2015 2058 nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
2016 2059 }
2017 2060 for (i = n->len * 2; i < nlen; i++) {
2018 2061 nint[i] = 0;
2019 2062 }
2020 2063 #endif
2021 2064 conv_i32_to_d32(dn, nint, nlen);
2022 2065
2023 2066 mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
2024 2067 conv_i32_to_d32(d32r, prod, nlen);
2025 2068 for (i = 1; i < apowerssize; i++) {
2026 2069 mont_mulf_noconv(prod, d32r, apowers[i - 1],
2027 2070 dt, dn, nint, nlen, dn0);
2028 2071 conv_i32_to_d16(apowers[i], prod, nlen);
2029 2072 }
2030 2073
2031 2074 #if (BIG_CHUNK_SIZE == 32)
2032 2075 for (i = 0; i < tmp->len; i++) {
2033 2076 prod[i] = tmp->value[i];
2034 2077 }
2035 2078 for (; i < nlen + 1; i++) {
2036 2079 prod[i] = 0;
2037 2080 }
2038 2081 #else
2039 2082 for (i = 0; i < tmp->len; i++) {
2040 2083 prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
2041 2084 prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
2042 2085 }
2043 2086 for (i = tmp->len * 2; i < nlen + 1; i++) {
2044 2087 prod[i] = 0;
2045 2088 }
2046 2089 #endif
2047 2090
2048 2091 bitind = nbits % BIG_CHUNK_SIZE;
2049 2092 k = 0;
2050 2093 l = 0;
2051 2094 p = 0;
2052 2095 bitcount = 0;
2053 2096 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
2054 2097 for (j = bitind - 1; j >= 0; j--) {
2055 2098 bit = (e->value[i] >> j) & 1;
2056 2099 if ((bitcount == 0) && (bit == 0)) {
2057 2100 conv_i32_to_d32_and_d16(d32r, d16r,
2058 2101 prod, nlen);
2059 2102 mont_mulf_noconv(prod, d32r, d16r,
2060 2103 dt, dn, nint, nlen, dn0);
2061 2104 } else {
2062 2105 bitcount++;
2063 2106 p = p * 2 + bit;
2064 2107 if (bit == 1) {
2065 2108 k = k + l + 1;
2066 2109 l = 0;
2067 2110 } else {
2068 2111 l++;
2069 2112 }
↓ open down ↓ |
139 lines elided |
↑ open up ↑ |
2070 2113 if (bitcount == groupbits) {
2071 2114 for (m = 0; m < k; m++) {
2072 2115 conv_i32_to_d32_and_d16(d32r,
2073 2116 d16r, prod, nlen);
2074 2117 mont_mulf_noconv(prod, d32r,
2075 2118 d16r, dt, dn, nint,
2076 2119 nlen, dn0);
2077 2120 }
2078 2121 conv_i32_to_d32(d32r, prod, nlen);
2079 2122 mont_mulf_noconv(prod, d32r,
2080 - apowers[p >> (l+1)],
2123 + apowers[p >> (l + 1)],
2081 2124 dt, dn, nint, nlen, dn0);
2082 2125 for (m = 0; m < l; m++) {
2083 2126 conv_i32_to_d32_and_d16(d32r,
2084 2127 d16r, prod, nlen);
2085 2128 mont_mulf_noconv(prod, d32r,
2086 2129 d16r, dt, dn, nint,
2087 2130 nlen, dn0);
2088 2131 }
2089 2132 k = 0;
2090 2133 l = 0;
2091 2134 p = 0;
2092 2135 bitcount = 0;
2093 2136 }
2094 2137 }
2095 2138 }
2096 2139 bitind = BIG_CHUNK_SIZE;
2097 2140 }
2098 2141
2099 2142 for (m = 0; m < k; m++) {
2100 2143 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2101 2144 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2102 2145 }
2103 2146 if (p != 0) {
2104 2147 conv_i32_to_d32(d32r, prod, nlen);
2105 2148 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
2106 2149 dt, dn, nint, nlen, dn0);
2107 2150 }
2108 2151 for (m = 0; m < l; m++) {
2109 2152 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2110 2153 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2111 2154 }
2112 2155
2113 2156 #if (BIG_CHUNK_SIZE == 32)
2114 2157 for (i = 0; i < nlen; i++) {
2115 2158 result->value[i] = prod[i];
2116 2159 }
2117 2160 for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
2118 2161 ;
2119 2162 #else
2120 2163 for (i = 0; i < nlen / 2; i++) {
2121 2164 result->value[i] = (uint64_t)(prod[2 * i]) +
2122 2165 (((uint64_t)(prod[2 * i + 1])) << 32);
2123 2166 }
2124 2167 for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
2125 2168 ;
2126 2169 #endif
2127 2170 result->len = i + 1;
2128 2171
2129 2172 ret:
2130 2173 for (i = apowerssize - 1; i >= 0; i--) {
2131 2174 if (apowers[i] != NULL)
2132 2175 big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
2133 2176 }
2134 2177 if (d32r != NULL) {
2135 2178 big_free(d32r, nlen * sizeof (double));
2136 2179 }
2137 2180 if (d16r != NULL) {
2138 2181 big_free(d16r, (2 * nlen + 1) * sizeof (double));
2139 2182 }
2140 2183 if (prod != NULL) {
2141 2184 big_free(prod, (nlen + 1) * sizeof (uint32_t));
2142 2185 }
2143 2186 if (nint != NULL) {
2144 2187 big_free(nint, nlen * sizeof (uint32_t));
2145 2188 }
2146 2189 if (dt != NULL) {
2147 2190 big_free(dt, (4 * nlen + 2) * sizeof (double));
2148 2191 }
2149 2192 if (dn != NULL) {
2150 2193 big_free(dn, nlen * sizeof (double));
2151 2194 }
2152 2195
2153 2196 #ifdef _KERNEL
2154 2197 big_restorefp(fpu);
2155 2198 #endif
2156 2199
2157 2200 return (err);
2158 2201 }
2159 2202
2160 2203 #endif /* USE_FLOATING_POINT */
2161 2204
2162 2205
2163 2206 BIG_ERR_CODE
2164 2207 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr,
2165 2208 big_modexp_ncp_info_t *info)
2166 2209 {
2167 2210 BIGNUM ma, tmp, rr;
2168 2211 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE];
2169 2212 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2170 2213 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
2171 2214 BIG_ERR_CODE err;
2172 2215 BIG_CHUNK_TYPE n0;
2173 2216
2174 2217 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) !=
2175 2218 BIG_OK) {
↓ open down ↓ |
85 lines elided |
↑ open up ↑ |
2176 2219 return (err);
2177 2220 }
2178 2221 ma.len = 1;
2179 2222 ma.value[0] = 0;
2180 2223
2181 2224 if ((err = big_init1(&tmp, 2 * n->len + 1,
2182 2225 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2183 2226 goto ret1;
2184 2227 }
2185 2228
2186 - /* set the malloced bit to help cleanup */
2229 + /* clear the malloced bit to help cleanup */
2187 2230 rr.malloced = 0;
2231 +
2188 2232 if (n_rr == NULL) {
2189 2233 if ((err = big_init1(&rr, 2 * n->len + 1,
2190 2234 rrvalue, arraysize(rrvalue))) != BIG_OK) {
2191 2235 goto ret2;
2192 2236 }
2193 2237 if (big_mont_rr(&rr, n) != BIG_OK) {
2194 2238 goto ret;
2195 2239 }
2196 2240 n_rr = &rr;
2197 2241 }
2198 2242
2199 2243 n0 = big_n0(n->value[0]);
2200 2244
2201 2245 if (big_cmp_abs(a, n) > 0) {
2202 2246 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2203 2247 goto ret;
2204 2248 }
2205 2249 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2206 2250 } else {
2207 2251 err = big_mont_conv(&ma, a, n, n0, n_rr);
2208 2252 }
2209 2253 if (err != BIG_OK) {
2210 2254 goto ret;
2211 2255 }
2212 2256
2213 2257 tmp.len = 1;
2214 2258 tmp.value[0] = 1;
2215 2259 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) {
2216 2260 goto ret;
2217 2261 }
2218 2262
2219 2263 if ((info != NULL) && (info->func != NULL)) {
2220 2264 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0,
2221 2265 info->ncp, info->reqp);
2222 2266 } else {
2223 2267 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0);
2224 2268 }
2225 2269 if (err != BIG_OK) {
2226 2270 goto ret;
2227 2271 }
2228 2272
2229 2273 ma.value[0] = 1;
2230 2274 ma.len = 1;
2231 2275 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) {
2232 2276 goto ret;
2233 2277 }
2234 2278 err = big_copy(result, &tmp);
2235 2279
2236 2280 ret:
2237 2281 if (rr.malloced) {
2238 2282 big_finish(&rr);
2239 2283 }
2240 2284 ret2:
2241 2285 big_finish(&tmp);
2242 2286 ret1:
2243 2287 big_finish(&ma);
2244 2288
2245 2289 return (err);
2246 2290 }
2247 2291
2248 2292 BIG_ERR_CODE
2249 2293 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
2250 2294 {
2251 2295 return (big_modexp_ext(result, a, e, n, n_rr, NULL));
2252 2296 }
2253 2297
2254 2298
2255 2299 BIG_ERR_CODE
2256 2300 big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2257 2301 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2258 2302 BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
2259 2303 {
2260 2304 BIGNUM ap, aq, tmp;
2261 2305 int alen, biglen, sign;
2262 2306 BIG_ERR_CODE err;
2263 2307
2264 2308 if (p->len > q->len) {
2265 2309 biglen = p->len;
2266 2310 } else {
2267 2311 biglen = q->len;
2268 2312 }
2269 2313
2270 2314 if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
2271 2315 return (err);
2272 2316 }
2273 2317 if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
2274 2318 goto ret1;
2275 2319 }
2276 2320 if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
2277 2321 goto ret2;
2278 2322 }
2279 2323
2280 2324 /*
2281 2325 * check whether a is too short - to avoid timing attacks
2282 2326 */
2283 2327 alen = a->len;
2284 2328 while ((alen > p->len) && (a->value[alen - 1] == 0)) {
2285 2329 alen--;
2286 2330 }
2287 2331 if (alen < p->len + q->len) {
2288 2332 /*
2289 2333 * a is too short, add p*q to it before
2290 2334 * taking it modulo p and q
2291 2335 * this will also affect timing, but this difference
2292 2336 * does not depend on p or q, only on a
2293 2337 * (in "normal" operation, this path will never be
2294 2338 * taken, so it is not a performance penalty
2295 2339 */
2296 2340 if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
2297 2341 goto ret;
2298 2342 }
2299 2343 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
2300 2344 goto ret;
2301 2345 }
2302 2346 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
2303 2347 goto ret;
2304 2348 }
2305 2349 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2306 2350 goto ret;
2307 2351 }
2308 2352 } else {
2309 2353 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
2310 2354 goto ret;
2311 2355 }
2312 2356 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
2313 2357 goto ret;
2314 2358 }
2315 2359 }
2316 2360
2317 2361 if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
2318 2362 BIG_OK) {
2319 2363 goto ret;
2320 2364 }
2321 2365 if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
2322 2366 BIG_OK) {
2323 2367 goto ret;
2324 2368 }
2325 2369 if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
2326 2370 goto ret;
2327 2371 }
2328 2372 if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
2329 2373 goto ret;
2330 2374 }
2331 2375 sign = tmp.sign;
2332 2376 tmp.sign = 1;
2333 2377 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2334 2378 goto ret;
2335 2379 }
2336 2380 if ((sign == -1) && (!big_is_zero(&aq))) {
2337 2381 (void) big_sub_pos(&aq, q, &aq);
2338 2382 }
2339 2383 if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
2340 2384 goto ret;
2341 2385 }
2342 2386 err = big_add_abs(result, &ap, &tmp);
2343 2387
2344 2388 ret:
2345 2389 big_finish(&tmp);
2346 2390 ret2:
2347 2391 big_finish(&aq);
2348 2392 ret1:
2349 2393 big_finish(&ap);
2350 2394
2351 2395 return (err);
2352 2396 }
2353 2397
2354 2398
2355 2399 BIG_ERR_CODE
2356 2400 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2357 2401 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2358 2402 BIGNUM *p_rr, BIGNUM *q_rr)
2359 2403 {
2360 2404 return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
2361 2405 p, q, pinvmodq, p_rr, q_rr, NULL));
2362 2406 }
2363 2407
2364 2408
2365 2409 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2366 2410 BIGNUM big_One = {1, 1, 1, 0, onearr};
2367 2411
2368 2412 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2369 2413 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2370 2414
2371 2415 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2372 2416 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2373 2417
↓ open down ↓ |
176 lines elided |
↑ open up ↑ |
2374 2418
2375 2419 BIG_ERR_CODE
2376 2420 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2377 2421 {
2378 2422 BIGNUM *high, *low, *mid, *t;
2379 2423 BIGNUM t1, t2, t3, prod;
2380 2424 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2381 2425 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2382 2426 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2383 2427 BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE];
2384 - int i, nbits, diff, nrootbits, highbits;
2428 + int i, diff;
2429 + uint32_t nbits, nrootbits, highbits;
2385 2430 BIG_ERR_CODE err;
2386 2431
2387 2432 nbits = big_numbits(n);
2388 2433
2389 2434 if ((err = big_init1(&t1, n->len + 1,
2390 2435 t1value, arraysize(t1value))) != BIG_OK)
2391 2436 return (err);
2392 2437 if ((err = big_init1(&t2, n->len + 1,
2393 2438 t2value, arraysize(t2value))) != BIG_OK)
2394 2439 goto ret1;
2395 2440 if ((err = big_init1(&t3, n->len + 1,
2396 2441 t3value, arraysize(t3value))) != BIG_OK)
2397 2442 goto ret2;
2398 2443 if ((err = big_init1(&prod, n->len + 1,
2399 2444 prodvalue, arraysize(prodvalue))) != BIG_OK)
2400 2445 goto ret3;
2401 2446
2402 2447 nrootbits = (nbits + 1) / 2;
2403 2448 t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2404 2449 for (i = 0; i < t1.len; i++) {
2405 2450 t1.value[i] = 0;
2406 2451 t2.value[i] = BIG_CHUNK_ALLBITS;
2407 2452 }
2408 2453 highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
2409 2454 if (highbits == BIG_CHUNK_SIZE) {
2410 2455 t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
2411 2456 t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
2412 2457 } else {
2413 2458 t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
2414 2459 t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
2415 2460 }
2416 2461
2417 2462 high = &t2;
2418 2463 low = &t1;
2419 2464 mid = &t3;
2420 2465
2421 2466 if ((err = big_mul(&prod, high, high)) != BIG_OK) {
2422 2467 goto ret;
2423 2468 }
2424 2469 diff = big_cmp_abs(&prod, n);
2425 2470 if (diff <= 0) {
2426 2471 err = big_copy(result, high);
2427 2472 goto ret;
2428 2473 }
2429 2474
2430 2475 (void) big_sub_pos(mid, high, low);
2431 2476 while (big_cmp_abs(&big_One, mid) != 0) {
2432 2477 (void) big_add_abs(mid, high, low);
2433 2478 (void) big_half_pos(mid, mid);
2434 2479 if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
2435 2480 goto ret;
2436 2481 diff = big_cmp_abs(&prod, n);
2437 2482 if (diff > 0) {
2438 2483 t = high;
2439 2484 high = mid;
2440 2485 mid = t;
2441 2486 } else if (diff < 0) {
2442 2487 t = low;
2443 2488 low = mid;
2444 2489 mid = t;
2445 2490 } else {
2446 2491 err = big_copy(result, low);
2447 2492 goto ret;
2448 2493 }
2449 2494 (void) big_sub_pos(mid, high, low);
2450 2495 }
2451 2496
2452 2497 err = big_copy(result, low);
2453 2498 ret:
2454 2499 if (prod.malloced) big_finish(&prod);
2455 2500 ret3:
2456 2501 if (t3.malloced) big_finish(&t3);
2457 2502 ret2:
2458 2503 if (t2.malloced) big_finish(&t2);
2459 2504 ret1:
2460 2505 if (t1.malloced) big_finish(&t1);
2461 2506
2462 2507 return (err);
2463 2508 }
2464 2509
2465 2510
2466 2511 BIG_ERR_CODE
2467 2512 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
2468 2513 {
2469 2514 BIGNUM *t, *tmp2, *m, *n;
2470 2515 BIGNUM t1, t2, t3;
2471 2516 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2472 2517 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2473 2518 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2474 2519 int len, err;
2475 2520
2476 2521 if (big_is_zero(nn) ||
2477 2522 (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
2478 2523 *jac = 0;
2479 2524 return (BIG_OK);
2480 2525 }
2481 2526
2482 2527 if (nn->len > mm->len) {
2483 2528 len = nn->len;
2484 2529 } else {
2485 2530 len = mm->len;
2486 2531 }
2487 2532
2488 2533 if ((err = big_init1(&t1, len,
2489 2534 t1value, arraysize(t1value))) != BIG_OK) {
2490 2535 return (err);
2491 2536 }
2492 2537 if ((err = big_init1(&t2, len,
2493 2538 t2value, arraysize(t2value))) != BIG_OK) {
2494 2539 goto ret1;
2495 2540 }
2496 2541 if ((err = big_init1(&t3, len,
2497 2542 t3value, arraysize(t3value))) != BIG_OK) {
2498 2543 goto ret2;
2499 2544 }
2500 2545
2501 2546 n = &t1;
2502 2547 m = &t2;
2503 2548 tmp2 = &t3;
2504 2549
2505 2550 (void) big_copy(n, nn);
2506 2551 (void) big_copy(m, mm);
2507 2552
2508 2553 *jac = 1;
2509 2554 while (big_cmp_abs(&big_One, m) != 0) {
2510 2555 if (big_is_zero(n)) {
2511 2556 *jac = 0;
2512 2557 goto ret;
2513 2558 }
2514 2559 if ((m->value[0] & 1) == 0) {
2515 2560 if (((n->value[0] & 7) == 3) ||
2516 2561 ((n->value[0] & 7) == 5))
2517 2562 *jac = -*jac;
2518 2563 (void) big_half_pos(m, m);
2519 2564 } else if ((n->value[0] & 1) == 0) {
2520 2565 if (((m->value[0] & 7) == 3) ||
2521 2566 ((m->value[0] & 7) == 5))
2522 2567 *jac = -*jac;
2523 2568 (void) big_half_pos(n, n);
2524 2569 } else {
2525 2570 if (((m->value[0] & 3) == 3) &&
2526 2571 ((n->value[0] & 3) == 3)) {
2527 2572 *jac = -*jac;
2528 2573 }
2529 2574 if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
2530 2575 goto ret;
2531 2576 }
2532 2577 t = tmp2;
2533 2578 tmp2 = m;
2534 2579 m = n;
2535 2580 n = t;
2536 2581 }
2537 2582 }
2538 2583 err = BIG_OK;
2539 2584
2540 2585 ret:
2541 2586 if (t3.malloced) big_finish(&t3);
2542 2587 ret2:
2543 2588 if (t2.malloced) big_finish(&t2);
↓ open down ↓ |
149 lines elided |
↑ open up ↑ |
2544 2589 ret1:
2545 2590 if (t1.malloced) big_finish(&t1);
2546 2591
2547 2592 return (err);
2548 2593 }
2549 2594
2550 2595
2551 2596 BIG_ERR_CODE
2552 2597 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2553 2598 {
2554 - int m, w, i;
2599 + int i;
2600 + uint32_t m, w;
2555 2601 BIG_CHUNK_TYPE bit;
2556 2602 BIGNUM ki, tmp, tmp2;
2557 2603 BIG_CHUNK_TYPE kivalue[BIGTMPSIZE];
2558 2604 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2559 2605 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
2560 2606 BIG_ERR_CODE err;
2561 2607
2562 2608 if (big_cmp_abs(k, &big_One) == 0) {
2563 2609 (void) big_copy(Lk, p);
2564 2610 (void) big_copy(Lkminus1, &big_Two);
2565 2611 return (BIG_OK);
2566 2612 }
2567 2613
2568 2614 if ((err = big_init1(&ki, k->len + 1,
2569 2615 kivalue, arraysize(kivalue))) != BIG_OK)
2570 2616 return (err);
2571 2617
2572 - if ((err = big_init1(&tmp, 2 * n->len +1,
2618 + if ((err = big_init1(&tmp, 2 * n->len + 1,
2573 2619 tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2574 2620 goto ret1;
2575 2621
2576 2622 if ((err = big_init1(&tmp2, n->len,
2577 2623 tmp2value, arraysize(tmp2value))) != BIG_OK)
2578 2624 goto ret2;
2579 2625
2580 2626 m = big_numbits(k);
2581 2627 ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2582 2628 w = (m - 1) / BIG_CHUNK_SIZE;
2583 2629 bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2584 2630 for (i = 0; i < ki.len; i++) {
2585 2631 ki.value[i] = 0;
2586 2632 }
2587 2633 ki.value[ki.len - 1] = bit;
2588 2634 if (big_cmp_abs(k, &ki) != 0) {
2589 2635 (void) big_double(&ki, &ki);
2590 2636 }
2591 2637 (void) big_sub_pos(&ki, &ki, k);
2592 2638
2593 2639 (void) big_copy(Lk, p);
2594 2640 (void) big_copy(Lkminus1, &big_Two);
2595 2641
2596 2642 for (i = 0; i < m; i++) {
2597 2643 if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
2598 2644 goto ret;
2599 2645 }
2600 2646 (void) big_add_abs(&tmp, &tmp, n);
2601 2647 (void) big_sub_pos(&tmp, &tmp, p);
2602 2648 if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
2603 2649 goto ret;
2604 2650 }
2605 2651 if ((ki.value[w] & bit) != 0) {
2606 2652 if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
2607 2653 BIG_OK) {
2608 2654 goto ret;
2609 2655 }
2610 2656 (void) big_add_abs(&tmp, &tmp, n);
2611 2657 (void) big_sub_pos(&tmp, &tmp, &big_Two);
2612 2658 if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
2613 2659 BIG_OK) {
2614 2660 goto ret;
2615 2661 }
2616 2662 (void) big_copy(Lk, &tmp2);
2617 2663 } else {
2618 2664 if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
2619 2665 goto ret;
2620 2666 }
2621 2667 (void) big_add_abs(&tmp, &tmp, n);
2622 2668 (void) big_sub_pos(&tmp, &tmp, &big_Two);
2623 2669 if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
2624 2670 goto ret;
2625 2671 }
2626 2672 (void) big_copy(Lkminus1, &tmp2);
2627 2673 }
2628 2674 bit = bit >> 1;
2629 2675 if (bit == 0) {
2630 2676 bit = BIG_CHUNK_HIGHBIT;
2631 2677 w--;
2632 2678 }
2633 2679 }
2634 2680
2635 2681 err = BIG_OK;
2636 2682
2637 2683 ret:
2638 2684 if (tmp2.malloced) big_finish(&tmp2);
2639 2685 ret2:
2640 2686 if (tmp.malloced) big_finish(&tmp);
2641 2687 ret1:
2642 2688 if (ki.malloced) big_finish(&ki);
2643 2689
2644 2690 return (err);
2645 2691 }
2646 2692
2647 2693
2648 2694 BIG_ERR_CODE
2649 2695 big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
2650 2696 {
2651 2697 BIGNUM o, nminus1, tmp, Lkminus1, Lk;
2652 2698 BIG_CHUNK_TYPE ovalue[BIGTMPSIZE];
2653 2699 BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE];
2654 2700 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2655 2701 BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE];
2656 2702 BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE];
2657 2703 BIG_ERR_CODE err;
2658 2704 int e, i, jac;
2659 2705
2660 2706 if (big_cmp_abs(n, &big_One) == 0) {
2661 2707 return (BIG_FALSE);
2662 2708 }
2663 2709 if (big_cmp_abs(n, &big_Two) == 0) {
2664 2710 return (BIG_TRUE);
2665 2711 }
2666 2712 if ((n->value[0] & 1) == 0) {
2667 2713 return (BIG_FALSE);
2668 2714 }
2669 2715
2670 2716 if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
2671 2717 BIG_OK) {
2672 2718 return (err);
2673 2719 }
2674 2720
2675 2721 if ((err = big_init1(&nminus1, n->len,
2676 2722 nminus1value, arraysize(nminus1value))) != BIG_OK) {
2677 2723 goto ret1;
2678 2724 }
2679 2725
2680 2726 if ((err = big_init1(&tmp, 2 * n->len,
2681 2727 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2682 2728 goto ret2;
2683 2729 }
2684 2730
↓ open down ↓ |
102 lines elided |
↑ open up ↑ |
2685 2731 if ((err = big_init1(&Lkminus1, n->len,
2686 2732 Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
2687 2733 goto ret3;
2688 2734 }
2689 2735
2690 2736 if ((err = big_init1(&Lk, n->len,
2691 2737 Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
2692 2738 goto ret4;
2693 2739 }
2694 2740
2695 - (void) big_sub_pos(&o, n, &big_One); /* cannot fail */
2741 + (void) big_sub_pos(&o, n, &big_One); /* cannot fail */
2696 2742 (void) big_copy(&nminus1, &o); /* cannot fail */
2697 2743 e = 0;
2698 2744 while ((o.value[0] & 1) == 0) {
2699 2745 e++;
2700 2746 (void) big_half_pos(&o, &o); /* cannot fail */
2701 2747 }
2702 2748 if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
2703 2749 BIG_OK) {
2704 2750 goto ret;
2705 2751 }
2706 2752
2707 2753 i = 0;
2708 2754 while ((i < e) &&
2709 2755 (big_cmp_abs(&tmp, &big_One) != 0) &&
2710 2756 (big_cmp_abs(&tmp, &nminus1) != 0)) {
2711 2757 if ((err =
2712 2758 big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
2713 2759 BIG_OK)
2714 2760 goto ret;
2715 2761 i++;
2716 2762 }
2717 2763
2718 2764 if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
2719 2765 ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
2720 2766 err = BIG_FALSE;
2721 2767 goto ret;
2722 2768 }
2723 2769
2724 2770 if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
2725 2771 goto ret;
2726 2772 }
2727 2773
2728 2774 if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
2729 2775 goto ret;
2730 2776 }
2731 2777 if (big_cmp_abs(&tmp, n) == 0) {
2732 2778 err = BIG_FALSE;
2733 2779 goto ret;
2734 2780 }
2735 2781
2736 2782 (void) big_copy(&o, &big_Two);
2737 2783 do {
2738 2784 (void) big_add_abs(&o, &o, &big_One);
2739 2785 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
2740 2786 goto ret;
2741 2787 }
2742 2788 (void) big_sub_pos(&tmp, &tmp, &big_Four);
2743 2789 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
2744 2790 goto ret;
2745 2791 }
2746 2792 } while (jac != -1);
2747 2793
2748 2794 (void) big_add_abs(&tmp, n, &big_One);
2749 2795 if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
2750 2796 goto ret;
2751 2797 }
2752 2798
2753 2799 if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
2754 2800 (big_cmp_abs(&Lk, &big_Two) == 0)) {
2755 2801 err = BIG_TRUE;
2756 2802 } else {
2757 2803 err = BIG_FALSE;
2758 2804 }
2759 2805
2760 2806 ret:
2761 2807 if (Lk.malloced) big_finish(&Lk);
2762 2808 ret4:
2763 2809 if (Lkminus1.malloced) big_finish(&Lkminus1);
2764 2810 ret3:
2765 2811 if (tmp.malloced) big_finish(&tmp);
2766 2812 ret2:
2767 2813 if (nminus1.malloced) big_finish(&nminus1);
2768 2814 ret1:
2769 2815 if (o.malloced) big_finish(&o);
2770 2816
2771 2817 return (err);
2772 2818 }
2773 2819
↓ open down ↓ |
68 lines elided |
↑ open up ↑ |
2774 2820
2775 2821 BIG_ERR_CODE
2776 2822 big_isprime_pos(BIGNUM *n)
2777 2823 {
2778 2824 return (big_isprime_pos_ext(n, NULL));
2779 2825 }
2780 2826
2781 2827
2782 2828 #define SIEVESIZE 1000
2783 2829
2784 -uint32_t smallprimes[] =
2785 -{
2786 -3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2787 -51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97
2788 -};
2789 2830
2790 -
2791 2831 BIG_ERR_CODE
2792 2832 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2793 2833 {
2834 + static const uint32_t smallprimes[] = {
2835 + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2836 + 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 };
2794 2837 BIG_ERR_CODE err;
2795 2838 int sieve[SIEVESIZE];
2796 2839 int i;
2797 2840 uint32_t off, p;
2798 2841
2799 2842 if ((err = big_copy(result, n)) != BIG_OK) {
2800 2843 return (err);
2801 2844 }
2802 2845 result->value[0] |= 1;
2803 2846 /* CONSTCOND */
2804 2847 while (1) {
2805 2848 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2806 2849 for (i = 0;
2807 2850 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2808 2851 p = smallprimes[i];
2809 2852 off = big_modhalf_pos(result, p);
2810 2853 off = p - off;
2811 2854 if ((off % 2) == 1) {
2812 2855 off = off + p;
2813 2856 }
2814 2857 off = off / 2;
2815 2858 while (off < SIEVESIZE) {
2816 2859 sieve[off] = 1;
2817 2860 off = off + p;
2818 2861 }
2819 2862 }
2820 2863
2821 2864 for (i = 0; i < SIEVESIZE; i++) {
2822 2865 if (sieve[i] == 0) {
2823 2866 err = big_isprime_pos_ext(result, info);
2824 2867 if (err != BIG_FALSE) {
2825 2868 if (err != BIG_TRUE) {
2826 2869 return (err);
2827 2870 } else {
2828 2871 goto out;
2829 2872 }
2830 2873 }
2831 2874
2832 2875 }
2833 2876 if ((err = big_add_abs(result, result, &big_Two)) !=
2834 2877 BIG_OK) {
2835 2878 return (err);
2836 2879 }
2837 2880 }
2838 2881 }
2839 2882
2840 2883 out:
2841 2884 return (BIG_OK);
2842 2885 }
2843 2886
2844 2887
2845 2888 BIG_ERR_CODE
2846 2889 big_nextprime_pos(BIGNUM *result, BIGNUM *n)
2847 2890 {
2848 2891 return (big_nextprime_pos_ext(result, n, NULL));
2849 2892 }
2850 2893
2851 2894
2852 2895 BIG_ERR_CODE
2853 2896 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
2854 2897 {
2855 2898 BIG_ERR_CODE err;
2856 2899
2857 2900
2858 2901 if ((err = big_copy(result, n)) != BIG_OK)
2859 2902 return (err);
2860 2903 result->value[0] |= 1;
2861 2904 while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
2862 2905 if (err != BIG_FALSE)
2863 2906 return (err);
2864 2907 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
2865 2908 return (err);
2866 2909 }
2867 2910 return (BIG_OK);
2868 2911 }
2869 2912
2870 2913
2871 2914 /*
2872 2915 * given m and e, computes the rest in the equation
2873 2916 * gcd(m, e) = cm * m + ce * e
2874 2917 */
2875 2918 BIG_ERR_CODE
2876 2919 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
2877 2920 {
2878 2921 BIGNUM *xi, *ri, *riminus1, *riminus2, *t;
2879 2922 BIGNUM *vmi, *vei, *vmiminus1, *veiminus1;
2880 2923 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp;
2881 2924 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2882 2925 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2883 2926 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2884 2927 BIG_CHUNK_TYPE t4value[BIGTMPSIZE];
2885 2928 BIG_CHUNK_TYPE t5value[BIGTMPSIZE];
2886 2929 BIG_CHUNK_TYPE t6value[BIGTMPSIZE];
2887 2930 BIG_CHUNK_TYPE t7value[BIGTMPSIZE];
2888 2931 BIG_CHUNK_TYPE t8value[BIGTMPSIZE];
2889 2932 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2890 2933 BIG_ERR_CODE err;
2891 2934 int len;
2892 2935
2893 2936 if (big_cmp_abs(m, e) >= 0) {
2894 2937 len = m->len;
2895 2938 } else {
2896 2939 len = e->len;
2897 2940 }
2898 2941
2899 2942 if ((err = big_init1(&t1, len,
2900 2943 t1value, arraysize(t1value))) != BIG_OK) {
2901 2944 return (err);
2902 2945 }
2903 2946 if ((err = big_init1(&t2, len,
2904 2947 t2value, arraysize(t2value))) != BIG_OK) {
2905 2948 goto ret1;
2906 2949 }
2907 2950 if ((err = big_init1(&t3, len,
2908 2951 t3value, arraysize(t3value))) != BIG_OK) {
2909 2952 goto ret2;
2910 2953 }
2911 2954 if ((err = big_init1(&t4, len,
2912 2955 t4value, arraysize(t3value))) != BIG_OK) {
2913 2956 goto ret3;
2914 2957 }
2915 2958 if ((err = big_init1(&t5, len,
2916 2959 t5value, arraysize(t5value))) != BIG_OK) {
2917 2960 goto ret4;
2918 2961 }
2919 2962 if ((err = big_init1(&t6, len,
2920 2963 t6value, arraysize(t6value))) != BIG_OK) {
2921 2964 goto ret5;
2922 2965 }
2923 2966 if ((err = big_init1(&t7, len,
2924 2967 t7value, arraysize(t7value))) != BIG_OK) {
2925 2968 goto ret6;
2926 2969 }
2927 2970 if ((err = big_init1(&t8, len,
2928 2971 t8value, arraysize(t8value))) != BIG_OK) {
2929 2972 goto ret7;
2930 2973 }
2931 2974
2932 2975 if ((err = big_init1(&tmp, 2 * len,
2933 2976 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2934 2977 goto ret8;
2935 2978 }
2936 2979
2937 2980 ri = &t1;
2938 2981 ri->value[0] = 1;
2939 2982 ri->len = 1;
2940 2983 xi = &t2;
2941 2984 riminus1 = &t3;
2942 2985 riminus2 = &t4;
2943 2986 vmi = &t5;
2944 2987 vei = &t6;
2945 2988 vmiminus1 = &t7;
2946 2989 veiminus1 = &t8;
2947 2990
2948 2991 (void) big_copy(vmiminus1, &big_One);
2949 2992 (void) big_copy(vmi, &big_One);
2950 2993 (void) big_copy(veiminus1, &big_One);
2951 2994 (void) big_copy(xi, &big_One);
2952 2995 vei->len = 1;
2953 2996 vei->value[0] = 0;
2954 2997
2955 2998 (void) big_copy(riminus1, m);
2956 2999 (void) big_copy(ri, e);
2957 3000
2958 3001 while (!big_is_zero(ri)) {
2959 3002 t = riminus2;
2960 3003 riminus2 = riminus1;
2961 3004 riminus1 = ri;
2962 3005 ri = t;
2963 3006 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
2964 3007 goto ret;
2965 3008 }
2966 3009 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
2967 3010 goto ret;
2968 3011 }
2969 3012 t = vmiminus1;
2970 3013 vmiminus1 = vmi;
2971 3014 vmi = t;
2972 3015 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
2973 3016 goto ret;
2974 3017 }
2975 3018 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
2976 3019 goto ret;
2977 3020 }
2978 3021 t = veiminus1;
2979 3022 veiminus1 = vei;
2980 3023 vei = t;
2981 3024 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
2982 3025 BIG_OK) {
2983 3026 goto ret;
2984 3027 }
2985 3028 }
2986 3029 if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
2987 3030 goto ret;
2988 3031 }
2989 3032 if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
2990 3033 goto ret;
2991 3034 }
2992 3035 if (ce != NULL) {
2993 3036 err = big_copy(ce, vei);
2994 3037 }
2995 3038 ret:
2996 3039 if (tmp.malloced) big_finish(&tmp);
2997 3040 ret8:
2998 3041 if (t8.malloced) big_finish(&t8);
2999 3042 ret7:
3000 3043 if (t7.malloced) big_finish(&t7);
3001 3044 ret6:
3002 3045 if (t6.malloced) big_finish(&t6);
3003 3046 ret5:
3004 3047 if (t5.malloced) big_finish(&t5);
3005 3048 ret4:
3006 3049 if (t4.malloced) big_finish(&t4);
3007 3050 ret3:
3008 3051 if (t3.malloced) big_finish(&t3);
3009 3052 ret2:
3010 3053 if (t2.malloced) big_finish(&t2);
3011 3054 ret1:
3012 3055 if (t1.malloced) big_finish(&t1);
3013 3056
3014 3057 return (err);
3015 3058 }
↓ open down ↓ |
212 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX