1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Configuration guide
28 * -------------------
29 *
30 * There are 4 preprocessor symbols used to configure the bignum
31 * implementation. This file contains no logic to configure based on
32 * processor; we leave that to the Makefiles to specify.
33 *
34 * USE_FLOATING_POINT
35 * Meaning: There is support for a fast floating-point implementation of
36 * Montgomery multiply.
37 *
38 * PSR_MUL
39 * Meaning: There are processor-specific versions of the low level
40 * functions to implement big_mul. Those functions are: big_mul_set_vec,
41 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support
42 * for all 4 functions. You cannot pick and choose which subset of these
43 * functions to support; that would lead to a rat's nest of #ifdefs.
44 *
45 * HWCAP
46 * Meaning: Call multiply support functions through a function pointer.
47 * On x86, there are multiple implementations for different hardware
48 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when
49 * a function is first used. So, the support functions are called through
50 * a function pointer. There is no need for that on Sparc, because there
51 * is only one implementation; support functions are called directly.
52 * Later, if there were some new VIS instruction, or something, and a
53 * run-time test were needed, rather than variant kernel modules and
54 * libraries, then HWCAP would be defined for Sparc, as well.
55 *
56 * UMUL64
57 * Meaning: It is safe to use generic C code that assumes the existence
58 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined,
59 * then the generic code for big_mul_add_vec() must necessarily be very slow,
60 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
61 *
62 */
63
64
65 #include <sys/types.h>
66 #include "bignum.h"
67
68 #ifdef _KERNEL
69 #include <sys/ddi.h>
70 #include <sys/mdesc.h>
71 #include <sys/crypto/common.h>
72
73 #include <sys/kmem.h>
74 #include <sys/param.h>
75 #include <sys/sunddi.h>
76
77 #else
78 #include <stdlib.h>
79 #include <stdio.h>
80 #include <assert.h>
81 #define ASSERT assert
82 #endif /* _KERNEL */
83
84 #ifdef _LP64 /* truncate 64-bit size_t to 32-bits */
85 #define UI32(ui) ((uint32_t)ui)
86 #else /* size_t already 32-bits */
87 #define UI32(ui) (ui)
88 #endif
89
90
91 #ifdef _KERNEL
92 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP)
93 #define big_free(ptr, size) kmem_free(ptr, size)
94
95 void *
96 big_realloc(void *from, size_t oldsize, size_t newsize)
97 {
98 void *rv;
99
100 rv = kmem_alloc(newsize, KM_NOSLEEP);
101 if (rv != NULL)
102 bcopy(from, rv, oldsize);
103 kmem_free(from, oldsize);
104 return (rv);
105 }
106
107 #else /* _KERNEL */
108
109 #ifndef MALLOC_DEBUG
110
111 #define big_malloc(size) malloc(size)
112 #define big_free(ptr, size) free(ptr)
113
114 #else
115
/*
 * Debug wrapper around free(): report every release to stdout.
 * "size" is used only for the diagnostic message; free() ignores it.
 */
void
big_free(void *ptr, size_t size)
{
	/* %zu: size is a size_t; "%d" misprints/truncates on LP64 */
	(void) printf("freed %zu bytes at %p\n", size, ptr);
	free(ptr);
}
122
/*
 * Debug wrapper around malloc(): report every allocation to stdout.
 * Returns the raw malloc() result (may be NULL); caller must check.
 */
void *
big_malloc(size_t size)
{
	void *rv;

	rv = malloc(size);
	/* %zu: size is a size_t; "%d" misprints/truncates on LP64 */
	(void) printf("malloced %zu bytes, addr:%p\n", size, rv);
	return (rv);
}
131 #endif /* MALLOC_DEBUG */
132
133 #define big_realloc(x, y, z) realloc((x), (z))
134
135
136 /*
137 * printbignum()
138 * Print a BIGNUM type to stdout.
139 */
void
printbignum(char *aname, BIGNUM *a)
{
	int i;

	/*
	 * Header line: the label, then the sign-encoded length
	 * (negative length means a negative number).
	 */
	(void) printf("\n%s\n%d\n", aname, a->sign*a->len);
	/* dump words most-significant first */
	for (i = a->len - 1; i >= 0; i--) {
#ifdef BIGNUM_CHUNK_32
		/* 32-bit chunks: one hex word per chunk */
		(void) printf("%08x ", a->value[i]);
		/* wrap the line every BITSINBYTE words (not after the last) */
		if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
			(void) printf("\n");
		}
#else
		/* 64-bit chunks: print as two 32-bit halves, high first */
		(void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
		    (uint32_t)((a->value[i]) & 0xffffffff));
		if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
			(void) printf("\n");
		}
#endif
	}
	(void) printf("\n");
}
162
163 #endif /* _KERNEL */
164
165
166 /*
167 * big_init()
168 * Initialize and allocate memory for a BIGNUM type.
169 *
170 * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
171 *
172 * Note: call big_finish() to free memory allocated by big_init().
173 *
174 * Input:
175 * number Uninitialized memory for BIGNUM
176 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
177 *
178 * Output:
179 * number Initialized BIGNUM
180 *
181 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
182 */
183 BIG_ERR_CODE
184 big_init(BIGNUM *number, int size)
185 {
186 number->value = big_malloc(BIGNUM_WORDSIZE * size);
187 if (number->value == NULL) {
188 return (BIG_NO_MEM);
189 }
190 number->size = size;
191 number->len = 0;
192 number->sign = 1;
193 number->malloced = 1;
194 return (BIG_OK);
195 }
196
197
198 /*
199 * big_init1()
200 * Initialize and, if needed, allocate memory for a BIGNUM type.
 * Use the buffer passed, buf, if any, instead of allocating memory
202 * if it's at least "size" bytes.
203 *
204 * Note: call big_finish() to free memory allocated by big_init().
205 *
206 * Input:
207 * number Uninitialized memory for BIGNUM
208 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
209 * buf Buffer for storing a BIGNUM.
210 * If NULL, big_init1() will allocate a buffer
211 * bufsize Size, in BIG_CHUNK_SIZE_bit words, of buf
212 *
213 * Output:
214 * number Initialized BIGNUM
215 *
216 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
217 */
218 BIG_ERR_CODE
219 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
220 {
221 if ((buf == NULL) || (size > bufsize)) {
222 number->value = big_malloc(BIGNUM_WORDSIZE * size);
223 if (number->value == NULL) {
224 return (BIG_NO_MEM);
225 }
226 number->size = size;
227 number->malloced = 1;
228 } else {
229 number->value = buf;
230 number->size = bufsize;
231 number->malloced = 0;
232 }
233 number->len = 0;
234 number->sign = 1;
235
236 return (BIG_OK);
237 }
238
239
240 /*
241 * big_finish()
242 * Free memory, if any, allocated by big_init() or big_init1().
243 */
244 void
245 big_finish(BIGNUM *number)
246 {
247 if (number->malloced == 1) {
248 big_free(number->value, BIGNUM_WORDSIZE * number->size);
249 number->malloced = 0;
250 }
251 }
252
253
254 /*
255 * bn->size should be at least
 * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE words
257 * converts from byte-big-endian format to bignum format (words in
258 * little endian order, but bytes within the words big endian)
259 */
void
bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
{
	int i, j;
	uint32_t offs;
	const uint32_t slen = UI32(len);
	BIG_CHUNK_TYPE word;
	uchar_t *knwordp;

	/* empty input becomes the canonical zero: one zero word */
	if (slen == 0) {
		bn->len = 1;
		bn->value[0] = 0;
		return;
	}

	/* offs = leftover bytes that do not fill a whole word */
	offs = slen % BIGNUM_WORDSIZE;
	bn->len = slen / BIGNUM_WORDSIZE;

	/*
	 * Pack each full word: word i of the bignum (little-endian word
	 * order) comes from the i-th group of BIGNUM_WORDSIZE bytes
	 * counted from the END of kn (big-endian byte stream).
	 */
	for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
		knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
		word = knwordp[0];
		for (j = 1; j < BIGNUM_WORDSIZE; j++) {
			word = (word << BITSINBYTE) + knwordp[j];
		}
		bn->value[i] = word;
	}
	/* the leading (most significant) partial word, if any */
	if (offs > 0) {
		word = kn[0];
		for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
		bn->value[bn->len++] = word;
	}
	/* normalize: strip high-order zero words, keep at least one word */
	while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
		bn->len --;
	}
}
295
296
297 /*
298 * copies the least significant len bytes if
299 * len < bn->len * BIGNUM_WORDSIZE
300 * converts from bignum format to byte-big-endian format.
301 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
302 */
void
bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
{
	int i, j;
	uint32_t offs;
	const uint32_t slen = UI32(len);
	BIG_CHUNK_TYPE word;

	if (len < BIGNUM_WORDSIZE * bn->len) {
		/*
		 * Output buffer is smaller than the bignum: emit only the
		 * least significant slen bytes (truncation on the left).
		 */
		for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
			word = bn->value[i];
			for (j = 0; j < BIGNUM_WORDSIZE; j++) {
				kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
				    word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
		/* partial word at the front of the byte string, if any */
		offs = slen % BIGNUM_WORDSIZE;
		if (offs > 0) {
			word = bn->value[slen / BIGNUM_WORDSIZE];
			for (i = slen % BIGNUM_WORDSIZE; i > 0; i --) {
				kn[i - 1] = word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
	} else {
		/*
		 * Buffer holds the whole number: write every word out
		 * big-endian, then zero-pad the leading bytes.
		 */
		for (i = 0; i < bn->len; i++) {
			word = bn->value[i];
			for (j = 0; j < BIGNUM_WORDSIZE; j++) {
				kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
				    word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
		for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {
			kn[i] = 0;
		}
	}
}
342
343
344 int
345 big_bitlength(BIGNUM *a)
346 {
347 int l = 0, b = 0;
348 BIG_CHUNK_TYPE c;
349
350 l = a->len - 1;
351 while ((l > 0) && (a->value[l] == 0)) {
352 l--;
353 }
354 b = BIG_CHUNK_SIZE;
355 c = a->value[l];
356 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
357 c = c << 1;
358 b--;
359 }
360
361 return (l * BIG_CHUNK_SIZE + b);
362 }
363
364
/*
 * big_copy()
 * Copy src into dest, growing dest's value array if needed.
 * Side effect: src->len is trimmed of high-order zero words
 * (src's stored digits are not modified, only its length field).
 * Returns BIG_OK or BIG_NO_MEM.
 */
BIG_ERR_CODE
big_copy(BIGNUM *dest, BIGNUM *src)
{
	BIG_CHUNK_TYPE *newptr;
	int i, len;

	/* normalize the source length (drop leading zero words) */
	len = src->len;
	while ((len > 1) && (src->value[len - 1] == 0)) {
		len--;
	}
	src->len = len;
	if (dest->size < len) {
		if (dest->malloced == 1) {
			/*
			 * NOTE(review): in the kernel build big_realloc()
			 * frees dest->value even on failure, so on the
			 * BIG_NO_MEM return below dest->value may dangle
			 * and a later big_finish() would double-free —
			 * verify against big_realloc()'s failure behavior.
			 */
			newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
			    BIGNUM_WORDSIZE * dest->size,
			    BIGNUM_WORDSIZE * len);
		} else {
			newptr = (BIG_CHUNK_TYPE *)
			    big_malloc(BIGNUM_WORDSIZE * len);
			if (newptr != NULL) {
				dest->malloced = 1;
			}
		}
		if (newptr == NULL) {
			return (BIG_NO_MEM);
		}
		dest->value = newptr;
		dest->size = len;
	}
	dest->len = len;
	dest->sign = src->sign;
	for (i = 0; i < len; i++) {
		dest->value[i] = src->value[i];
	}

	return (BIG_OK);
}
402
403
404 BIG_ERR_CODE
405 big_extend(BIGNUM *number, int size)
406 {
407 BIG_CHUNK_TYPE *newptr;
408 int i;
409
410 if (number->size >= size)
411 return (BIG_OK);
412 if (number->malloced) {
413 number->value = big_realloc(number->value,
414 BIGNUM_WORDSIZE * number->size,
415 BIGNUM_WORDSIZE * size);
416 } else {
417 newptr = big_malloc(BIGNUM_WORDSIZE * size);
418 if (newptr != NULL) {
419 for (i = 0; i < number->size; i++) {
420 newptr[i] = number->value[i];
421 }
422 }
423 number->value = newptr;
424 }
425
426 if (number->value == NULL) {
427 return (BIG_NO_MEM);
428 }
429
430 number->size = size;
431 number->malloced = 1;
432 return (BIG_OK);
433 }
434
435
436 /* returns 1 if n == 0 */
437 int
438 big_is_zero(BIGNUM *n)
439 {
440 int i, result;
441
442 result = 1;
443 for (i = 0; i < n->len; i++) {
444 if (n->value[i] != 0) {
445 result = 0;
446 }
447 }
448 return (result);
449 }
450
451
/*
 * big_add_abs()
 * result = |aa| + |bb|.  The result is always marked positive.
 * result is extended as needed; returns BIG_OK or BIG_NO_MEM.
 */
BIG_ERR_CODE
big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	int i, shorter, longer;
	BIG_CHUNK_TYPE cy, ai;
	BIG_CHUNK_TYPE *r, *a, *b, *c;
	BIG_ERR_CODE err;
	BIGNUM *longerarg;

	if (aa->len > bb->len) {
		shorter = bb->len;
		longer = aa->len;
		longerarg = aa;
	} else {
		shorter = aa->len;
		longer = bb->len;
		longerarg = bb;
	}
	/* +1 word for a possible carry out of the top word */
	if (result->size < longer + 1) {
		err = big_extend(result, longer + 1);
		if (err != BIG_OK) {
			return (err);
		}
	}

	r = result->value;
	a = aa->value;
	b = bb->value;
	c = longerarg->value;
	cy = 0;
	/*
	 * Add the common-length prefix.  Carry detection without a
	 * wider type: r[i] > ai means no overflow occurred (cy = 0);
	 * r[i] < ai means wraparound (cy = 1); r[i] == ai happens only
	 * when b[i] + cy is 0 or 2^BIG_CHUNK_SIZE, in which case the
	 * incoming carry value is already the correct outgoing carry.
	 */
	for (i = 0; i < shorter; i++) {
		ai = a[i];
		r[i] = ai + b[i] + cy;
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
	/* propagate the carry through the longer operand's tail */
	for (; i < longer; i++) {
		ai = c[i];
		r[i] = ai + cy;
		if (r[i] >= ai) {
			cy = 0;
		}
	}
	/* final carry spills into an extra word */
	if (cy == 1) {
		r[i] = cy;
		result->len = longer + 1;
	} else {
		result->len = longer;
	}
	result->sign = 1;
	return (BIG_OK);
}
507
508
509 /* caller must make sure that result has at least len words allocated */
510 void
511 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
512 {
513 int i;
514 BIG_CHUNK_TYPE cy, ai;
515
516 cy = 1;
517 for (i = 0; i < len; i++) {
518 ai = a[i];
519 r[i] = ai + (~b[i]) + cy;
520 if (r[i] > ai) {
521 cy = 0;
522 } else if (r[i] < ai) {
523 cy = 1;
524 }
525 }
526 }
527
528
529 /* result=aa-bb it is assumed that aa>=bb */
/* result=aa-bb  it is assumed that aa>=bb */
BIG_ERR_CODE
big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	int i, shorter;
	BIG_CHUNK_TYPE cy = 1, ai;
	BIG_CHUNK_TYPE *r, *a, *b;
	BIG_ERR_CODE err = BIG_OK;

	if (aa->len > bb->len) {
		shorter = bb->len;
	} else {
		shorter = aa->len;
	}
	if (result->size < aa->len) {
		err = big_extend(result, aa->len);
		if (err != BIG_OK) {
			return (err);
		}
	}

	r = result->value;
	a = aa->value;
	b = bb->value;
	result->len = aa->len;
	/*
	 * Two's-complement subtraction: a - b = a + ~b + cy, where
	 * cy == 1 means "no borrow pending".  When r[i] == ai the carry
	 * state passes through unchanged (see big_add_abs for the trick).
	 */
	cy = 1;
	for (i = 0; i < shorter; i++) {
		ai = a[i];
		r[i] = ai + (~b[i]) + cy;
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
	/* tail of aa: subtract zero words of bb (~0 is the complement of 0) */
	for (; i < aa->len; i++) {
		ai = a[i];
		r[i] = ai + (~0) + cy;
		if (r[i] < ai) {
			cy = 1;
		}
	}
	result->sign = 1;

	/* cy == 0 at the end means a borrow survived, i.e. aa < bb */
	if (cy == 0) {
		return (BIG_INVALID_ARGS);
	} else {
		return (BIG_OK);
	}
}
579
580
581 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
582 int
583 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
584 {
585 int i;
586
587 if (aa->len > bb->len) {
588 for (i = aa->len - 1; i > bb->len - 1; i--) {
589 if (aa->value[i] > 0) {
590 return (1);
591 }
592 }
593 } else if (aa->len < bb->len) {
594 for (i = bb->len - 1; i > aa->len - 1; i--) {
595 if (bb->value[i] > 0) {
596 return (-1);
597 }
598 }
599 } else {
600 i = aa->len - 1;
601 }
602 for (; i >= 0; i--) {
603 if (aa->value[i] > bb->value[i]) {
604 return (1);
605 } else if (aa->value[i] < bb->value[i]) {
606 return (-1);
607 }
608 }
609
610 return (0);
611 }
612
613
614 BIG_ERR_CODE
615 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
616 {
617 BIG_ERR_CODE err;
618
619 if ((bb->sign == -1) && (aa->sign == 1)) {
620 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
621 return (err);
622 }
623 result->sign = 1;
624 } else if ((aa->sign == -1) && (bb->sign == 1)) {
625 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
626 return (err);
627 }
628 result->sign = -1;
629 } else if ((aa->sign == 1) && (bb->sign == 1)) {
630 if (big_cmp_abs(aa, bb) >= 0) {
631 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
632 return (err);
633 }
634 result->sign = 1;
635 } else {
636 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
637 return (err);
638 }
639 result->sign = -1;
640 }
641 } else {
642 if (big_cmp_abs(aa, bb) >= 0) {
643 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
644 return (err);
645 }
646 result->sign = -1;
647 } else {
648 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
649 return (err);
650 }
651 result->sign = 1;
652 }
653 }
654 return (BIG_OK);
655 }
656
657
658 BIG_ERR_CODE
659 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
660 {
661 BIG_ERR_CODE err;
662
663 if ((bb->sign == -1) && (aa->sign == -1)) {
664 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
665 return (err);
666 }
667 result->sign = -1;
668 } else if ((aa->sign == 1) && (bb->sign == 1)) {
669 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
670 return (err);
671 }
672 result->sign = 1;
673 } else if ((aa->sign == 1) && (bb->sign == -1)) {
674 if (big_cmp_abs(aa, bb) >= 0) {
675 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
676 return (err);
677 }
678 result->sign = 1;
679 } else {
680 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
681 return (err);
682 }
683 result->sign = -1;
684 }
685 } else {
686 if (big_cmp_abs(aa, bb) >= 0) {
687 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
688 return (err);
689 }
690 result->sign = -1;
691 } else {
692 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
693 return (err);
694 }
695 result->sign = 1;
696 }
697 }
698 return (BIG_OK);
699 }
700
701
702 /* result = aa/2 */
703 BIG_ERR_CODE
704 big_half_pos(BIGNUM *result, BIGNUM *aa)
705 {
706 BIG_ERR_CODE err;
707 int i;
708 BIG_CHUNK_TYPE cy, cy1;
709 BIG_CHUNK_TYPE *a, *r;
710
711 if (result->size < aa->len) {
712 err = big_extend(result, aa->len);
713 if (err != BIG_OK) {
714 return (err);
715 }
716 }
717
718 result->len = aa->len;
719 a = aa->value;
720 r = result->value;
721 cy = 0;
722 for (i = aa->len - 1; i >= 0; i--) {
723 cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
724 r[i] = (cy | (a[i] >> 1));
725 cy = cy1;
726 }
727 if (r[result->len - 1] == 0) {
728 result->len--;
729 }
730
731 return (BIG_OK);
732 }
733
734 /* result = aa*2 */
/* result = aa*2 */
BIG_ERR_CODE
big_double(BIGNUM *result, BIGNUM *aa)
{
	BIG_ERR_CODE err;
	int i, rsize;
	BIG_CHUNK_TYPE cy, cy1;
	BIG_CHUNK_TYPE *a, *r;

	/* an extra word is needed only if the top bit is set */
	if ((aa->len > 0) &&
	    ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
		rsize = aa->len + 1;
	} else {
		rsize = aa->len;
	}

	if (result->size < rsize) {
		err = big_extend(result, rsize);
		if (err != BIG_OK)
			return (err);
	}

	a = aa->value;
	r = result->value;
	/*
	 * Pre-write the spill word: when rsize == aa->len + 1 the top
	 * bit of aa is known to be set, so the carry out of the loop
	 * below is guaranteed to be 1.
	 */
	if (rsize == aa->len + 1) {
		r[rsize - 1] = 1;
	}
	/* shift left one bit, low word first, propagating the top bit */
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
		r[i] = (cy | (a[i] << 1));
		cy = cy1;
	}
	result->len = rsize;
	return (BIG_OK);
}
770
771
772 /*
773 * returns aa mod b, aa must be nonneg, b must be a max
774 * (BIG_CHUNK_SIZE / 2)-bit integer
775 */
static uint32_t
big_modhalf_pos(BIGNUM *aa, uint32_t b)
{
	int i;
	BIG_CHUNK_TYPE rem;

	if (aa->len == 0) {
		return (0);
	}
	/*
	 * Long-division remainder, processed a half-word at a time so the
	 * intermediate (rem << halfword | digits) never overflows a
	 * BIG_CHUNK_TYPE: rem < b fits in a half word, by the precondition
	 * that b is at most a (BIG_CHUNK_SIZE / 2)-bit value.
	 */
	rem = aa->value[aa->len - 1] % b;
	for (i = aa->len - 2; i >= 0; i--) {
		/* fold in the high half, then the low half, of word i */
		rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
		    (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
		rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
		    (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
	}

	return ((uint32_t)rem);
}
795
796
797 /*
798 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
799 * result->size should be at least aa->len at entry
800 * aa, bb, and result should be positive
801 */
802 void
803 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
804 {
805 int i, lendiff;
806 BIGNUM res1, aa1;
807
808 lendiff = aa->len - bb->len;
809 res1.size = result->size - lendiff;
810 res1.malloced = 0;
811 res1.value = result->value + lendiff;
812 aa1.size = aa->size - lendiff;
813 aa1.value = aa->value + lendiff;
814 aa1.len = bb->len;
815 aa1.sign = 1;
816 (void) big_sub_pos(&res1, &aa1, bb);
817 if (result->value != aa->value) {
818 for (i = 0; i < lendiff; i++) {
819 result->value[i] = aa->value[i];
820 }
821 }
822 result->len = aa->len;
823 }
824
825
826 /*
827 * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
828 * (2^BIG_CHUNK_SIZE)^lendiff * |bb|
829 * aa->len should be >= bb->len
830 */
831 int
832 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
833 {
834 int lendiff;
835 BIGNUM aa1;
836
837 lendiff = aa->len - bb->len;
838 aa1.len = bb->len;
839 aa1.size = aa->size - lendiff;
840 aa1.malloced = 0;
841 aa1.value = aa->value + lendiff;
842 return (big_cmp_abs(&aa1, bb));
843 }
844
845
846 /*
847 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
848 * result should have enough space allocated.
849 */
static void
big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
	int i;
	BIG_CHUNK_TYPE t1, t2, ai, cy;
	BIG_CHUNK_TYPE *a, *r;

	a = aa->value;
	r = result->value;
	cy = 0;
	/*
	 * Half-word schoolbook multiply: since b fits in half a chunk,
	 * each halfword-by-b product plus carry fits in one chunk.
	 */
	for (i = 0; i < aa->len; i++) {
		ai = a[i];
		/* t1 = low half of ai times b, plus carry in */
		t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
		/* t2 = high half of ai times b, plus overflow from t1 */
		t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
		    (t1 >> (BIG_CHUNK_SIZE / 2));
		/* recombine the two halves into the output word */
		r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
		    (t2 << (BIG_CHUNK_SIZE / 2));
		cy = t2 >> (BIG_CHUNK_SIZE / 2);
	}
	/* final carry becomes the new top word */
	r[i] = cy;
	result->len = aa->len + 1;
	result->sign = aa->sign;
}
873
874
875 /*
876 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
877 * (BIG_CHUNK_SIZE / 2)-bit positive integer.
878 * result should have enough space allocated.
879 */
static void
big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
	int i;
	BIG_CHUNK_TYPE t1, t2, ai, cy, ri;
	BIG_CHUNK_TYPE *a, *r;

	a = aa->value;
	r = result->value;
	cy = 0;
	/* ri carries the half word shifted out of the previous iteration */
	ri = 0;
	/*
	 * Same half-word multiply as big_mulhalf_low(), but the product
	 * is additionally shifted left by half a chunk: each output word
	 * combines the low half of this product with the spill (ri) from
	 * the previous word.
	 */
	for (i = 0; i < aa->len; i++) {
		ai = a[i];
		t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
		t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
		    (t1 >> (BIG_CHUNK_SIZE / 2));
		r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri;
		ri = t2 & BIG_CHUNK_LOWHALFBITS;
		cy = t2 >> (BIG_CHUNK_SIZE / 2);
	}
	r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri;
	result->len = aa->len + 1;
	result->sign = aa->sign;
}
904
905
906 /* it is assumed that result->size is big enough */
/* it is assumed that result->size is big enough */
void
big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
{
	int i;
	BIG_CHUNK_TYPE cy, ai;

	/*
	 * offs must be < BIG_CHUNK_SIZE; a zero shift is handled
	 * separately because (ai >> BIG_CHUNK_SIZE) would be undefined.
	 */
	if (offs == 0) {
		if (result != aa) {
			(void) big_copy(result, aa);
		}
		return;
	}
	/* shift low word first, carrying the displaced high bits upward */
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		ai = aa->value[i];
		result->value[i] = (ai << offs) | cy;
		cy = ai >> (BIG_CHUNK_SIZE - offs);
	}
	/* bits shifted out of the top word need an extra word */
	if (cy != 0) {
		result->len = aa->len + 1;
		result->value[result->len - 1] = cy;
	} else {
		result->len = aa->len;
	}
	result->sign = aa->sign;
}
933
934
935 /* it is assumed that result->size is big enough */
/* it is assumed that result->size is big enough */
void
big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
{
	int i;
	BIG_CHUNK_TYPE cy, ai;

	/*
	 * offs must be < BIG_CHUNK_SIZE; a zero shift is handled
	 * separately because (ai << BIG_CHUNK_SIZE) would be undefined.
	 */
	if (offs == 0) {
		if (result != aa) {
			(void) big_copy(result, aa);
		}
		return;
	}
	/*
	 * Walk upward: word i-1 of the result is the surviving bits of
	 * word i-1 (cy) plus the bits shifted down from word i.
	 * Note: the length is NOT normalized; the top word may be zero.
	 */
	cy = aa->value[0] >> offs;
	for (i = 1; i < aa->len; i++) {
		ai = aa->value[i];
		result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
		cy = ai >> offs;
	}
	result->len = aa->len;
	result->value[result->len - 1] = cy;
	result->sign = aa->sign;
}
958
959
960 /*
961 * result = aa/bb remainder = aa mod bb
962 * it is assumed that aa and bb are positive
963 */
/*
 * big_div_pos()
 * Schoolbook long division on half-word "digits":
 *   result    = aa / bb	(may be NULL if not wanted)
 *   remainder = aa mod bb	(may be NULL if not wanted)
 * aa and bb are assumed positive.  Side effect: aa->len and bb->len
 * are normalized (high-order zero words stripped).
 * Returns BIG_OK, BIG_DIV_BY_0, or BIG_NO_MEM.
 */
BIG_ERR_CODE
big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
{
	BIG_ERR_CODE err = BIG_OK;
	int i, alen, blen, tlen, rlen, offs;
	BIG_CHUNK_TYPE higha, highb, coeff;
	BIG_CHUNK_TYPE *a, *b;
	BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
	/* stack buffers so small operands avoid heap allocation */
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE];

	a = aa->value;
	b = bb->value;
	alen = aa->len;
	blen = bb->len;
	/* normalize both operand lengths */
	while ((alen > 1) && (a[alen - 1] == 0)) {
		alen = alen - 1;
	}
	aa->len = alen;
	while ((blen > 1) && (b[blen - 1] == 0)) {
		blen = blen - 1;
	}
	bb->len = blen;
	if ((blen == 1) && (b[0] == 0)) {
		return (BIG_DIV_BY_0);
	}

	/* aa < bb: quotient is 0 and the remainder is aa itself */
	if (big_cmp_abs(aa, bb) < 0) {
		if ((remainder != NULL) &&
		    ((err = big_copy(remainder, aa)) != BIG_OK)) {
			return (err);
		}
		if (result != NULL) {
			result->len = 1;
			result->sign = 1;
			result->value[0] = 0;
		}
		return (BIG_OK);
	}

	/* temporaries; each falls back to the heap if BIGTMPSIZE is small */
	if ((err = big_init1(&bblow, blen + 1,
	    bblowvalue, arraysize(bblowvalue))) != BIG_OK)
		return (err);

	if ((err = big_init1(&bbhigh, blen + 1,
	    bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp1, alen + 2,
	    tmp1value, arraysize(tmp1value))) != BIG_OK)
		goto ret2;

	if ((err = big_init1(&tmp2, blen + 2,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret3;

	if ((err = big_init1(&tresult, alen - blen + 2,
	    tresultvalue, arraysize(tresultvalue))) != BIG_OK)
		goto ret4;

	/*
	 * Normalization: compute offs so that, after shifting bb left by
	 * offs bits, the top HALF word of bb has its highest bit set.
	 * This keeps the trial-quotient estimate below within 2 of the
	 * true digit.
	 */
	offs = 0;
	highb = b[blen - 1];
	if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
		highb = highb >> (BIG_CHUNK_SIZE / 2);
		offs = (BIG_CHUNK_SIZE / 2);
	}
	while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
		highb = highb << 1;
		offs++;
	}

	/* bblow = bb << offs; bbhigh = bblow shifted by half a word */
	big_shiftleft(&bblow, bb, offs);

	if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
		big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
	} else {
		big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
	}
	if (bbhigh.value[bbhigh.len - 1] == 0) {
		bbhigh.len--;
	} else {
		bbhigh.value[bbhigh.len] = 0;
	}

	/* normalized top word of the divisor, used for digit estimates */
	highb = bblow.value[bblow.len - 1];

	/* tmp1 = normalized dividend; rlen = number of quotient words */
	big_shiftleft(&tmp1, aa, offs);
	rlen = tmp1.len - bblow.len + 1;
	tresult.len = rlen;

	/* extra zero word on top of tmp1 for the two-word estimates */
	tmp1.len++;
	tlen = tmp1.len;
	tmp1.value[tmp1.len - 1] = 0;
	/*
	 * Produce one full quotient word per iteration as two half-word
	 * digits: estimate each digit as higha / (highb + 1) (an
	 * underestimate), subtract coeff * divisor, then correct with at
	 * most a couple of additional divisor subtractions.
	 */
	for (i = 0; i < rlen; i++) {
		/* high half-digit */
		higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
		    (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
		coeff = higha / (highb + 1);
		big_mulhalf_high(&tmp2, &bblow, coeff);
		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
		bbhigh.len++;
		while (tmp1.value[tlen - 1] > 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
			coeff++;
		}
		bbhigh.len--;
		tlen--;
		tmp1.len--;
		while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
			coeff++;
		}
		tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
		/* low half-digit */
		higha = tmp1.value[tlen - 1];
		coeff = higha / (highb + 1);
		big_mulhalf_low(&tmp2, &bblow, coeff);
		tmp2.len--;
		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
		while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bblow);
			coeff++;
		}
		tresult.value[rlen - i - 1] =
		    tresult.value[rlen - i - 1] + coeff;
	}

	/* undo the normalization shift to get the true remainder */
	big_shiftright(&tmp1, &tmp1, offs);

	err = BIG_OK;

	if ((remainder != NULL) &&
	    ((err = big_copy(remainder, &tmp1)) != BIG_OK))
		goto ret;

	if (result != NULL)
		err = big_copy(result, &tresult);

ret:
	big_finish(&tresult);
ret4:
	big_finish(&tmp1);
ret3:
	big_finish(&tmp2);
ret2:
	big_finish(&bbhigh);
ret1:
	big_finish(&bblow);
	return (err);
}
1115
1116
1117 /*
1118 * If there is no processor-specific integer implementation of
1119 * the lower level multiply functions, then this code is provided
1120 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1121 * big_sqr_vec().
1122 *
1123 * There are two generic implementations. One that assumes that
1124 * there is hardware and C compiler support for a 32 x 32 --> 64
1125 * bit unsigned multiply, but otherwise is not specific to any
1126 * processor, platform, or ISA.
1127 *
1128 * The other makes very few assumptions about hardware capabilities.
1129 * It does not even assume that there is any implementation of a
1130 * 32 x 32 --> 64 bit multiply that is accessible to C code and
 * appropriate to use. It instead constructs 32 x 32 --> 64 bit
1132 * multiplies from 16 x 16 --> 32 bit multiplies.
1133 *
1134 */
1135
1136 #if !defined(PSR_MUL)
1137
1138 #ifdef UMUL64
1139
1140 #if (BIG_CHUNK_SIZE == 32)
1141
/* compile-time knob: unroll the inner multiply loops by a factor of 8 */
#define	UNROLL8

/*
 * One round of r[R] = a[R] * d + carry.  The PREFETCH variant loads
 * the next input word into pf before the multiply result is consumed;
 * the NOPREFETCH variant is used for the final round, where a[R + 1]
 * would read past the end of the vector.  All of these expand in a
 * scope that declares uint64_t locals p, pf, t, cy and uint64_t d.
 */
#define	MUL_SET_VEC_ROUND_PREFETCH(R) \
	p = pf * d; \
	pf = (uint64_t)a[R + 1]; \
	t = p + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#define	MUL_SET_VEC_ROUND_NOPREFETCH(R) \
	p = pf * d; \
	t = p + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

/* same rounds, but accumulating into r[R] instead of overwriting it */
#define	MUL_ADD_VEC_ROUND_PREFETCH(R) \
	t = (uint64_t)r[R]; \
	p = pf * d; \
	pf = (uint64_t)a[R + 1]; \
	t = p + t + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#define	MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
	t = (uint64_t)r[R]; \
	p = pf * d; \
	t = p + t + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#ifdef UNROLL8

#define	UNROLL	8
1175
1176 /*
1177 * r = a * b
1178 * where r and a are vectors; b is a single 32-bit digit
1179 */
1180
uint32_t
big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
	uint64_t d, pf, p, t, cy;

	if (len == 0)
		return (0);
	cy = 0;
	d = (uint64_t)b;
	/* prime the software pipeline with the first input word */
	pf = (uint64_t)a[0];
	/* full 8-round unrolled passes while more than UNROLL words remain */
	while (len > UNROLL) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		MUL_SET_VEC_ROUND_PREFETCH(1);
		MUL_SET_VEC_ROUND_PREFETCH(2);
		MUL_SET_VEC_ROUND_PREFETCH(3);
		MUL_SET_VEC_ROUND_PREFETCH(4);
		MUL_SET_VEC_ROUND_PREFETCH(5);
		MUL_SET_VEC_ROUND_PREFETCH(6);
		MUL_SET_VEC_ROUND_PREFETCH(7);
		r += UNROLL;
		a += UNROLL;
		len -= UNROLL;
	}
	/* exactly UNROLL left: last round must not prefetch past the end */
	if (len == UNROLL) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		MUL_SET_VEC_ROUND_PREFETCH(1);
		MUL_SET_VEC_ROUND_PREFETCH(2);
		MUL_SET_VEC_ROUND_PREFETCH(3);
		MUL_SET_VEC_ROUND_PREFETCH(4);
		MUL_SET_VEC_ROUND_PREFETCH(5);
		MUL_SET_VEC_ROUND_PREFETCH(6);
		MUL_SET_VEC_ROUND_NOPREFETCH(7);
		return ((uint32_t)cy);
	}
	/* fewer than UNROLL words: finish one at a time */
	while (len > 1) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		++r;
		++a;
		--len;
	}
	if (len > 0) {
		MUL_SET_VEC_ROUND_NOPREFETCH(0);
	}
	return ((uint32_t)cy);
}
1226
1227 /*
1228 * r += a * b
1229 * where r and a are vectors; b is a single 32-bit digit
1230 */
1231
1232 uint32_t
1233 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1234 {
1235 uint64_t d, pf, p, t, cy;
1236
1237 if (len == 0)
1238 return (0);
1239 cy = 0;
1240 d = (uint64_t)b;
1241 pf = (uint64_t)a[0];
1242 while (len > 8) {
1243 MUL_ADD_VEC_ROUND_PREFETCH(0);
1244 MUL_ADD_VEC_ROUND_PREFETCH(1);
1245 MUL_ADD_VEC_ROUND_PREFETCH(2);
1246 MUL_ADD_VEC_ROUND_PREFETCH(3);
1247 MUL_ADD_VEC_ROUND_PREFETCH(4);
1248 MUL_ADD_VEC_ROUND_PREFETCH(5);
1249 MUL_ADD_VEC_ROUND_PREFETCH(6);
1250 MUL_ADD_VEC_ROUND_PREFETCH(7);
1251 r += 8;
1252 a += 8;
1253 len -= 8;
1254 }
1255 if (len == 8) {
1256 MUL_ADD_VEC_ROUND_PREFETCH(0);
1257 MUL_ADD_VEC_ROUND_PREFETCH(1);
1258 MUL_ADD_VEC_ROUND_PREFETCH(2);
1259 MUL_ADD_VEC_ROUND_PREFETCH(3);
1260 MUL_ADD_VEC_ROUND_PREFETCH(4);
1261 MUL_ADD_VEC_ROUND_PREFETCH(5);
1262 MUL_ADD_VEC_ROUND_PREFETCH(6);
1263 MUL_ADD_VEC_ROUND_NOPREFETCH(7);
1264 return ((uint32_t)cy);
1265 }
1266 while (len > 1) {
1267 MUL_ADD_VEC_ROUND_PREFETCH(0);
1268 ++r;
1269 ++a;
1270 --len;
1271 }
1272 if (len > 0) {
1273 MUL_ADD_VEC_ROUND_NOPREFETCH(0);
1274 }
1275 return ((uint32_t)cy);
1276 }
1277 #endif /* UNROLL8 */
1278
/*
 * big_sqr_vec()
 * r = a * a, where a has len 32-bit words and r has 2 * len words.
 * Computes the off-diagonal products a[i] * a[j] (i < j) once,
 * doubles that triangle, and adds the diagonal squares a[i]^2.
 */
void
big_sqr_vec(uint32_t *r, uint32_t *a, int len)
{
	uint32_t *tr, *ta;
	int tlen, row, col;
	uint64_t p, s, t, t2, cy;
	uint32_t d;

	/*
	 * Phase 1: accumulate the strict upper triangle
	 * sum(i<j) a[i]*a[j] into r[1 .. 2*len-2].
	 */
	tr = r + 1;
	ta = a;
	tlen = len - 1;
	tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
	while (--tlen > 0) {
		tr += 2;
		++ta;
		tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
	}
	/*
	 * Phase 2: double the triangle and fold in the diagonal squares,
	 * walking two result words per input word.
	 */
	s = (uint64_t)a[0];
	s = s * s;
	r[0] = (uint32_t)s;
	cy = s >> 32;
	p = ((uint64_t)r[1] << 1) + cy;
	r[1] = (uint32_t)p;
	cy = p >> 32;
	row = 1;
	col = 2;
	while (row < len) {
		s = (uint64_t)a[row];
		s = s * s;
		p = (uint64_t)r[col] << 1;
		t = p + s;
		d = (uint32_t)t;
		/* carry is split: overflow from t plus overflow from t2 */
		t2 = (uint64_t)d + cy;
		r[col] = (uint32_t)t2;
		cy = (t >> 32) + (t2 >> 32);
		if (row == len - 1)
			break;
		p = ((uint64_t)r[col + 1] << 1) + cy;
		r[col + 1] = (uint32_t)p;
		cy = p >> 32;
		++row;
		col += 2;
	}
	/* top word of the 2*len-word result */
	r[col + 1] = (uint32_t)cy;
}
1324
1325 #else /* BIG_CHUNK_SIZE == 64 */
1326
/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * This is the BIG_CHUNK_SIZE == 64 variant with no double-wide integer
 * type available, so the multiply is done in half-chunk pieces: one pass
 * with the low half of digit, a second pass with the high half shifted up
 * by BIG_CHUNK_SIZE / 2.  cy/cy1 hold intermediate sums whose top half is
 * the carry into the next position.
 */
BIG_CHUNK_TYPE
big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
    BIG_CHUNK_TYPE digit)
{
	BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
	int i;

	cy1 = 0;
	dlow = digit & BIG_CHUNK_LOWHALFBITS;
	dhigh = digit >> (BIG_CHUNK_SIZE / 2);
	/* pass 1: r += a * dlow, half a chunk at a time */
	for (i = 0; i < len; i++) {
		cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
		    dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
		    (r[i] & BIG_CHUNK_LOWHALFBITS);
		cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
		    dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
		    (r[i] >> (BIG_CHUNK_SIZE / 2));
		r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
		    (cy1 << (BIG_CHUNK_SIZE / 2));
	}
	retcy = cy1 >> (BIG_CHUNK_SIZE / 2);

	/*
	 * pass 2: r += (a * dhigh) << (BIG_CHUNK_SIZE / 2); the result of
	 * each step straddles a chunk boundary, hence the staggered reads
	 * of r[i] high half and r[i + 1] low half.
	 */
	cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
	for (i = 0; i < len - 1; i++) {
		cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
		    dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
		    (r[i] >> (BIG_CHUNK_SIZE / 2));
		r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
		    (cy << (BIG_CHUNK_SIZE / 2));
		cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
		    dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
		    (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
	}
	/* final (top) chunk of pass 2, plus the carry from pass 1 */
	cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
	    dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
	    (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
	r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
	    (cy << (BIG_CHUNK_SIZE / 2));
	retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
	    dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;

	return (retcy);
}
1374
1375
1376 /*
1377 * r = a * digit, r and a are vectors of length len
1378 * returns the carry digit
1379 */
1380 BIG_CHUNK_TYPE
1381 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1382 BIG_CHUNK_TYPE digit)
1383 {
1384 int i;
1385
1386 ASSERT(r != a);
1387 for (i = 0; i < len; i++) {
1388 r[i] = 0;
1389 }
1390 return (big_mul_add_vec(r, a, len, digit));
1391 }
1392
1393 void
1394 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1395 {
1396 int i;
1397
1398 ASSERT(r != a);
1399 r[len] = big_mul_set_vec(r, a, len, a[0]);
1400 for (i = 1; i < len; ++i)
1401 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1402 }
1403
1404 #endif /* BIG_CHUNK_SIZE == 32/64 */
1405
1406
1407 #else /* ! UMUL64 */
1408
1409 #if (BIG_CHUNK_SIZE != 32)
1410 #error Don't use 64-bit chunks without defining UMUL64
1411 #endif
1412
1413
/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * 32-bit-chunk variant with no 64-bit multiply available (!UMUL64): the
 * digit is split into 16-bit halves and applied in two passes; all
 * intermediate products then fit in 32 bits.
 */
uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
	uint32_t cy, cy1, retcy, dlow, dhigh;
	int i;

	cy1 = 0;
	dlow = digit & 0xffff;
	dhigh = digit >> 16;
	/* pass 1: r += a * dlow, 16 bits at a time */
	for (i = 0; i < len; i++) {
		cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
		cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
		r[i] = (cy & 0xffff) | (cy1 << 16);
	}
	retcy = cy1 >> 16;

	/*
	 * pass 2: r += (a * dhigh) << 16; each step straddles a chunk
	 * boundary, hence the staggered reads of r[i] and r[i + 1].
	 */
	cy1 = r[0] & 0xffff;
	for (i = 0; i < len - 1; i++) {
		cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
		r[i] = (cy1 & 0xffff) | (cy << 16);
		cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
	}
	/* top chunk of pass 2, folding in the carry from pass 1 */
	cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
	r[len - 1] = (cy1 & 0xffff) | (cy << 16);
	retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;

	return (retcy);
}
1446
1447
1448 /*
1449 * r = a * digit, r and a are vectors of length len
1450 * returns the carry digit
1451 */
1452 uint32_t
1453 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1454 {
1455 int i;
1456
1457 ASSERT(r != a);
1458 for (i = 0; i < len; i++) {
1459 r[i] = 0;
1460 }
1461
1462 return (big_mul_add_vec(r, a, len, digit));
1463 }
1464
1465 void
1466 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1467 {
1468 int i;
1469
1470 ASSERT(r != a);
1471 r[len] = big_mul_set_vec(r, a, len, a[0]);
1472 for (i = 1; i < len; ++i)
1473 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1474 }
1475
1476 #endif /* UMUL64 */
1477
1478 void
1479 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1480 BIG_CHUNK_TYPE *b, int blen)
1481 {
1482 int i;
1483
1484 r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1485 for (i = 1; i < blen; ++i)
1486 r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
1487 }
1488
1489
1490 #endif /* ! PSR_MUL */
1491
1492
1493 /*
1494 * result = aa * bb result->value should be big enough to hold the result
1495 *
1496 * Implementation: Standard grammar school algorithm
1497 *
1498 */
1499 BIG_ERR_CODE
1500 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1501 {
1502 BIGNUM tmp1;
1503 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1504 BIG_CHUNK_TYPE *r, *t, *a, *b;
1505 BIG_ERR_CODE err;
1506 int i, alen, blen, rsize, sign, diff;
1507
1508 if (aa == bb) {
1509 diff = 0;
1510 } else {
1511 diff = big_cmp_abs(aa, bb);
1512 if (diff < 0) {
1513 BIGNUM *tt;
1514 tt = aa;
1515 aa = bb;
1516 bb = tt;
1517 }
1518 }
1519 a = aa->value;
1520 b = bb->value;
1521 alen = aa->len;
1522 blen = bb->len;
1523 while ((alen > 1) && (a[alen - 1] == 0)) {
1524 alen--;
1525 }
1526 aa->len = alen;
1527 while ((blen > 1) && (b[blen - 1] == 0)) {
1528 blen--;
1529 }
1530 bb->len = blen;
1531
1532 rsize = alen + blen;
1533 ASSERT(rsize > 0);
1534 if (result->size < rsize) {
1535 err = big_extend(result, rsize);
1536 if (err != BIG_OK) {
1537 return (err);
1538 }
1539 /* aa or bb might be an alias to result */
1540 a = aa->value;
1541 b = bb->value;
1542 }
1543 r = result->value;
1544
1545 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1546 result->len = 1;
1547 result->sign = 1;
1548 r[0] = 0;
1549 return (BIG_OK);
1550 }
1551 sign = aa->sign * bb->sign;
1552 if ((alen == 1) && (a[0] == 1)) {
1553 for (i = 0; i < blen; i++) {
1554 r[i] = b[i];
1555 }
1556 result->len = blen;
1557 result->sign = sign;
1558 return (BIG_OK);
1559 }
1560 if ((blen == 1) && (b[0] == 1)) {
1561 for (i = 0; i < alen; i++) {
1562 r[i] = a[i];
1563 }
1564 result->len = alen;
1565 result->sign = sign;
1566 return (BIG_OK);
1567 }
1568
1569 if ((err = big_init1(&tmp1, rsize,
1570 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1571 return (err);
1572 }
1573 (void) big_copy(&tmp1, aa);
1574 t = tmp1.value;
1575
1576 for (i = 0; i < rsize; i++) {
1577 t[i] = 0;
1578 }
1579
1580 if (diff == 0 && alen > 2) {
1581 BIG_SQR_VEC(t, a, alen);
1582 } else if (blen > 0) {
1583 BIG_MUL_VEC(t, a, alen, b, blen);
1584 }
1585
1586 if (t[rsize - 1] == 0) {
1587 tmp1.len = rsize - 1;
1588 } else {
1589 tmp1.len = rsize;
1590 }
1591
1592 err = big_copy(result, &tmp1);
1593
1594 result->sign = sign;
1595
1596 big_finish(&tmp1);
1597
1598 return (err);
1599 }
1600
1601
/*
 * Montgomery multiplication: ret = a * b * R^-1 (mod n), where
 * R = 2^(nlen * BIG_CHUNK_SIZE) and n0 must be -n^-1 mod 2^BIG_CHUNK_SIZE
 * (as produced by big_n0()).
 *
 * caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1
 * and that ret is not n
 */
BIG_ERR_CODE
big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
{
	int i, j, nlen, needsubtract;
	BIG_CHUNK_TYPE *nn, *rr;
	BIG_CHUNK_TYPE digit, c;
	BIG_ERR_CODE err;

	nlen = n->len;
	nn = n->value;

	rr = ret->value;

	/* ret = a * b (ordinary product) */
	if ((err = big_mul(ret, a, b)) != BIG_OK) {
		return (err);
	}

	/* big_mul may have reallocated ret->value; refresh the pointer */
	rr = ret->value;
	/* zero-pad the product up to the full 2 * nlen + 1 working length */
	for (i = ret->len; i < 2 * nlen + 1; i++) {
		rr[i] = 0;
	}
	/*
	 * Montgomery reduction: for each of the nlen low chunks, add the
	 * multiple of n (selected via n0) that zeroes that chunk, then
	 * propagate the resulting carry upward.
	 */
	for (i = 0; i < nlen; i++) {
		digit = rr[i];
		digit = digit * n0;

		c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit);
		j = i + nlen;
		rr[j] += c;
		/* ripple the carry while the addition wrapped around */
		while (rr[j] < c) {
			rr[j + 1] += 1;
			j++;
			c = 1;
		}
	}

	/* if the reduced value (rr / R) is still >= n, subtract n once */
	needsubtract = 0;
	if ((rr[2 * nlen] != 0))
		needsubtract = 1;
	else {
		for (i = 2 * nlen - 1; i >= nlen; i--) {
			if (rr[i] > nn[i - nlen]) {
				needsubtract = 1;
				break;
			} else if (rr[i] < nn[i - nlen]) {
				break;
			}
		}
	}
	if (needsubtract)
		big_sub_vec(rr, rr + nlen, nn, nlen);
	else {
		/* shift down by nlen chunks — the divide by R */
		for (i = 0; i < nlen; i++) {
			rr[i] = rr[i + nlen];
		}
	}

	/* Remove leading zeros, but keep at least 1 digit: */
	for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
		;
	ret->len = i + 1;

	return (BIG_OK);
}
1669
1670
/*
 * Compute the Montgomery constant n0 = -n^-1 mod 2^BIG_CHUNK_SIZE for a
 * single chunk n, one bit per iteration: whenever the running remainder
 * tmp is odd, the corresponding bit of the result is set and n is
 * subtracted; tmp is then shifted down.
 *
 * NOTE(review): this assumes n is odd (always true for a Montgomery
 * modulus chunk) — the loop does not terminate meaningfully for even n.
 */
BIG_CHUNK_TYPE
big_n0(BIG_CHUNK_TYPE n)
{
	int i;
	BIG_CHUNK_TYPE result, tmp;

	result = 0;
	tmp = BIG_CHUNK_ALLBITS;	/* -1 mod 2^BIG_CHUNK_SIZE */
	for (i = 0; i < BIG_CHUNK_SIZE; i++) {
		if ((tmp & 1) == 1) {
			/* shift a 1 bit into the top of result */
			result = (result >> 1) | BIG_CHUNK_HIGHBIT;
			tmp = tmp - n;
		} else {
			result = (result >> 1);
		}
		tmp = tmp >> 1;
	}

	return (result);
}
1691
1692
1693 int
1694 big_numbits(BIGNUM *n)
1695 {
1696 int i, j;
1697 BIG_CHUNK_TYPE t;
1698
1699 for (i = n->len - 1; i > 0; i--) {
1700 if (n->value[i] != 0) {
1701 break;
1702 }
1703 }
1704 t = n->value[i];
1705 for (j = BIG_CHUNK_SIZE; j > 0; j--) {
1706 if ((t & BIG_CHUNK_HIGHBIT) == 0) {
1707 t = t << 1;
1708 } else {
1709 return (BIG_CHUNK_SIZE * i + j);
1710 }
1711 }
1712 return (0);
1713 }
1714
1715
1716 /* caller must make sure that a < n */
1717 BIG_ERR_CODE
1718 big_mont_rr(BIGNUM *result, BIGNUM *n)
1719 {
1720 BIGNUM rr;
1721 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1722 int len, i;
1723 BIG_ERR_CODE err;
1724
1725 rr.malloced = 0;
1726 len = n->len;
1727
1728 if ((err = big_init1(&rr, 2 * len + 1,
1729 rrvalue, arraysize(rrvalue))) != BIG_OK) {
1730 return (err);
1731 }
1732
1733 for (i = 0; i < 2 * len; i++) {
1734 rr.value[i] = 0;
1735 }
1736 rr.value[2 * len] = 1;
1737 rr.len = 2 * len + 1;
1738 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1739 goto ret;
1740 }
1741 err = big_copy(result, &rr);
1742 ret:
1743 big_finish(&rr);
1744 return (err);
1745 }
1746
1747
1748 /* caller must make sure that a < n */
1749 BIG_ERR_CODE
1750 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
1751 BIGNUM *n_rr)
1752 {
1753 BIGNUM rr;
1754 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1755 int len, i;
1756 BIG_ERR_CODE err;
1757
1758 rr.malloced = 0;
1759 len = n->len;
1760
1761 if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
1762 != BIG_OK) {
1763 return (err);
1764 }
1765
1766 if (n_rr == NULL) {
1767 for (i = 0; i < 2 * len; i++) {
1768 rr.value[i] = 0;
1769 }
1770 rr.value[2 * len] = 1;
1771 rr.len = 2 * len + 1;
1772 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1773 goto ret;
1774 }
1775 n_rr = &rr;
1776 }
1777
1778 if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
1779 goto ret;
1780 }
1781 err = big_copy(result, &rr);
1782
1783 ret:
1784 big_finish(&rr);
1785 return (err);
1786 }
1787
1788
1789 #ifdef USE_FLOATING_POINT
1790 #define big_modexp_ncp_float big_modexp_ncp_sw
1791 #else
1792 #define big_modexp_ncp_int big_modexp_ncp_sw
1793 #endif
1794
1795 #define MAX_EXP_BIT_GROUP_SIZE 6
1796 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1797
/*
 * Integer (non-floating-point) software core of modular exponentiation.
 * All values are in Montgomery form.  Uses sliding-window
 * square-and-multiply: a table of powers of ma is precomputed, and the
 * exponent bits are consumed in groups of up to groupbits.  Window state:
 *   p        - accumulated window bits,
 *   k        - squarings owed before the table multiply,
 *   l        - trailing zero bits (squarings owed after the multiply),
 *   bitcount - bits accumulated so far in the current window.
 *
 * NOTE(review): the final l-loop squares 'tmp' but writes to 'result';
 * this is only correct because the visible caller (big_modexp_ext)
 * passes the same BIGNUM for both — confirm before calling with
 * distinct result/tmp objects.
 */
/* ARGSUSED */
static BIG_ERR_CODE
big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)

{
	BIGNUM apowers[APOWERS_MAX_SIZE];
	BIGNUM tmp1;
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	int i, j, k, l, m, p;
	uint32_t bit, bitind, bitcount, groupbits, apowerssize;
	uint32_t nbits;
	BIG_ERR_CODE err;

	/* small exponents don't amortize the table — use a 1-bit window */
	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}


	if ((err = big_init1(&tmp1, 2 * n->len + 1,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}

	/* clear the malloced bit to help cleanup */
	for (i = 0; i < apowerssize; i++) {
		apowers[i].malloced = 0;
	}

	for (i = 0; i < apowerssize; i++) {
		if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
		    BIG_OK) {
			goto ret;
		}
	}

	/*
	 * Build the power table: apowers[0] = ma, then ma is replaced by
	 * ma^2 and each subsequent entry is the previous one times ma^2.
	 */
	(void) big_copy(&(apowers[0]), ma);

	if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
		goto ret;
	}
	(void) big_copy(ma, &tmp1);

	for (i = 1; i < apowerssize; i++) {
		if ((err = big_mont_mul(&tmp1, ma,
		    &(apowers[i - 1]), n, n0)) != BIG_OK) {
			goto ret;
		}
		(void) big_copy(&apowers[i], &tmp1);
	}

	/* walk the exponent from its most significant bit downward */
	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				/* outside any window: plain squaring */
				if ((err = big_mont_mul(tmp,
				    tmp, tmp, n, n0)) != BIG_OK) {
					goto ret;
				}
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					/* k squarings, table multiply, */
					/* then l more squarings */
					for (m = 0; m < k; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					if ((err = big_mont_mul(tmp, tmp,
					    &(apowers[p >> (l + 1)]),
					    n, n0)) != BIG_OK) {
						goto ret;
					}
					for (m = 0; m < l; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	/* flush a partial window left at the end of the exponent */
	for (m = 0; m < k; m++) {
		if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	if (p != 0) {
		if ((err = big_mont_mul(tmp, tmp,
		    &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	for (m = 0; m < l; m++) {
		if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		big_finish(&(apowers[i]));
	}
	big_finish(&tmp1);

	return (err);
}
1931
1932
1933 #ifdef USE_FLOATING_POINT
1934
1935 #ifdef _KERNEL
1936
1937 #include <sys/sysmacros.h>
1938 #include <sys/regset.h>
1939 #include <sys/fpu/fpusystm.h>
1940
1941 /* the alignment for block stores to save fp registers */
1942 #define FPR_ALIGN (64)
1943
1944 extern void big_savefp(kfpu_t *);
1945 extern void big_restorefp(kfpu_t *);
1946
1947 #endif /* _KERNEL */
1948
/*
 * This version makes use of floating point for performance: the operands
 * are converted into arrays of doubles holding 16- and 32-bit digits and
 * the Montgomery multiplications are performed by mont_mulf_noconv().
 * The windowed exponentiation state machine (k, l, p, bitcount) mirrors
 * big_modexp_ncp_int exactly; see the comments there.
 */
static BIG_ERR_CODE
big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)
{

	int i, j, k, l, m, p;
	uint32_t bit, bitind, bitcount, nlen;
	double dn0;
	double *dn, *dt, *d16r, *d32r;
	uint32_t *nint, *prod;
	double *apowers[APOWERS_MAX_SIZE];
	uint32_t nbits, groupbits, apowerssize;
	BIG_ERR_CODE err = BIG_OK;

#ifdef _KERNEL
	uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
	kfpu_t *fpu;

#ifdef DEBUG
	if (!fpu_exists)
		return (BIG_GENERAL_ERR);
#endif

	/* save the FP registers into an FPR_ALIGN-aligned area */
	fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
	big_savefp(fpu);

#endif /* _KERNEL */

	/* small exponents use a 1-bit window, as in the integer version */
	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}

	/* nlen counts 32-bit words regardless of BIG_CHUNK_SIZE */
	nlen = (BIG_CHUNK_SIZE / 32) * n->len;
	dn0 = (double)(n0 & 0xffff);

	/* NULL all buffers first so the cleanup path can free safely */
	dn = dt = d16r = d32r = NULL;
	nint = prod = NULL;
	for (i = 0; i < apowerssize; i++) {
		apowers[i] = NULL;
	}

	if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	for (i = 0; i < apowerssize; i++) {
		if ((apowers[i] = big_malloc((2 * nlen + 1) *
		    sizeof (double))) == NULL) {
			err = BIG_NO_MEM;
			goto ret;
		}
	}

	/* unpack ma into 32-bit words (splitting 64-bit chunks if needed) */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < ma->len; i++) {
		nint[i] = ma->value[i];
	}
	for (; i < nlen; i++) {
		nint[i] = 0;
	}
#else
	for (i = 0; i < ma->len; i++) {
		nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
		nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
	}
	for (i = ma->len * 2; i < nlen; i++) {
		nint[i] = 0;
	}
#endif
	conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);

	/* unpack the modulus n the same way */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < n->len; i++) {
		nint[i] = n->value[i];
	}
	for (; i < nlen; i++) {
		nint[i] = 0;
	}
#else
	for (i = 0; i < n->len; i++) {
		nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
		nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
	}
	for (i = n->len * 2; i < nlen; i++) {
		nint[i] = 0;
	}
#endif
	conv_i32_to_d32(dn, nint, nlen);

	/* precompute the table of powers of ma in d16 form */
	mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
	conv_i32_to_d32(d32r, prod, nlen);
	for (i = 1; i < apowerssize; i++) {
		mont_mulf_noconv(prod, d32r, apowers[i - 1],
		    dt, dn, nint, nlen, dn0);
		conv_i32_to_d16(apowers[i], prod, nlen);
	}

	/* seed the running accumulator prod from tmp */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < tmp->len; i++) {
		prod[i] = tmp->value[i];
	}
	for (; i < nlen + 1; i++) {
		prod[i] = 0;
	}
#else
	for (i = 0; i < tmp->len; i++) {
		prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
		prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
	}
	for (i = tmp->len * 2; i < nlen + 1; i++) {
		prod[i] = 0;
	}
#endif

	/* sliding-window scan of the exponent, MSB first */
	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				/* outside any window: plain squaring */
				conv_i32_to_d32_and_d16(d32r, d16r,
				    prod, nlen);
				mont_mulf_noconv(prod, d32r, d16r,
				    dt, dn, nint, nlen, dn0);
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					/* k squarings, table multiply, */
					/* then l more squarings */
					for (m = 0; m < k; m++) {
						conv_i32_to_d32_and_d16(d32r,
						    d16r, prod, nlen);
						mont_mulf_noconv(prod, d32r,
						    d16r, dt, dn, nint,
						    nlen, dn0);
					}
					conv_i32_to_d32(d32r, prod, nlen);
					mont_mulf_noconv(prod, d32r,
					    apowers[p >> (l + 1)],
					    dt, dn, nint, nlen, dn0);
					for (m = 0; m < l; m++) {
						conv_i32_to_d32_and_d16(d32r,
						    d16r, prod, nlen);
						mont_mulf_noconv(prod, d32r,
						    d16r, dt, dn, nint,
						    nlen, dn0);
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	/* flush a partial window left at the end of the exponent */
	for (m = 0; m < k; m++) {
		conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
		mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
	}
	if (p != 0) {
		conv_i32_to_d32(d32r, prod, nlen);
		mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
		    dt, dn, nint, nlen, dn0);
	}
	for (m = 0; m < l; m++) {
		conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
		mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
	}

	/* repack prod into result chunks and trim leading zero chunks */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < nlen; i++) {
		result->value[i] = prod[i];
	}
	for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
		;
#else
	for (i = 0; i < nlen / 2; i++) {
		result->value[i] = (uint64_t)(prod[2 * i]) +
		    (((uint64_t)(prod[2 * i + 1])) << 32);
	}
	for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
		;
#endif
	result->len = i + 1;

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		if (apowers[i] != NULL)
			big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
	}
	if (d32r != NULL) {
		big_free(d32r, nlen * sizeof (double));
	}
	if (d16r != NULL) {
		big_free(d16r, (2 * nlen + 1) * sizeof (double));
	}
	if (prod != NULL) {
		big_free(prod, (nlen + 1) * sizeof (uint32_t));
	}
	if (nint != NULL) {
		big_free(nint, nlen * sizeof (uint32_t));
	}
	if (dt != NULL) {
		big_free(dt, (4 * nlen + 2) * sizeof (double));
	}
	if (dn != NULL) {
		big_free(dn, nlen * sizeof (double));
	}

#ifdef _KERNEL
	big_restorefp(fpu);
#endif

	return (err);
}
2202
2203 #endif /* USE_FLOATING_POINT */
2204
2205
2206 BIG_ERR_CODE
2207 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr,
2208 big_modexp_ncp_info_t *info)
2209 {
2210 BIGNUM ma, tmp, rr;
2211 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE];
2212 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2213 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
2214 BIG_ERR_CODE err;
2215 BIG_CHUNK_TYPE n0;
2216
2217 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) !=
2218 BIG_OK) {
2219 return (err);
2220 }
2221 ma.len = 1;
2222 ma.value[0] = 0;
2223
2224 if ((err = big_init1(&tmp, 2 * n->len + 1,
2225 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2226 goto ret1;
2227 }
2228
2229 /* clear the malloced bit to help cleanup */
2230 rr.malloced = 0;
2231
2232 if (n_rr == NULL) {
2233 if ((err = big_init1(&rr, 2 * n->len + 1,
2234 rrvalue, arraysize(rrvalue))) != BIG_OK) {
2235 goto ret2;
2236 }
2237 if (big_mont_rr(&rr, n) != BIG_OK) {
2238 goto ret;
2239 }
2240 n_rr = &rr;
2241 }
2242
2243 n0 = big_n0(n->value[0]);
2244
2245 if (big_cmp_abs(a, n) > 0) {
2246 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2247 goto ret;
2248 }
2249 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2250 } else {
2251 err = big_mont_conv(&ma, a, n, n0, n_rr);
2252 }
2253 if (err != BIG_OK) {
2254 goto ret;
2255 }
2256
2257 tmp.len = 1;
2258 tmp.value[0] = 1;
2259 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) {
2260 goto ret;
2261 }
2262
2263 if ((info != NULL) && (info->func != NULL)) {
2264 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0,
2265 info->ncp, info->reqp);
2266 } else {
2267 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0);
2268 }
2269 if (err != BIG_OK) {
2270 goto ret;
2271 }
2272
2273 ma.value[0] = 1;
2274 ma.len = 1;
2275 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) {
2276 goto ret;
2277 }
2278 err = big_copy(result, &tmp);
2279
2280 ret:
2281 if (rr.malloced) {
2282 big_finish(&rr);
2283 }
2284 ret2:
2285 big_finish(&tmp);
2286 ret1:
2287 big_finish(&ma);
2288
2289 return (err);
2290 }
2291
/*
 * result = a ^ e mod n — convenience wrapper around big_modexp_ext()
 * with no hardware-provider info (always the software path).
 */
BIG_ERR_CODE
big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
{
	return (big_modexp_ext(result, a, e, n, n_rr, NULL));
}
2297
2298
/*
 * RSA-style modular exponentiation via the Chinese Remainder Theorem:
 * computes a^d mod (p * q) from the two half-sized exponentiations
 *   ap = a^dmodpminus1 mod p    and    aq = a^dmodqminus1 mod q,
 * then recombines (Garner): result = ap + p * ((aq - ap) * pinvmodq mod q).
 * p_rr/q_rr are optional precomputed Montgomery R^2 constants; info
 * optionally selects a hardware-provider exponentiation core.
 */
BIG_ERR_CODE
big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
    BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
    BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
{
	BIGNUM ap, aq, tmp;
	int alen, biglen, sign;
	BIG_ERR_CODE err;

	if (p->len > q->len) {
		biglen = p->len;
	} else {
		biglen = q->len;
	}

	if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
		goto ret2;
	}

	/*
	 * check whether a is too short - to avoid timing attacks
	 */
	alen = a->len;
	while ((alen > p->len) && (a->value[alen - 1] == 0)) {
		alen--;
	}
	if (alen < p->len + q->len) {
		/*
		 * a is too short, add p*q to it before
		 * taking it modulo p and q
		 * this will also affect timing, but this difference
		 * does not depend on p or q, only on a
		 * (in "normal" operation, this path will never be
		 * taken, so it is not a performance penalty
		 */
		if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
			goto ret;
		}
	} else {
		/* ap = a mod p, aq = a mod q */
		if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
			goto ret;
		}
	}

	/* the two half-sized exponentiations */
	if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
	    BIG_OK) {
		goto ret;
	}
	if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
	    BIG_OK) {
		goto ret;
	}
	/* Garner recombination: aq = (aq - ap) * pinvmodq mod q */
	if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
		goto ret;
	}
	if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
		goto ret;
	}
	/* take the remainder on |tmp|, then fix up a negative difference */
	sign = tmp.sign;
	tmp.sign = 1;
	if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
		goto ret;
	}
	if ((sign == -1) && (!big_is_zero(&aq))) {
		(void) big_sub_pos(&aq, q, &aq);
	}
	/* result = ap + aq * p */
	if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
		goto ret;
	}
	err = big_add_abs(result, &ap, &tmp);

ret:
	big_finish(&tmp);
ret2:
	big_finish(&aq);
ret1:
	big_finish(&ap);

	return (err);
}
2397
2398
/*
 * CRT modular exponentiation — convenience wrapper around
 * big_modexp_crt_ext() with no hardware-provider info.
 */
BIG_ERR_CODE
big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
    BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
    BIGNUM *p_rr, BIGNUM *q_rr)
{
	return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
	    p, q, pinvmodq, p_rr, q_rr, NULL));
}
2407
2408
/*
 * Shared single-chunk constants 1, 2 and 4, statically initialized so no
 * allocation is needed.  big_One and big_Two are exported; big_Four is
 * file-local.
 * NOTE(review): the brace initializers must match the BIGNUM field order
 * declared in the header — confirm if BIGNUM's layout ever changes.
 */
static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
BIGNUM big_One = {1, 1, 1, 0, onearr};

static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
BIGNUM big_Two = {1, 1, 1, 0, twoarr};

static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2417
2418
/*
 * result = floor(sqrt(n)) for non-negative n, by bisection.
 *
 * An m-bit number has a square root of nrootbits = ceil(m / 2) bits, so
 * the search brackets are initialized to
 *   low  = 2^(nrootbits - 1)   (smallest nrootbits-bit value)
 *   high = 2^nrootbits - 1     (largest nrootbits-bit value)
 * and the three BIGNUM temporaries rotate roles via pointer swaps.
 */
BIG_ERR_CODE
big_sqrt_pos(BIGNUM *result, BIGNUM *n)
{
	BIGNUM *high, *low, *mid, *t;
	BIGNUM t1, t2, t3, prod;
	BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
	BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE];
	int i, diff;
	uint32_t nbits, nrootbits, highbits;
	BIG_ERR_CODE err;

	nbits = big_numbits(n);

	if ((err = big_init1(&t1, n->len + 1,
	    t1value, arraysize(t1value))) != BIG_OK)
		return (err);
	if ((err = big_init1(&t2, n->len + 1,
	    t2value, arraysize(t2value))) != BIG_OK)
		goto ret1;
	if ((err = big_init1(&t3, n->len + 1,
	    t3value, arraysize(t3value))) != BIG_OK)
		goto ret2;
	if ((err = big_init1(&prod, n->len + 1,
	    prodvalue, arraysize(prodvalue))) != BIG_OK)
		goto ret3;

	/* build the initial low (t1) and high (t2) brackets chunkwise */
	nrootbits = (nbits + 1) / 2;
	t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
	for (i = 0; i < t1.len; i++) {
		t1.value[i] = 0;
		t2.value[i] = BIG_CHUNK_ALLBITS;
	}
	/* set the partial top chunk of each bracket */
	highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
	if (highbits == BIG_CHUNK_SIZE) {
		t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
		t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
	} else {
		t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
		t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
	}

	high = &t2;
	low = &t1;
	mid = &t3;

	/* if high^2 <= n already, high is the root */
	if ((err = big_mul(&prod, high, high)) != BIG_OK) {
		goto ret;
	}
	diff = big_cmp_abs(&prod, n);
	if (diff <= 0) {
		err = big_copy(result, high);
		goto ret;
	}

	/*
	 * Invariant: low^2 <= n < high^2.  Bisect until the brackets are
	 * adjacent (high - low == 1), then low is floor(sqrt(n)).
	 */
	(void) big_sub_pos(mid, high, low);
	while (big_cmp_abs(&big_One, mid) != 0) {
		(void) big_add_abs(mid, high, low);
		(void) big_half_pos(mid, mid);
		if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
			goto ret;
		diff = big_cmp_abs(&prod, n);
		if (diff > 0) {
			/* mid^2 > n: mid becomes the new high */
			t = high;
			high = mid;
			mid = t;
		} else if (diff < 0) {
			/* mid^2 < n: mid becomes the new low */
			t = low;
			low = mid;
			mid = t;
		} else {
			/* exact square */
			err = big_copy(result, low);
			goto ret;
		}
		(void) big_sub_pos(mid, high, low);
	}

	err = big_copy(result, low);
ret:
	if (prod.malloced) big_finish(&prod);
ret3:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}
2509
2510
/*
 * *jac = Jacobi symbol (nn / mm), computed by the binary algorithm:
 * factors of two are stripped using the rule (2/m) = -1 iff
 * m mod 8 is 3 or 5, and the operands are swapped via quadratic
 * reciprocity, which flips the sign when both are congruent to 3 mod 4.
 * The symbol is 0 when nn is zero or both operands are even.
 */
BIG_ERR_CODE
big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
{
	BIGNUM *t, *tmp2, *m, *n;
	BIGNUM t1, t2, t3;
	BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
	int len, err;

	/* symbol is 0 for nn == 0, and when both operands are even */
	if (big_is_zero(nn) ||
	    (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
		*jac = 0;
		return (BIG_OK);
	}

	if (nn->len > mm->len) {
		len = nn->len;
	} else {
		len = mm->len;
	}

	if ((err = big_init1(&t1, len,
	    t1value, arraysize(t1value))) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&t2, len,
	    t2value, arraysize(t2value))) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&t3, len,
	    t3value, arraysize(t3value))) != BIG_OK) {
		goto ret2;
	}

	n = &t1;
	m = &t2;
	tmp2 = &t3;

	/* work on copies; nn and mm are left untouched */
	(void) big_copy(n, nn);
	(void) big_copy(m, mm);

	*jac = 1;
	while (big_cmp_abs(&big_One, m) != 0) {
		if (big_is_zero(n)) {
			*jac = 0;
			goto ret;
		}
		if ((m->value[0] & 1) == 0) {
			/* (2/n) factor: sign flips when n mod 8 is 3 or 5 */
			if (((n->value[0] & 7) == 3) ||
			    ((n->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(m, m);
		} else if ((n->value[0] & 1) == 0) {
			/* same rule with the roles reversed */
			if (((m->value[0] & 7) == 3) ||
			    ((m->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(n, n);
		} else {
			/* both odd: reciprocity flip, then m = m mod n */
			if (((m->value[0] & 3) == 3) &&
			    ((n->value[0] & 3) == 3)) {
				*jac = -*jac;
			}
			if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
				goto ret;
			}
			t = tmp2;
			tmp2 = m;
			m = n;
			n = t;
		}
	}
	err = BIG_OK;

ret:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}
2594
2595
/*
 * Compute the Lucas sequence values Lk = V_k(p, 1) mod n and
 * Lkminus1 = V_{k-1}(p, 1) mod n, walking the bits of k with a
 * Lucas-ladder using the identities
 *   V_{i+j} = V_i * V_j - V_{i-j}   (here with Q = 1).
 * Used by the primality test later in this file.
 *
 * NOTE(review): ki is derived from k (complemented against the next
 * power of two) so that its bits steer which ladder arm is updated each
 * step — presumed equivalent to scanning k's bits directly; confirm
 * against the primality-test literature before modifying.
 */
BIG_ERR_CODE
big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
{
	int i;
	uint32_t m, w;
	BIG_CHUNK_TYPE bit;
	BIGNUM ki, tmp, tmp2;
	BIG_CHUNK_TYPE kivalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
	BIG_ERR_CODE err;

	/* base case: L_1 = p, L_0 = 2 */
	if (big_cmp_abs(k, &big_One) == 0) {
		(void) big_copy(Lk, p);
		(void) big_copy(Lkminus1, &big_Two);
		return (BIG_OK);
	}

	if ((err = big_init1(&ki, k->len + 1,
	    kivalue, arraysize(kivalue))) != BIG_OK)
		return (err);

	if ((err = big_init1(&tmp, 2 * n->len + 1,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp2, n->len,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret2;

	/* ki = (next power of two above k) - k; bit starts at k's MSB */
	m = big_numbits(k);
	ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
	w = (m - 1) / BIG_CHUNK_SIZE;
	bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
	for (i = 0; i < ki.len; i++) {
		ki.value[i] = 0;
	}
	ki.value[ki.len - 1] = bit;
	if (big_cmp_abs(k, &ki) != 0) {
		(void) big_double(&ki, &ki);
	}
	(void) big_sub_pos(&ki, &ki, k);

	(void) big_copy(Lk, p);
	(void) big_copy(Lkminus1, &big_Two);

	/* one ladder step per bit, MSB to LSB */
	for (i = 0; i < m; i++) {
		/* tmp2 = (Lk * Lkminus1 - p) mod n; note +n keeps it >= 0 */
		if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
			goto ret;
		}
		(void) big_add_abs(&tmp, &tmp, n);
		(void) big_sub_pos(&tmp, &tmp, p);
		if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
			goto ret;
		}
		if ((ki.value[w] & bit) != 0) {
			/* Lkminus1 = (Lkminus1^2 - 2) mod n; Lk = tmp2 */
			if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lk, &tmp2);
		} else {
			/* Lk = (Lk^2 - 2) mod n; Lkminus1 = tmp2 */
			if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lkminus1, &tmp2);
		}
		/* advance to the next lower bit, crossing chunk borders */
		bit = bit >> 1;
		if (bit == 0) {
			bit = BIG_CHUNK_HIGHBIT;
			w--;
		}
	}

	err = BIG_OK;

ret:
	if (tmp2.malloced) big_finish(&tmp2);
ret2:
	if (tmp.malloced) big_finish(&tmp);
ret1:
	if (ki.malloced) big_finish(&ki);

	return (err);
}
2692
2693
/*
 * Probabilistic primality test in the style of Baillie-PSW: a base-2
 * Miller-Rabin test, followed by a perfect-square check, followed by a
 * Lucas test.  Returns BIG_TRUE if n passes (n is almost certainly
 * prime), BIG_FALSE if n is composite, or an error code on failure.
 * The optional info argument is passed through to big_modexp_ext().
 */
BIG_ERR_CODE
big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
{
	BIGNUM o, nminus1, tmp, Lkminus1, Lk;
	BIG_CHUNK_TYPE ovalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE];
	BIG_ERR_CODE err;
	int e, i, jac;

	/* trivial cases: 1 is not prime, 2 is, other even numbers are not */
	if (big_cmp_abs(n, &big_One) == 0) {
		return (BIG_FALSE);
	}
	if (big_cmp_abs(n, &big_Two) == 0) {
		return (BIG_TRUE);
	}
	if ((n->value[0] & 1) == 0) {
		return (BIG_FALSE);
	}

	if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
	    BIG_OK) {
		return (err);
	}

	if ((err = big_init1(&nminus1, n->len,
	    nminus1value, arraysize(nminus1value))) != BIG_OK) {
		goto ret1;
	}

	if ((err = big_init1(&tmp, 2 * n->len,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		goto ret2;
	}

	if ((err = big_init1(&Lkminus1, n->len,
	    Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
		goto ret3;
	}

	if ((err = big_init1(&Lk, n->len,
	    Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
		goto ret4;
	}

	/* write n - 1 as o * 2^e with o odd */
	(void) big_sub_pos(&o, n, &big_One); /* cannot fail */
	(void) big_copy(&nminus1, &o); /* cannot fail */
	e = 0;
	while ((o.value[0] & 1) == 0) {
		e++;
		(void) big_half_pos(&o, &o); /* cannot fail */
	}
	/* tmp = 2^o mod n */
	if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
	    BIG_OK) {
		goto ret;
	}

	/*
	 * Base-2 Miller-Rabin: square tmp up to e times, stopping early
	 * if it reaches 1 or n-1.
	 */
	i = 0;
	while ((i < e) &&
	    (big_cmp_abs(&tmp, &big_One) != 0) &&
	    (big_cmp_abs(&tmp, &nminus1) != 0)) {
		if ((err =
		    big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
		    BIG_OK)
			goto ret;
		i++;
	}

	/* composite unless we hit n-1, or hit 1 before any squaring */
	if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
	    ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
		err = BIG_FALSE;
		goto ret;
	}

	/*
	 * Reject perfect squares: for a square no Jacobi symbol of -1
	 * exists, so the witness search below would never terminate.
	 */
	if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
		goto ret;
	}

	if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
		goto ret;
	}
	if (big_cmp_abs(&tmp, n) == 0) {
		err = BIG_FALSE;
		goto ret;
	}

	/* find the smallest o >= 3 with Jacobi(o^2 - 4, n) == -1 */
	(void) big_copy(&o, &big_Two);
	do {
		(void) big_add_abs(&o, &o, &big_One);
		if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
			goto ret;
		}
		(void) big_sub_pos(&tmp, &tmp, &big_Four);
		if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
			goto ret;
		}
	} while (jac != -1);

	/*
	 * Lucas test with L_1 = o and k = n + 1: if n is prime then
	 * L_(n+1) == 2 and L_n == o (mod n).
	 */
	(void) big_add_abs(&tmp, n, &big_One);
	if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
		goto ret;
	}

	if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
	    (big_cmp_abs(&Lk, &big_Two) == 0)) {
		err = BIG_TRUE;
	} else {
		err = BIG_FALSE;
	}

ret:
	if (Lk.malloced) big_finish(&Lk);
ret4:
	if (Lkminus1.malloced) big_finish(&Lkminus1);
ret3:
	if (tmp.malloced) big_finish(&tmp);
ret2:
	if (nminus1.malloced) big_finish(&nminus1);
ret1:
	if (o.malloced) big_finish(&o);

	return (err);
}
2819
2820
2821 BIG_ERR_CODE
2822 big_isprime_pos(BIGNUM *n)
2823 {
2824 return (big_isprime_pos_ext(n, NULL));
2825 }
2826
2827
2828 #define SIEVESIZE 1000
2829
2830
2831 BIG_ERR_CODE
2832 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2833 {
2834 static const uint32_t smallprimes[] = {
2835 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2836 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 };
2837 BIG_ERR_CODE err;
2838 int sieve[SIEVESIZE];
2839 int i;
2840 uint32_t off, p;
2841
2842 if ((err = big_copy(result, n)) != BIG_OK) {
2843 return (err);
2844 }
2845 result->value[0] |= 1;
2846 /* CONSTCOND */
2847 while (1) {
2848 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2849 for (i = 0;
2850 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2851 p = smallprimes[i];
2852 off = big_modhalf_pos(result, p);
2853 off = p - off;
2854 if ((off % 2) == 1) {
2855 off = off + p;
2856 }
2857 off = off / 2;
2858 while (off < SIEVESIZE) {
2859 sieve[off] = 1;
2860 off = off + p;
2861 }
2862 }
2863
2864 for (i = 0; i < SIEVESIZE; i++) {
2865 if (sieve[i] == 0) {
2866 err = big_isprime_pos_ext(result, info);
2867 if (err != BIG_FALSE) {
2868 if (err != BIG_TRUE) {
2869 return (err);
2870 } else {
2871 goto out;
2872 }
2873 }
2874
2875 }
2876 if ((err = big_add_abs(result, result, &big_Two)) !=
2877 BIG_OK) {
2878 return (err);
2879 }
2880 }
2881 }
2882
2883 out:
2884 return (BIG_OK);
2885 }
2886
2887
2888 BIG_ERR_CODE
2889 big_nextprime_pos(BIGNUM *result, BIGNUM *n)
2890 {
2891 return (big_nextprime_pos_ext(result, n, NULL));
2892 }
2893
2894
2895 BIG_ERR_CODE
2896 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
2897 {
2898 BIG_ERR_CODE err;
2899
2900
2901 if ((err = big_copy(result, n)) != BIG_OK)
2902 return (err);
2903 result->value[0] |= 1;
2904 while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
2905 if (err != BIG_FALSE)
2906 return (err);
2907 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
2908 return (err);
2909 }
2910 return (BIG_OK);
2911 }
2912
2913
2914 /*
2915 * given m and e, computes the rest in the equation
2916 * gcd(m, e) = cm * m + ce * e
2917 */
2918 BIG_ERR_CODE
2919 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
2920 {
2921 BIGNUM *xi, *ri, *riminus1, *riminus2, *t;
2922 BIGNUM *vmi, *vei, *vmiminus1, *veiminus1;
2923 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp;
2924 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2925 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2926 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2927 BIG_CHUNK_TYPE t4value[BIGTMPSIZE];
2928 BIG_CHUNK_TYPE t5value[BIGTMPSIZE];
2929 BIG_CHUNK_TYPE t6value[BIGTMPSIZE];
2930 BIG_CHUNK_TYPE t7value[BIGTMPSIZE];
2931 BIG_CHUNK_TYPE t8value[BIGTMPSIZE];
2932 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2933 BIG_ERR_CODE err;
2934 int len;
2935
2936 if (big_cmp_abs(m, e) >= 0) {
2937 len = m->len;
2938 } else {
2939 len = e->len;
2940 }
2941
2942 if ((err = big_init1(&t1, len,
2943 t1value, arraysize(t1value))) != BIG_OK) {
2944 return (err);
2945 }
2946 if ((err = big_init1(&t2, len,
2947 t2value, arraysize(t2value))) != BIG_OK) {
2948 goto ret1;
2949 }
2950 if ((err = big_init1(&t3, len,
2951 t3value, arraysize(t3value))) != BIG_OK) {
2952 goto ret2;
2953 }
2954 if ((err = big_init1(&t4, len,
2955 t4value, arraysize(t3value))) != BIG_OK) {
2956 goto ret3;
2957 }
2958 if ((err = big_init1(&t5, len,
2959 t5value, arraysize(t5value))) != BIG_OK) {
2960 goto ret4;
2961 }
2962 if ((err = big_init1(&t6, len,
2963 t6value, arraysize(t6value))) != BIG_OK) {
2964 goto ret5;
2965 }
2966 if ((err = big_init1(&t7, len,
2967 t7value, arraysize(t7value))) != BIG_OK) {
2968 goto ret6;
2969 }
2970 if ((err = big_init1(&t8, len,
2971 t8value, arraysize(t8value))) != BIG_OK) {
2972 goto ret7;
2973 }
2974
2975 if ((err = big_init1(&tmp, 2 * len,
2976 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2977 goto ret8;
2978 }
2979
2980 ri = &t1;
2981 ri->value[0] = 1;
2982 ri->len = 1;
2983 xi = &t2;
2984 riminus1 = &t3;
2985 riminus2 = &t4;
2986 vmi = &t5;
2987 vei = &t6;
2988 vmiminus1 = &t7;
2989 veiminus1 = &t8;
2990
2991 (void) big_copy(vmiminus1, &big_One);
2992 (void) big_copy(vmi, &big_One);
2993 (void) big_copy(veiminus1, &big_One);
2994 (void) big_copy(xi, &big_One);
2995 vei->len = 1;
2996 vei->value[0] = 0;
2997
2998 (void) big_copy(riminus1, m);
2999 (void) big_copy(ri, e);
3000
3001 while (!big_is_zero(ri)) {
3002 t = riminus2;
3003 riminus2 = riminus1;
3004 riminus1 = ri;
3005 ri = t;
3006 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
3007 goto ret;
3008 }
3009 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
3010 goto ret;
3011 }
3012 t = vmiminus1;
3013 vmiminus1 = vmi;
3014 vmi = t;
3015 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
3016 goto ret;
3017 }
3018 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
3019 goto ret;
3020 }
3021 t = veiminus1;
3022 veiminus1 = vei;
3023 vei = t;
3024 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
3025 BIG_OK) {
3026 goto ret;
3027 }
3028 }
3029 if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
3030 goto ret;
3031 }
3032 if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
3033 goto ret;
3034 }
3035 if (ce != NULL) {
3036 err = big_copy(ce, vei);
3037 }
3038 ret:
3039 if (tmp.malloced) big_finish(&tmp);
3040 ret8:
3041 if (t8.malloced) big_finish(&t8);
3042 ret7:
3043 if (t7.malloced) big_finish(&t7);
3044 ret6:
3045 if (t6.malloced) big_finish(&t6);
3046 ret5:
3047 if (t5.malloced) big_finish(&t5);
3048 ret4:
3049 if (t4.malloced) big_finish(&t4);
3050 ret3:
3051 if (t3.malloced) big_finish(&t3);
3052 ret2:
3053 if (t2.malloced) big_finish(&t2);
3054 ret1:
3055 if (t1.malloced) big_finish(&t1);
3056
3057 return (err);
3058 }