1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #define big_div_pos_fast big_div_pos
29
30 #include "bignum.h"
31
32 /*
33 * Configuration guide
34 * -------------------
35 *
36 * There are 4 preprocessor symbols used to configure the bignum
37 * implementation. This file contains no logic to configure based on
38 * processor; we leave that to the Makefiles to specify.
39 *
40 * USE_FLOATING_POINT
41 * Meaning: There is support for a fast floating-point implementation of
42 * Montgomery multiply.
43 *
44 * PSR_MUL
45 * Meaning: There are processor-specific versions of the low level
46 * functions to implement big_mul. Those functions are: big_mul_set_vec,
47 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support
48 * for all 4 functions. You cannot pick and choose which subset of these
49 * functions to support; that would lead to a rat's nest of #ifdefs.
50 *
51 * HWCAP
52 * Meaning: Call multiply support functions through a function pointer.
53 * On x86, there are multiple implementations for differnt hardware
54 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when
55 * a function is first used. So, the support functions are called through
56 * a function pointer. There is no need for that on Sparc, because there
57 * is only one implementation; support functions are called directly.
58 * Later, if there were some new VIS instruction, or something, and a
59 * run-time test were needed, rather than variant kernel modules and
60 * libraries, then HWCAP would be defined for Sparc, as well.
61 *
62 * UMUL64
63 * Meaning: It is safe to use generic C code that assumes the existence
64 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined,
65 * then the generic code for big_mul_add_vec() must necessarily be very slow,
66 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
67 *
68 */
69
70
71 #ifdef _KERNEL
72 #include <sys/ddi.h>
73 #include <sys/mdesc.h>
74 #include <sys/crypto/common.h>
75
76 #include <sys/types.h>
77 #include <sys/kmem.h>
78 #include <sys/param.h>
79 #include <sys/sunddi.h>
80
81 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP)
82 #define big_free(ptr, size) kmem_free(ptr, size)
83
84 void *
85 big_realloc(void *from, size_t oldsize, size_t newsize)
86 {
87 void *rv;
88
89 rv = kmem_alloc(newsize, KM_NOSLEEP);
90 if (rv != NULL)
91 bcopy(from, rv, oldsize);
92 kmem_free(from, oldsize);
93 return (rv);
94 }
95
96 #else /* _KERNEL */
97
98 #include <stdlib.h>
99 #include <stdio.h>
100 #include <assert.h>
101 #define ASSERT assert
102
103 #ifndef MALLOC_DEBUG
104
105 #define big_malloc(size) malloc(size)
106 #define big_free(ptr, size) free(ptr)
107
108 #else
109
110 void
111 big_free(void *ptr, size_t size)
112 {
113 printf("freed %d bytes at %p\n", size, ptr);
114 free(ptr);
115 }
116
117 void *
118 big_malloc(size_t size)
119 {
120 void *rv;
121 rv = malloc(size);
122 printf("malloced %d bytes, addr:%p\n", size, rv);
123 return (rv);
124 }
125 #endif /* MALLOC_DEBUG */
126
127 #define big_realloc(x, y, z) realloc((x), (z))
128
129 void
130 printbignum(char *aname, BIGNUM *a)
131 {
132 int i;
133
134 (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
135 for (i = a->len - 1; i >= 0; i--) {
136 #ifdef BIGNUM_CHUNK_32
137 (void) printf("%08x ", a->value[i]);
138 if ((i % 8 == 0) && (i != 0)) {
139 (void) printf("\n");
140 }
141 #else
142 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
143 (uint32_t)((a->value[i]) & 0xffffffff));
144 if ((i % 4 == 0) && (i != 0)) {
145 (void) printf("\n");
146 }
147 #endif
148 }
149 (void) printf("\n");
150 }
151
152 #endif /* _KERNEL */
153
154
155 /* size in BIG_CHUNK_SIZE-bit words */
156 BIG_ERR_CODE
157 big_init(BIGNUM *number, int size)
158 {
159 number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
160 if (number->value == NULL) {
161 return (BIG_NO_MEM);
162 }
163 number->size = size;
164 number->len = 0;
165 number->sign = 1;
166 number->malloced = 1;
167 return (BIG_OK);
168 }
169
170 /* size in BIG_CHUNK_SIZE-bit words */
171 BIG_ERR_CODE
172 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
173 {
174 if ((buf == NULL) || (size > bufsize)) {
175 number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
176 if (number->value == NULL) {
177 return (BIG_NO_MEM);
178 }
179 number->size = size;
180 number->malloced = 1;
181 } else {
182 number->value = buf;
183 number->size = bufsize;
184 number->malloced = 0;
185 }
186 number->len = 0;
187 number->sign = 1;
188
189 return (BIG_OK);
190 }
191
192 void
193 big_finish(BIGNUM *number)
194 {
195 if (number->malloced == 1) {
196 big_free(number->value,
197 sizeof (BIG_CHUNK_TYPE) * number->size);
198 number->malloced = 0;
199 }
200 }
201
202
203 /*
204 * bn->size should be at least
205 * (len + sizeof (BIG_CHUNK_TYPE) - 1) / sizeof (BIG_CHUNK_TYPE) bytes
206 * converts from byte-big-endian format to bignum format (words in
207 * little endian order, but bytes within the words big endian)
208 */
209 void
210 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
211 {
212 int i, j, offs;
213 BIG_CHUNK_TYPE word;
214 uchar_t *knwordp;
215
216 #ifdef _LP64
217 offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
218 bn->len = (uint32_t)len / sizeof (BIG_CHUNK_TYPE);
219
220 for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
221 #else /* !_LP64 */
222 offs = len % sizeof (BIG_CHUNK_TYPE);
223 bn->len = len / sizeof (BIG_CHUNK_TYPE);
224 for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
225 #endif /* _LP64 */
226 knwordp = &(kn[len - sizeof (BIG_CHUNK_TYPE) * (i + 1)]);
227 word = knwordp[0];
228 for (j = 1; j < sizeof (BIG_CHUNK_TYPE); j++) {
229 word = (word << 8)+ knwordp[j];
230 }
231 bn->value[i] = word;
232 }
233 if (offs > 0) {
234 word = kn[0];
235 for (i = 1; i < offs; i++) word = (word << 8) + kn[i];
236 bn->value[bn->len++] = word;
237 }
238 while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) {
239 bn->len --;
240 }
241 }
242
243 /*
244 * copies the least significant len bytes if
245 * len < bn->len * sizeof (BIG_CHUNK_TYPE)
246 * converts from bignum format to byte-big-endian format.
247 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
248 */
249 void
250 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
251 {
252 int i, j, offs;
253 BIG_CHUNK_TYPE word;
254
255 if (len < sizeof (BIG_CHUNK_TYPE) * bn->len) {
256 #ifdef _LP64
257 for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
258 #else /* !_LP64 */
259 for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
260 #endif /* _LP64 */
261 word = bn->value[i];
262 for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
263 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
264 word & 0xff;
265 word = word >> 8;
266 }
267 }
268 #ifdef _LP64
269 offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
270 #else /* !_LP64 */
271 offs = len % sizeof (BIG_CHUNK_TYPE);
272 #endif /* _LP64 */
273 if (offs > 0) {
274 word = bn->value[len / sizeof (BIG_CHUNK_TYPE)];
275 #ifdef _LP64
276 for (i = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
277 i > 0; i --) {
278 #else /* !_LP64 */
279 for (i = len % sizeof (BIG_CHUNK_TYPE);
280 i > 0; i --) {
281 #endif /* _LP64 */
282 kn[i - 1] = word & 0xff;
283 word = word >> 8;
284 }
285 }
286 } else {
287 for (i = 0; i < bn->len; i++) {
288 word = bn->value[i];
289 for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
290 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
291 word & 0xff;
292 word = word >> 8;
293 }
294 }
295 #ifdef _LP64
296 for (i = 0;
297 i < (uint32_t)len - sizeof (BIG_CHUNK_TYPE) * bn->len;
298 i++) {
299 #else /* !_LP64 */
300 for (i = 0; i < len - sizeof (BIG_CHUNK_TYPE) * bn->len; i++) {
301 #endif /* _LP64 */
302 kn[i] = 0;
303 }
304 }
305 }
306
307
308 int
309 big_bitlength(BIGNUM *a)
310 {
311 int l = 0, b = 0;
312 BIG_CHUNK_TYPE c;
313
314 l = a->len - 1;
315 while ((l > 0) && (a->value[l] == 0)) {
316 l--;
317 }
318 b = sizeof (BIG_CHUNK_TYPE) * BITSINBYTE;
319 c = a->value[l];
320 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
321 c = c << 1;
322 b--;
323 }
324
325 return (l * sizeof (BIG_CHUNK_TYPE) * BITSINBYTE + b);
326 }
327
328
329 BIG_ERR_CODE
330 big_copy(BIGNUM *dest, BIGNUM *src)
331 {
332 BIG_CHUNK_TYPE *newptr;
333 int i, len;
334
335 len = src->len;
336 while ((len > 1) && (src->value[len - 1] == 0)) {
337 len--;
338 }
339 src->len = len;
340 if (dest->size < len) {
341 if (dest->malloced == 1) {
342 newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
343 sizeof (BIG_CHUNK_TYPE) * dest->size,
344 sizeof (BIG_CHUNK_TYPE) * len);
345 } else {
346 newptr = (BIG_CHUNK_TYPE *)
347 big_malloc(sizeof (BIG_CHUNK_TYPE) * len);
348 if (newptr != NULL) {
349 dest->malloced = 1;
350 }
351 }
352 if (newptr == NULL) {
353 return (BIG_NO_MEM);
354 }
355 dest->value = newptr;
356 dest->size = len;
357 }
358 dest->len = len;
359 dest->sign = src->sign;
360 for (i = 0; i < len; i++) {
361 dest->value[i] = src->value[i];
362 }
363
364 return (BIG_OK);
365 }
366
367
368 BIG_ERR_CODE
369 big_extend(BIGNUM *number, int size)
370 {
371 BIG_CHUNK_TYPE *newptr;
372 int i;
373
374 if (number->size >= size)
375 return (BIG_OK);
376 if (number->malloced) {
377 number->value = big_realloc(number->value,
378 sizeof (BIG_CHUNK_TYPE) * number->size,
379 sizeof (BIG_CHUNK_TYPE) * size);
380 } else {
381 newptr = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
382 if (newptr != NULL) {
383 for (i = 0; i < number->size; i++) {
384 newptr[i] = number->value[i];
385 }
386 }
387 number->value = newptr;
388 }
389
390 if (number->value == NULL) {
391 return (BIG_NO_MEM);
392 }
393
394 number->size = size;
395 number->malloced = 1;
396 return (BIG_OK);
397 }
398
399
400 /* returns 1 if n == 0 */
401 int
402 big_is_zero(BIGNUM *n)
403 {
404 int i, result;
405
406 result = 1;
407 for (i = 0; i < n->len; i++) {
408 if (n->value[i] != 0) {
409 result = 0;
410 }
411 }
412 return (result);
413 }
414
415
416 BIG_ERR_CODE
417 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
418 {
419 int i, shorter, longer;
420 BIG_CHUNK_TYPE cy, ai;
421 BIG_CHUNK_TYPE *r, *a, *b, *c;
422 BIG_ERR_CODE err;
423 BIGNUM *longerarg;
424
425 if (aa->len > bb->len) {
426 shorter = bb->len;
427 longer = aa->len;
428 longerarg = aa;
429 } else {
430 shorter = aa->len;
431 longer = bb->len;
432 longerarg = bb;
433 }
434 if (result->size < longer + 1) {
435 err = big_extend(result, longer + 1);
436 if (err != BIG_OK) {
437 return (err);
438 }
439 }
440
441 r = result->value;
442 a = aa->value;
443 b = bb->value;
444 c = longerarg->value;
445 cy = 0;
446 for (i = 0; i < shorter; i++) {
447 ai = a[i];
448 r[i] = ai + b[i] + cy;
449 if (r[i] > ai) {
450 cy = 0;
451 } else if (r[i] < ai) {
452 cy = 1;
453 }
454 }
455 for (; i < longer; i++) {
456 ai = c[i];
457 r[i] = ai + cy;
458 if (r[i] >= ai) {
459 cy = 0;
460 }
461 }
462 if (cy == 1) {
463 r[i] = cy;
464 result->len = longer + 1;
465 } else {
466 result->len = longer;
467 }
468 result->sign = 1;
469 return (BIG_OK);
470 }
471
472
473 /* caller must make sure that result has at least len words allocated */
474 void
475 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
476 {
477 int i;
478 BIG_CHUNK_TYPE cy, ai;
479
480 cy = 1;
481 for (i = 0; i < len; i++) {
482 ai = a[i];
483 r[i] = ai + (~b[i]) + cy;
484 if (r[i] > ai) {
485 cy = 0;
486 } else if (r[i] < ai) {
487 cy = 1;
488 }
489 }
490 }
491
492
493 /* result=aa-bb it is assumed that aa>=bb */
494 BIG_ERR_CODE
495 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
496 {
497 int i, shorter;
498 BIG_CHUNK_TYPE cy = 1, ai;
499 BIG_CHUNK_TYPE *r, *a, *b;
500 BIG_ERR_CODE err = BIG_OK;
501
502 if (aa->len > bb->len) {
503 shorter = bb->len;
504 } else {
505 shorter = aa->len;
506 }
507 if (result->size < aa->len) {
508 err = big_extend(result, aa->len);
509 if (err != BIG_OK) {
510 return (err);
511 }
512 }
513
514 r = result->value;
515 a = aa->value;
516 b = bb->value;
517 result->len = aa->len;
518 cy = 1;
519 for (i = 0; i < shorter; i++) {
520 ai = a[i];
521 r[i] = ai + (~b[i]) + cy;
522 if (r[i] > ai) {
523 cy = 0;
524 } else if (r[i] < ai) {
525 cy = 1;
526 }
527 }
528 for (; i < aa->len; i++) {
529 ai = a[i];
530 r[i] = ai + (~0) + cy;
531 if (r[i] < ai) {
532 cy = 1;
533 }
534 }
535 result->sign = 1;
536
537 if (cy == 0) {
538 return (BIG_INVALID_ARGS);
539 } else {
540 return (BIG_OK);
541 }
542 }
543
544
545 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
546 int
547 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
548 {
549 int i;
550
551 if (aa->len > bb->len) {
552 for (i = aa->len - 1; i > bb->len - 1; i--) {
553 if (aa->value[i] > 0) {
554 return (1);
555 }
556 }
557 } else if (aa->len < bb->len) {
558 for (i = bb->len - 1; i > aa->len - 1; i--) {
559 if (bb->value[i] > 0) {
560 return (-1);
561 }
562 }
563 } else {
564 i = aa->len-1;
565 }
566 for (; i >= 0; i--) {
567 if (aa->value[i] > bb->value[i]) {
568 return (1);
569 } else if (aa->value[i] < bb->value[i]) {
570 return (-1);
571 }
572 }
573
574 return (0);
575 }
576
577
578 BIG_ERR_CODE
579 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
580 {
581 BIG_ERR_CODE err;
582
583 if ((bb->sign == -1) && (aa->sign == 1)) {
584 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
585 return (err);
586 }
587 result->sign = 1;
588 } else if ((aa->sign == -1) && (bb->sign == 1)) {
589 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
590 return (err);
591 }
592 result->sign = -1;
593 } else if ((aa->sign == 1) && (bb->sign == 1)) {
594 if (big_cmp_abs(aa, bb) >= 0) {
595 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
596 return (err);
597 }
598 result->sign = 1;
599 } else {
600 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
601 return (err);
602 }
603 result->sign = -1;
604 }
605 } else {
606 if (big_cmp_abs(aa, bb) >= 0) {
607 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
608 return (err);
609 }
610 result->sign = -1;
611 } else {
612 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
613 return (err);
614 }
615 result->sign = 1;
616 }
617 }
618 return (BIG_OK);
619 }
620
621
622 BIG_ERR_CODE
623 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
624 {
625 BIG_ERR_CODE err;
626
627 if ((bb->sign == -1) && (aa->sign == -1)) {
628 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
629 return (err);
630 }
631 result->sign = -1;
632 } else if ((aa->sign == 1) && (bb->sign == 1)) {
633 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
634 return (err);
635 }
636 result->sign = 1;
637 } else if ((aa->sign == 1) && (bb->sign == -1)) {
638 if (big_cmp_abs(aa, bb) >= 0) {
639 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
640 return (err);
641 }
642 result->sign = 1;
643 } else {
644 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
645 return (err);
646 }
647 result->sign = -1;
648 }
649 } else {
650 if (big_cmp_abs(aa, bb) >= 0) {
651 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
652 return (err);
653 }
654 result->sign = -1;
655 } else {
656 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
657 return (err);
658 }
659 result->sign = 1;
660 }
661 }
662 return (BIG_OK);
663 }
664
665
666 /* result = aa/2 */
667 BIG_ERR_CODE
668 big_half_pos(BIGNUM *result, BIGNUM *aa)
669 {
670 BIG_ERR_CODE err;
671 int i;
672 BIG_CHUNK_TYPE cy, cy1;
673 BIG_CHUNK_TYPE *a, *r;
674
675 if (result->size < aa->len) {
676 err = big_extend(result, aa->len);
677 if (err != BIG_OK) {
678 return (err);
679 }
680 }
681
682 result->len = aa->len;
683 a = aa->value;
684 r = result->value;
685 cy = 0;
686 for (i = aa->len - 1; i >= 0; i--) {
687 cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
688 r[i] = (cy | (a[i] >> 1));
689 cy = cy1;
690 }
691 if (r[result->len - 1] == 0) {
692 result->len--;
693 }
694
695 return (BIG_OK);
696 }
697
698 /* result = aa*2 */
699 BIG_ERR_CODE
700 big_double(BIGNUM *result, BIGNUM *aa)
701 {
702 BIG_ERR_CODE err;
703 int i, rsize;
704 BIG_CHUNK_TYPE cy, cy1;
705 BIG_CHUNK_TYPE *a, *r;
706
707 if ((aa->len > 0) &&
708 ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
709 rsize = aa->len + 1;
710 } else {
711 rsize = aa->len;
712 }
713
714 if (result->size < rsize) {
715 err = big_extend(result, rsize);
716 if (err != BIG_OK)
717 return (err);
718 }
719
720 a = aa->value;
721 r = result->value;
722 if (rsize == aa->len + 1) {
723 r[rsize - 1] = 1;
724 }
725 cy = 0;
726 for (i = 0; i < aa->len; i++) {
727 cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
728 r[i] = (cy | (a[i] << 1));
729 cy = cy1;
730 }
731 result->len = rsize;
732 return (BIG_OK);
733 }
734
735
736 /*
737 * returns aa mod b, aa must be nonneg, b must be a max
738 * (BIG_CHUNK_SIZE / 2)-bit integer
739 */
740 static uint32_t
741 big_modhalf_pos(BIGNUM *aa, uint32_t b)
742 {
743 int i;
744 BIG_CHUNK_TYPE rem;
745
746 if (aa->len == 0) {
747 return (0);
748 }
749 rem = aa->value[aa->len - 1] % b;
750 for (i = aa->len - 2; i >= 0; i--) {
751 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
752 (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
753 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
754 (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
755 }
756
757 return ((uint32_t)rem);
758 }
759
760
761 /*
762 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
763 * result->size should be at least aa->len at entry
764 * aa, bb, and result should be positive
765 */
766 void
767 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
768 {
769 int i, lendiff;
770 BIGNUM res1, aa1;
771
772 lendiff = aa->len - bb->len;
773 res1.size = result->size - lendiff;
774 res1.malloced = 0;
775 res1.value = result->value + lendiff;
776 aa1.size = aa->size - lendiff;
777 aa1.value = aa->value + lendiff;
778 aa1.len = bb->len;
779 aa1.sign = 1;
780 (void) big_sub_pos(&res1, &aa1, bb);
781 if (result->value != aa->value) {
782 for (i = 0; i < lendiff; i++) {
783 result->value[i] = aa->value[i];
784 }
785 }
786 result->len = aa->len;
787 }
788
789
790 /*
791 * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
792 * (2^BIG_CHUNK_SIZE)^lendiff * |bb|
793 * aa->len should be >= bb->len
794 */
795 int
796 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
797 {
798 int lendiff;
799 BIGNUM aa1;
800
801 lendiff = aa->len - bb->len;
802 aa1.len = bb->len;
803 aa1.size = aa->size - lendiff;
804 aa1.malloced = 0;
805 aa1.value = aa->value + lendiff;
806 return (big_cmp_abs(&aa1, bb));
807 }
808
809
810 /*
811 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
812 * result should have enough space allocated.
813 */
814 static void
815 big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
816 {
817 int i;
818 BIG_CHUNK_TYPE t1, t2, ai, cy;
819 BIG_CHUNK_TYPE *a, *r;
820
821 a = aa->value;
822 r = result->value;
823 cy = 0;
824 for (i = 0; i < aa->len; i++) {
825 ai = a[i];
826 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
827 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
828 (t1 >> (BIG_CHUNK_SIZE / 2));
829 r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
830 (t2 << (BIG_CHUNK_SIZE / 2));
831 cy = t2 >> (BIG_CHUNK_SIZE / 2);
832 }
833 r[i] = cy;
834 result->len = aa->len + 1;
835 result->sign = aa->sign;
836 }
837
838
839 /*
840 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
841 * (BIG_CHUNK_SIZE / 2)-bit positive integer.
842 * result should have enough space allocated.
843 */
844 static void
845 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
846 {
847 int i;
848 BIG_CHUNK_TYPE t1, t2, ai, cy, ri;
849 BIG_CHUNK_TYPE *a, *r;
850
851 a = aa->value;
852 r = result->value;
853 cy = 0;
854 ri = 0;
855 for (i = 0; i < aa->len; i++) {
856 ai = a[i];
857 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
858 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
859 (t1 >> (BIG_CHUNK_SIZE / 2));
860 r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri;
861 ri = t2 & BIG_CHUNK_LOWHALFBITS;
862 cy = t2 >> (BIG_CHUNK_SIZE / 2);
863 }
864 r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri;
865 result->len = aa->len + 1;
866 result->sign = aa->sign;
867 }
868
869
870 /* it is assumed that result->size is big enough */
871 void
872 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
873 {
874 int i;
875 BIG_CHUNK_TYPE cy, ai;
876
877 if (offs == 0) {
878 if (result != aa) {
879 (void) big_copy(result, aa);
880 }
881 return;
882 }
883 cy = 0;
884 for (i = 0; i < aa->len; i++) {
885 ai = aa->value[i];
886 result->value[i] = (ai << offs) | cy;
887 cy = ai >> (BIG_CHUNK_SIZE - offs);
888 }
889 if (cy != 0) {
890 result->len = aa->len + 1;
891 result->value[result->len - 1] = cy;
892 } else {
893 result->len = aa->len;
894 }
895 result->sign = aa->sign;
896 }
897
898
899 /* it is assumed that result->size is big enough */
900 void
901 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
902 {
903 int i;
904 BIG_CHUNK_TYPE cy, ai;
905
906 if (offs == 0) {
907 if (result != aa) {
908 (void) big_copy(result, aa);
909 }
910 return;
911 }
912 cy = aa->value[0] >> offs;
913 for (i = 1; i < aa->len; i++) {
914 ai = aa->value[i];
915 result->value[i-1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
916 cy = ai >> offs;
917 }
918 result->len = aa->len;
919 result->value[result->len - 1] = cy;
920 result->sign = aa->sign;
921 }
922
923
924 /*
925 * result = aa/bb remainder = aa mod bb
926 * it is assumed that aa and bb are positive
927 */
928 BIG_ERR_CODE
929 big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
930 {
931 BIG_ERR_CODE err = BIG_OK;
932 int i, alen, blen, tlen, rlen, offs;
933 BIG_CHUNK_TYPE higha, highb, coeff;
934 BIG_CHUNK_TYPE *a, *b;
935 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
936 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
937 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
938 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE];
939 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE];
940 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE];
941
942 a = aa->value;
943 b = bb->value;
944 alen = aa->len;
945 blen = bb->len;
946 while ((alen > 1) && (a[alen - 1] == 0)) {
947 alen = alen - 1;
948 }
949 aa->len = alen;
950 while ((blen > 1) && (b[blen - 1] == 0)) {
951 blen = blen - 1;
952 }
953 bb->len = blen;
954 if ((blen == 1) && (b[0] == 0)) {
955 return (BIG_DIV_BY_0);
956 }
957
958 if (big_cmp_abs(aa, bb) < 0) {
959 if ((remainder != NULL) &&
960 ((err = big_copy(remainder, aa)) != BIG_OK)) {
961 return (err);
962 }
963 if (result != NULL) {
964 result->len = 1;
965 result->sign = 1;
966 result->value[0] = 0;
967 }
968 return (BIG_OK);
969 }
970
971 if ((err = big_init1(&bblow, blen + 1,
972 bblowvalue, arraysize(bblowvalue))) != BIG_OK)
973 return (err);
974
975 if ((err = big_init1(&bbhigh, blen + 1,
976 bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
977 goto ret1;
978
979 if ((err = big_init1(&tmp1, alen + 2,
980 tmp1value, arraysize(tmp1value))) != BIG_OK)
981 goto ret2;
982
983 if ((err = big_init1(&tmp2, blen + 2,
984 tmp2value, arraysize(tmp2value))) != BIG_OK)
985 goto ret3;
986
987 if ((err = big_init1(&tresult, alen - blen + 2,
988 tresultvalue, arraysize(tresultvalue))) != BIG_OK)
989 goto ret4;
990
991 offs = 0;
992 highb = b[blen - 1];
993 if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
994 highb = highb >> (BIG_CHUNK_SIZE / 2);
995 offs = (BIG_CHUNK_SIZE / 2);
996 }
997 while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
998 highb = highb << 1;
999 offs++;
1000 }
1001
1002 big_shiftleft(&bblow, bb, offs);
1003
1004 if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
1005 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1006 } else {
1007 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1008 }
1009 if (bbhigh.value[bbhigh.len - 1] == 0) {
1010 bbhigh.len--;
1011 } else {
1012 bbhigh.value[bbhigh.len] = 0;
1013 }
1014
1015 highb = bblow.value[bblow.len - 1];
1016
1017 big_shiftleft(&tmp1, aa, offs);
1018 rlen = tmp1.len - bblow.len + 1;
1019 tresult.len = rlen;
1020
1021 tmp1.len++;
1022 tlen = tmp1.len;
1023 tmp1.value[tmp1.len - 1] = 0;
1024 for (i = 0; i < rlen; i++) {
1025 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
1026 (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
1027 coeff = higha / (highb + 1);
1028 big_mulhalf_high(&tmp2, &bblow, coeff);
1029 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1030 bbhigh.len++;
1031 while (tmp1.value[tlen - 1] > 0) {
1032 big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1033 coeff++;
1034 }
1035 bbhigh.len--;
1036 tlen--;
1037 tmp1.len--;
1038 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
1039 big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1040 coeff++;
1041 }
1042 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
1043 higha = tmp1.value[tlen - 1];
1044 coeff = higha / (highb + 1);
1045 big_mulhalf_low(&tmp2, &bblow, coeff);
1046 tmp2.len--;
1047 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1048 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
1049 big_sub_pos_high(&tmp1, &tmp1, &bblow);
1050 coeff++;
1051 }
1052 tresult.value[rlen - i - 1] =
1053 tresult.value[rlen - i - 1] + coeff;
1054 }
1055
1056 big_shiftright(&tmp1, &tmp1, offs);
1057
1058 err = BIG_OK;
1059
1060 if ((remainder != NULL) &&
1061 ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1062 goto ret;
1063
1064 if (result != NULL)
1065 err = big_copy(result, &tresult);
1066
1067 ret:
1068 big_finish(&tresult);
1069 ret4:
1070 big_finish(&tmp1);
1071 ret3:
1072 big_finish(&tmp2);
1073 ret2:
1074 big_finish(&bbhigh);
1075 ret1:
1076 big_finish(&bblow);
1077 return (err);
1078 }
1079
1080 /*
1081 * If there is no processor-specific integer implementation of
1082 * the lower level multiply functions, then this code is provided
1083 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1084 * big_sqr_vec().
1085 *
1086 * There are two generic implementations. One that assumes that
1087 * there is hardware and C compiler support for a 32 x 32 --> 64
1088 * bit unsigned multiply, but otherwise is not specific to any
1089 * processor, platform, or ISA.
1090 *
1091 * The other makes very few assumptions about hardware capabilities.
1092 * It does not even assume that there is any implementation of a
1093 * 32 x 32 --> 64 bit multiply that is accessible to C code and
1094 * appropriate to use. It falls constructs 32 x 32 --> 64 bit
1095 * multiplies from 16 x 16 --> 32 bit multiplies.
1096 *
1097 */
1098
1099 #if !defined(PSR_MUL)
1100
1101 #ifdef UMUL64
1102
1103 #if (BIG_CHUNK_SIZE == 32)
1104
1105 #define UNROLL8
1106
1107 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1108 p = pf * d; \
1109 pf = (uint64_t)a[R+1]; \
1110 t = p + cy; \
1111 r[R] = (uint32_t)t; \
1112 cy = t >> 32
1113
1114 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1115 p = pf * d; \
1116 t = p + cy; \
1117 r[R] = (uint32_t)t; \
1118 cy = t >> 32
1119
1120 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1121 t = (uint64_t)r[R]; \
1122 p = pf * d; \
1123 pf = (uint64_t)a[R+1]; \
1124 t = p + t + cy; \
1125 r[R] = (uint32_t)t; \
1126 cy = t >> 32
1127
1128 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1129 t = (uint64_t)r[R]; \
1130 p = pf * d; \
1131 t = p + t + cy; \
1132 r[R] = (uint32_t)t; \
1133 cy = t >> 32
1134
1135 #ifdef UNROLL8
1136
1137 #define UNROLL 8
1138
1139 /*
1140 * r = a * b
1141 * where r and a are vectors; b is a single 32-bit digit
1142 */
1143
1144 uint32_t
1145 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1146 {
1147 uint64_t d, pf, p, t, cy;
1148
1149 if (len == 0)
1150 return (0);
1151 cy = 0;
1152 d = (uint64_t)b;
1153 pf = (uint64_t)a[0];
1154 while (len > UNROLL) {
1155 MUL_SET_VEC_ROUND_PREFETCH(0);
1156 MUL_SET_VEC_ROUND_PREFETCH(1);
1157 MUL_SET_VEC_ROUND_PREFETCH(2);
1158 MUL_SET_VEC_ROUND_PREFETCH(3);
1159 MUL_SET_VEC_ROUND_PREFETCH(4);
1160 MUL_SET_VEC_ROUND_PREFETCH(5);
1161 MUL_SET_VEC_ROUND_PREFETCH(6);
1162 MUL_SET_VEC_ROUND_PREFETCH(7);
1163 r += UNROLL;
1164 a += UNROLL;
1165 len -= UNROLL;
1166 }
1167 if (len == UNROLL) {
1168 MUL_SET_VEC_ROUND_PREFETCH(0);
1169 MUL_SET_VEC_ROUND_PREFETCH(1);
1170 MUL_SET_VEC_ROUND_PREFETCH(2);
1171 MUL_SET_VEC_ROUND_PREFETCH(3);
1172 MUL_SET_VEC_ROUND_PREFETCH(4);
1173 MUL_SET_VEC_ROUND_PREFETCH(5);
1174 MUL_SET_VEC_ROUND_PREFETCH(6);
1175 MUL_SET_VEC_ROUND_NOPREFETCH(7);
1176 return ((uint32_t)cy);
1177 }
1178 while (len > 1) {
1179 MUL_SET_VEC_ROUND_PREFETCH(0);
1180 ++r;
1181 ++a;
1182 --len;
1183 }
1184 if (len > 0) {
1185 MUL_SET_VEC_ROUND_NOPREFETCH(0);
1186 }
1187 return ((uint32_t)cy);
1188 }
1189
1190 /*
1191 * r += a * b
1192 * where r and a are vectors; b is a single 32-bit digit
1193 */
1194
1195 uint32_t
1196 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1197 {
1198 uint64_t d, pf, p, t, cy;
1199
1200 if (len == 0)
1201 return (0);
1202 cy = 0;
1203 d = (uint64_t)b;
1204 pf = (uint64_t)a[0];
1205 while (len > 8) {
1206 MUL_ADD_VEC_ROUND_PREFETCH(0);
1207 MUL_ADD_VEC_ROUND_PREFETCH(1);
1208 MUL_ADD_VEC_ROUND_PREFETCH(2);
1209 MUL_ADD_VEC_ROUND_PREFETCH(3);
1210 MUL_ADD_VEC_ROUND_PREFETCH(4);
1211 MUL_ADD_VEC_ROUND_PREFETCH(5);
1212 MUL_ADD_VEC_ROUND_PREFETCH(6);
1213 MUL_ADD_VEC_ROUND_PREFETCH(7);
1214 r += 8;
1215 a += 8;
1216 len -= 8;
1217 }
1218 if (len == 8) {
1219 MUL_ADD_VEC_ROUND_PREFETCH(0);
1220 MUL_ADD_VEC_ROUND_PREFETCH(1);
1221 MUL_ADD_VEC_ROUND_PREFETCH(2);
1222 MUL_ADD_VEC_ROUND_PREFETCH(3);
1223 MUL_ADD_VEC_ROUND_PREFETCH(4);
1224 MUL_ADD_VEC_ROUND_PREFETCH(5);
1225 MUL_ADD_VEC_ROUND_PREFETCH(6);
1226 MUL_ADD_VEC_ROUND_NOPREFETCH(7);
1227 return ((uint32_t)cy);
1228 }
1229 while (len > 1) {
1230 MUL_ADD_VEC_ROUND_PREFETCH(0);
1231 ++r;
1232 ++a;
1233 --len;
1234 }
1235 if (len > 0) {
1236 MUL_ADD_VEC_ROUND_NOPREFETCH(0);
1237 }
1238 return ((uint32_t)cy);
1239 }
1240 #endif /* UNROLL8 */
1241
1242 void
1243 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1244 {
1245 uint32_t *tr, *ta;
1246 int tlen, row, col;
1247 uint64_t p, s, t, t2, cy;
1248 uint32_t d;
1249
1250 tr = r + 1;
1251 ta = a;
1252 tlen = len - 1;
1253 tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
1254 while (--tlen > 0) {
1255 tr += 2;
1256 ++ta;
1257 tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
1258 }
1259 s = (uint64_t)a[0];
1260 s = s * s;
1261 r[0] = (uint32_t)s;
1262 cy = s >> 32;
1263 p = ((uint64_t)r[1] << 1) + cy;
1264 r[1] = (uint32_t)p;
1265 cy = p >> 32;
1266 row = 1;
1267 col = 2;
1268 while (row < len) {
1269 s = (uint64_t)a[row];
1270 s = s * s;
1271 p = (uint64_t)r[col] << 1;
1272 t = p + s;
1273 d = (uint32_t)t;
1274 t2 = (uint64_t)d + cy;
1275 r[col] = (uint32_t)t2;
1276 cy = (t >> 32) + (t2 >> 32);
1277 if (row == len - 1)
1278 break;
1279 p = ((uint64_t)r[col+1] << 1) + cy;
1280 r[col+1] = (uint32_t)p;
1281 cy = p >> 32;
1282 ++row;
1283 col += 2;
1284 }
1285 r[col+1] = (uint32_t)cy;
1286 }
1287
1288 #else /* BIG_CHUNK_SIZE == 64 */
1289
1290 /*
1291 * r = r + a * digit, r and a are vectors of length len
1292 * returns the carry digit
1293 */
1294 BIG_CHUNK_TYPE
1295 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1296 BIG_CHUNK_TYPE digit)
1297 {
1298 BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
1299 int i;
1300
1301 cy1 = 0;
1302 dlow = digit & BIG_CHUNK_LOWHALFBITS;
1303 dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1304 for (i = 0; i < len; i++) {
1305 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1306 dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1307 (r[i] & BIG_CHUNK_LOWHALFBITS);
1308 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1309 dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1310 (r[i] >> (BIG_CHUNK_SIZE / 2));
1311 r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
1312 (cy1 << (BIG_CHUNK_SIZE / 2));
1313 }
1314 retcy = cy1 >> (BIG_CHUNK_SIZE / 2);
1315
1316 cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
1317 for (i = 0; i < len - 1; i++) {
1318 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1319 dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1320 (r[i] >> (BIG_CHUNK_SIZE / 2));
1321 r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1322 (cy << (BIG_CHUNK_SIZE / 2));
1323 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1324 dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1325 (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
1326 }
1327 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1328 dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
1329 (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
1330 r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1331 (cy << (BIG_CHUNK_SIZE / 2));
1332 retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
1333 dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;
1334
1335 return (retcy);
1336 }
1337
1338
1339 /*
1340 * r = a * digit, r and a are vectors of length len
1341 * returns the carry digit
1342 */
1343 BIG_CHUNK_TYPE
1344 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1345 BIG_CHUNK_TYPE digit)
1346 {
1347 int i;
1348
1349 ASSERT(r != a);
1350 for (i = 0; i < len; i++) {
1351 r[i] = 0;
1352 }
1353 return (big_mul_add_vec(r, a, len, digit));
1354 }
1355
1356 void
1357 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1358 {
1359 int i;
1360
1361 ASSERT(r != a);
1362 r[len] = big_mul_set_vec(r, a, len, a[0]);
1363 for (i = 1; i < len; ++i)
1364 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1365 }
1366
1367 #endif /* BIG_CHUNK_SIZE == 32/64 */
1368
1369 #else /* ! UMUL64 */
1370
1371 #if (BIG_CHUNK_SIZE != 32)
1372 #error Don't use 64-bit chunks without defining UMUL64
1373 #endif
1374
1375
1376 /*
1377 * r = r + a * digit, r and a are vectors of length len
1378 * returns the carry digit
1379 */
1380 uint32_t
1381 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1382 {
1383 uint32_t cy, cy1, retcy, dlow, dhigh;
1384 int i;
1385
1386 cy1 = 0;
1387 dlow = digit & 0xffff;
1388 dhigh = digit >> 16;
1389 for (i = 0; i < len; i++) {
1390 cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
1391 cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
1392 r[i] = (cy & 0xffff) | (cy1 << 16);
1393 }
1394 retcy = cy1 >> 16;
1395
1396 cy1 = r[0] & 0xffff;
1397 for (i = 0; i < len - 1; i++) {
1398 cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
1399 r[i] = (cy1 & 0xffff) | (cy << 16);
1400 cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
1401 }
1402 cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
1403 r[len - 1] = (cy1 & 0xffff) | (cy << 16);
1404 retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;
1405
1406 return (retcy);
1407 }
1408
1409
1410 /*
1411 * r = a * digit, r and a are vectors of length len
1412 * returns the carry digit
1413 */
1414 uint32_t
1415 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1416 {
1417 int i;
1418
1419 ASSERT(r != a);
1420 for (i = 0; i < len; i++) {
1421 r[i] = 0;
1422 }
1423
1424 return (big_mul_add_vec(r, a, len, digit));
1425 }
1426
1427 void
1428 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1429 {
1430 int i;
1431
1432 ASSERT(r != a);
1433 r[len] = big_mul_set_vec(r, a, len, a[0]);
1434 for (i = 1; i < len; ++i)
1435 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1436 }
1437
1438 #endif /* UMUL64 */
1439
1440 void
1441 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1442 BIG_CHUNK_TYPE *b, int blen)
1443 {
1444 int i;
1445
1446 r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1447 for (i = 1; i < blen; ++i)
1448 r[alen + i] = big_mul_add_vec(r+i, a, alen, b[i]);
1449 }
1450
1451
1452 #endif /* ! PSR_MUL */
1453
1454
1455 /*
1456 * result = aa * bb result->value should be big enough to hold the result
1457 *
1458 * Implementation: Standard grammar school algorithm
1459 *
1460 */
1461 BIG_ERR_CODE
1462 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1463 {
1464 BIGNUM tmp1;
1465 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1466 BIG_CHUNK_TYPE *r, *t, *a, *b;
1467 BIG_ERR_CODE err;
1468 int i, alen, blen, rsize, sign, diff;
1469
1470 if (aa == bb) {
1471 diff = 0;
1472 } else {
1473 diff = big_cmp_abs(aa, bb);
1474 if (diff < 0) {
1475 BIGNUM *tt;
1476 tt = aa;
1477 aa = bb;
1478 bb = tt;
1479 }
1480 }
1481 a = aa->value;
1482 b = bb->value;
1483 alen = aa->len;
1484 blen = bb->len;
1485 while ((alen > 1) && (a[alen - 1] == 0)) {
1486 alen--;
1487 }
1488 aa->len = alen;
1489 while ((blen > 1) && (b[blen - 1] == 0)) {
1490 blen--;
1491 }
1492 bb->len = blen;
1493
1494 rsize = alen + blen;
1495 if (result->size < rsize) {
1496 err = big_extend(result, rsize);
1497 if (err != BIG_OK) {
1498 return (err);
1499 }
1500 /* aa or bb might be an alias to result */
1501 a = aa->value;
1502 b = bb->value;
1503 }
1504 r = result->value;
1505
1506 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1507 result->len = 1;
1508 result->sign = 1;
1509 r[0] = 0;
1510 return (BIG_OK);
1511 }
1512 sign = aa->sign * bb->sign;
1513 if ((alen == 1) && (a[0] == 1)) {
1514 for (i = 0; i < blen; i++) {
1515 r[i] = b[i];
1516 }
1517 result->len = blen;
1518 result->sign = sign;
1519 return (BIG_OK);
1520 }
1521 if ((blen == 1) && (b[0] == 1)) {
1522 for (i = 0; i < alen; i++) {
1523 r[i] = a[i];
1524 }
1525 result->len = alen;
1526 result->sign = sign;
1527 return (BIG_OK);
1528 }
1529
1530 if ((err = big_init1(&tmp1, rsize,
1531 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1532 return (err);
1533 }
1534 (void) big_copy(&tmp1, aa);
1535 t = tmp1.value;
1536
1537 for (i = 0; i < rsize; i++) {
1538 t[i] = 0;
1539 }
1540
1541 if (diff == 0 && alen > 2) {
1542 BIG_SQR_VEC(t, a, alen);
1543 } else if (blen > 0) {
1544 BIG_MUL_VEC(t, a, alen, b, blen);
1545 }
1546
1547 if (t[rsize - 1] == 0) {
1548 tmp1.len = rsize - 1;
1549 } else {
1550 tmp1.len = rsize;
1551 }
1552 if ((err = big_copy(result, &tmp1)) != BIG_OK) {
1553 return (err);
1554 }
1555 result->sign = sign;
1556
1557 big_finish(&tmp1);
1558
1559 return (BIG_OK);
1560 }
1561
1562
1563 /*
1564 * caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1
1565 * and that ret is not n
1566 */
1567 BIG_ERR_CODE
1568 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1569 {
1570 int i, j, nlen, needsubtract;
1571 BIG_CHUNK_TYPE *nn, *rr;
1572 BIG_CHUNK_TYPE digit, c;
1573 BIG_ERR_CODE err;
1574
1575 nlen = n->len;
1576 nn = n->value;
1577
1578 rr = ret->value;
1579
1580 if ((err = big_mul(ret, a, b)) != BIG_OK) {
1581 return (err);
1582 }
1583
1584 rr = ret->value;
1585 for (i = ret->len; i < 2 * nlen + 1; i++) {
1586 rr[i] = 0;
1587 }
1588 for (i = 0; i < nlen; i++) {
1589 digit = rr[i];
1590 digit = digit * n0;
1591
1592 c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit);
1593 j = i + nlen;
1594 rr[j] += c;
1595 while (rr[j] < c) {
1596 rr[j + 1] += 1;
1597 j++;
1598 c = 1;
1599 }
1600 }
1601
1602 needsubtract = 0;
1603 if ((rr[2 * nlen] != 0))
1604 needsubtract = 1;
1605 else {
1606 for (i = 2 * nlen - 1; i >= nlen; i--) {
1607 if (rr[i] > nn[i - nlen]) {
1608 needsubtract = 1;
1609 break;
1610 } else if (rr[i] < nn[i - nlen]) {
1611 break;
1612 }
1613 }
1614 }
1615 if (needsubtract)
1616 big_sub_vec(rr, rr + nlen, nn, nlen);
1617 else {
1618 for (i = 0; i < nlen; i++) {
1619 rr[i] = rr[i + nlen];
1620 }
1621 }
1622 for (i = nlen - 1; (i >= 0) && (rr[i] == 0); i--)
1623 ;
1624 ret->len = i+1;
1625
1626 return (BIG_OK);
1627 }
1628
1629
1630 BIG_CHUNK_TYPE
1631 big_n0(BIG_CHUNK_TYPE n)
1632 {
1633 int i;
1634 BIG_CHUNK_TYPE result, tmp;
1635
1636 result = 0;
1637 tmp = BIG_CHUNK_ALLBITS;
1638 for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1639 if ((tmp & 1) == 1) {
1640 result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1641 tmp = tmp - n;
1642 } else {
1643 result = (result >> 1);
1644 }
1645 tmp = tmp >> 1;
1646 }
1647
1648 return (result);
1649 }
1650
1651
1652 int
1653 big_numbits(BIGNUM *n)
1654 {
1655 int i, j;
1656 BIG_CHUNK_TYPE t;
1657
1658 for (i = n->len - 1; i > 0; i--) {
1659 if (n->value[i] != 0) {
1660 break;
1661 }
1662 }
1663 t = n->value[i];
1664 for (j = BIG_CHUNK_SIZE; j > 0; j--) {
1665 if ((t & BIG_CHUNK_HIGHBIT) == 0) {
1666 t = t << 1;
1667 } else {
1668 return (BIG_CHUNK_SIZE * i + j);
1669 }
1670 }
1671 return (0);
1672 }
1673
1674
1675 /* caller must make sure that a < n */
1676 BIG_ERR_CODE
1677 big_mont_rr(BIGNUM *result, BIGNUM *n)
1678 {
1679 BIGNUM rr;
1680 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1681 int len, i;
1682 BIG_ERR_CODE err;
1683
1684 rr.malloced = 0;
1685 len = n->len;
1686
1687 if ((err = big_init1(&rr, 2 * len + 1,
1688 rrvalue, arraysize(rrvalue))) != BIG_OK) {
1689 return (err);
1690 }
1691
1692 for (i = 0; i < 2 * len; i++) {
1693 rr.value[i] = 0;
1694 }
1695 rr.value[2 * len] = 1;
1696 rr.len = 2 * len + 1;
1697 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1698 goto ret;
1699 }
1700 err = big_copy(result, &rr);
1701 ret:
1702 big_finish(&rr);
1703 return (err);
1704 }
1705
1706
1707 /* caller must make sure that a < n */
1708 BIG_ERR_CODE
1709 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
1710 BIGNUM *n_rr)
1711 {
1712 BIGNUM rr;
1713 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1714 int len, i;
1715 BIG_ERR_CODE err;
1716
1717 rr.malloced = 0;
1718 len = n->len;
1719
1720 if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
1721 != BIG_OK) {
1722 return (err);
1723 }
1724
1725 if (n_rr == NULL) {
1726 for (i = 0; i < 2 * len; i++) {
1727 rr.value[i] = 0;
1728 }
1729 rr.value[2 * len] = 1;
1730 rr.len = 2 * len + 1;
1731 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1732 goto ret;
1733 }
1734 n_rr = &rr;
1735 }
1736
1737 if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
1738 goto ret;
1739 }
1740 err = big_copy(result, &rr);
1741
1742 ret:
1743 big_finish(&rr);
1744 return (err);
1745 }
1746
1747
1748 #ifdef USE_FLOATING_POINT
1749 #define big_modexp_ncp_float big_modexp_ncp_sw
1750 #else
1751 #define big_modexp_ncp_int big_modexp_ncp_sw
1752 #endif
1753
1754 #define MAX_EXP_BIT_GROUP_SIZE 6
1755 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1756
1757 /* ARGSUSED */
1758 static BIG_ERR_CODE
1759 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1760 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1761
1762 {
1763 BIGNUM apowers[APOWERS_MAX_SIZE];
1764 BIGNUM tmp1;
1765 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1766 int i, j, k, l, m, p;
1767 int bit, bitind, bitcount, groupbits, apowerssize;
1768 int nbits;
1769 BIG_ERR_CODE err;
1770
1771 nbits = big_numbits(e);
1772 if (nbits < 50) {
1773 groupbits = 1;
1774 apowerssize = 1;
1775 } else {
1776 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1777 apowerssize = 1 << (groupbits - 1);
1778 }
1779
1780
1781 if ((err = big_init1(&tmp1, 2 * n->len + 1,
1782 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1783 return (err);
1784 }
1785
1786 /* set the malloced bit to help cleanup */
1787 for (i = 0; i < apowerssize; i++) {
1788 apowers[i].malloced = 0;
1789 }
1790 for (i = 0; i < apowerssize; i++) {
1791 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1792 BIG_OK) {
1793 goto ret;
1794 }
1795 }
1796
1797 (void) big_copy(&(apowers[0]), ma);
1798
1799 if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1800 goto ret;
1801 }
1802 (void) big_copy(ma, &tmp1);
1803
1804 for (i = 1; i < apowerssize; i++) {
1805 if ((err = big_mont_mul(&tmp1, ma,
1806 &(apowers[i-1]), n, n0)) != BIG_OK) {
1807 goto ret;
1808 }
1809 (void) big_copy(&apowers[i], &tmp1);
1810 }
1811
1812 bitind = nbits % BIG_CHUNK_SIZE;
1813 k = 0;
1814 l = 0;
1815 p = 0;
1816 bitcount = 0;
1817 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1818 for (j = bitind - 1; j >= 0; j--) {
1819 bit = (e->value[i] >> j) & 1;
1820 if ((bitcount == 0) && (bit == 0)) {
1821 if ((err = big_mont_mul(tmp,
1822 tmp, tmp, n, n0)) != BIG_OK) {
1823 goto ret;
1824 }
1825 } else {
1826 bitcount++;
1827 p = p * 2 + bit;
1828 if (bit == 1) {
1829 k = k + l + 1;
1830 l = 0;
1831 } else {
1832 l++;
1833 }
1834 if (bitcount == groupbits) {
1835 for (m = 0; m < k; m++) {
1836 if ((err = big_mont_mul(tmp,
1837 tmp, tmp, n, n0)) !=
1838 BIG_OK) {
1839 goto ret;
1840 }
1841 }
1842 if ((err = big_mont_mul(tmp, tmp,
1843 &(apowers[p >> (l + 1)]),
1844 n, n0)) != BIG_OK) {
1845 goto ret;
1846 }
1847 for (m = 0; m < l; m++) {
1848 if ((err = big_mont_mul(tmp,
1849 tmp, tmp, n, n0)) !=
1850 BIG_OK) {
1851 goto ret;
1852 }
1853 }
1854 k = 0;
1855 l = 0;
1856 p = 0;
1857 bitcount = 0;
1858 }
1859 }
1860 }
1861 bitind = BIG_CHUNK_SIZE;
1862 }
1863
1864 for (m = 0; m < k; m++) {
1865 if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
1866 goto ret;
1867 }
1868 }
1869 if (p != 0) {
1870 if ((err = big_mont_mul(tmp, tmp,
1871 &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
1872 goto ret;
1873 }
1874 }
1875 for (m = 0; m < l; m++) {
1876 if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
1877 goto ret;
1878 }
1879 }
1880
1881 ret:
1882 for (i = apowerssize - 1; i >= 0; i--) {
1883 big_finish(&(apowers[i]));
1884 }
1885 big_finish(&tmp1);
1886
1887 return (err);
1888 }
1889
1890
1891 #ifdef USE_FLOATING_POINT
1892
1893 #ifdef _KERNEL
1894
1895 #include <sys/sysmacros.h>
1896 #include <sys/regset.h>
1897 #include <sys/fpu/fpusystm.h>
1898
1899 /* the alignment for block stores to save fp registers */
1900 #define FPR_ALIGN (64)
1901
1902 extern void big_savefp(kfpu_t *);
1903 extern void big_restorefp(kfpu_t *);
1904
1905 #endif /* _KERNEL */
1906
1907 /*
1908 * This version makes use of floating point for performance
1909 */
1910 static BIG_ERR_CODE
1911 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1912 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1913 {
1914
1915 int i, j, k, l, m, p, bit, bitind, bitcount, nlen;
1916 double dn0;
1917 double *dn, *dt, *d16r, *d32r;
1918 uint32_t *nint, *prod;
1919 double *apowers[APOWERS_MAX_SIZE];
1920 int nbits, groupbits, apowerssize;
1921 BIG_ERR_CODE err = BIG_OK;
1922
1923 #ifdef _KERNEL
1924 uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1925 kfpu_t *fpu;
1926
1927 #ifdef DEBUG
1928 if (!fpu_exists)
1929 return (BIG_GENERAL_ERR);
1930 #endif
1931
1932 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1933 big_savefp(fpu);
1934
1935 #endif /* _KERNEL */
1936
1937 nbits = big_numbits(e);
1938 if (nbits < 50) {
1939 groupbits = 1;
1940 apowerssize = 1;
1941 } else {
1942 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1943 apowerssize = 1 << (groupbits - 1);
1944 }
1945
1946 nlen = (BIG_CHUNK_SIZE / 32) * n->len;
1947 dn0 = (double)(n0 & 0xffff);
1948
1949 dn = dt = d16r = d32r = NULL;
1950 nint = prod = NULL;
1951 for (i = 0; i < apowerssize; i++) {
1952 apowers[i] = NULL;
1953 }
1954
1955 if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
1956 err = BIG_NO_MEM;
1957 goto ret;
1958 }
1959 if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
1960 err = BIG_NO_MEM;
1961 goto ret;
1962 }
1963 if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
1964 err = BIG_NO_MEM;
1965 goto ret;
1966 }
1967 if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
1968 err = BIG_NO_MEM;
1969 goto ret;
1970 }
1971 if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
1972 err = BIG_NO_MEM;
1973 goto ret;
1974 }
1975 if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
1976 err = BIG_NO_MEM;
1977 goto ret;
1978 }
1979 for (i = 0; i < apowerssize; i++) {
1980 if ((apowers[i] = big_malloc((2 * nlen + 1) *
1981 sizeof (double))) == NULL) {
1982 err = BIG_NO_MEM;
1983 goto ret;
1984 }
1985 }
1986
1987 #if (BIG_CHUNK_SIZE == 32)
1988 for (i = 0; i < ma->len; i++) {
1989 nint[i] = ma->value[i];
1990 }
1991 for (; i < nlen; i++) {
1992 nint[i] = 0;
1993 }
1994 #else
1995 for (i = 0; i < ma->len; i++) {
1996 nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
1997 nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
1998 }
1999 for (i = ma->len * 2; i < nlen; i++) {
2000 nint[i] = 0;
2001 }
2002 #endif
2003 conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);
2004
2005 #if (BIG_CHUNK_SIZE == 32)
2006 for (i = 0; i < n->len; i++) {
2007 nint[i] = n->value[i];
2008 }
2009 for (; i < nlen; i++) {
2010 nint[i] = 0;
2011 }
2012 #else
2013 for (i = 0; i < n->len; i++) {
2014 nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
2015 nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
2016 }
2017 for (i = n->len * 2; i < nlen; i++) {
2018 nint[i] = 0;
2019 }
2020 #endif
2021 conv_i32_to_d32(dn, nint, nlen);
2022
2023 mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
2024 conv_i32_to_d32(d32r, prod, nlen);
2025 for (i = 1; i < apowerssize; i++) {
2026 mont_mulf_noconv(prod, d32r, apowers[i - 1],
2027 dt, dn, nint, nlen, dn0);
2028 conv_i32_to_d16(apowers[i], prod, nlen);
2029 }
2030
2031 #if (BIG_CHUNK_SIZE == 32)
2032 for (i = 0; i < tmp->len; i++) {
2033 prod[i] = tmp->value[i];
2034 }
2035 for (; i < nlen + 1; i++) {
2036 prod[i] = 0;
2037 }
2038 #else
2039 for (i = 0; i < tmp->len; i++) {
2040 prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
2041 prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
2042 }
2043 for (i = tmp->len * 2; i < nlen + 1; i++) {
2044 prod[i] = 0;
2045 }
2046 #endif
2047
2048 bitind = nbits % BIG_CHUNK_SIZE;
2049 k = 0;
2050 l = 0;
2051 p = 0;
2052 bitcount = 0;
2053 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
2054 for (j = bitind - 1; j >= 0; j--) {
2055 bit = (e->value[i] >> j) & 1;
2056 if ((bitcount == 0) && (bit == 0)) {
2057 conv_i32_to_d32_and_d16(d32r, d16r,
2058 prod, nlen);
2059 mont_mulf_noconv(prod, d32r, d16r,
2060 dt, dn, nint, nlen, dn0);
2061 } else {
2062 bitcount++;
2063 p = p * 2 + bit;
2064 if (bit == 1) {
2065 k = k + l + 1;
2066 l = 0;
2067 } else {
2068 l++;
2069 }
2070 if (bitcount == groupbits) {
2071 for (m = 0; m < k; m++) {
2072 conv_i32_to_d32_and_d16(d32r,
2073 d16r, prod, nlen);
2074 mont_mulf_noconv(prod, d32r,
2075 d16r, dt, dn, nint,
2076 nlen, dn0);
2077 }
2078 conv_i32_to_d32(d32r, prod, nlen);
2079 mont_mulf_noconv(prod, d32r,
2080 apowers[p >> (l+1)],
2081 dt, dn, nint, nlen, dn0);
2082 for (m = 0; m < l; m++) {
2083 conv_i32_to_d32_and_d16(d32r,
2084 d16r, prod, nlen);
2085 mont_mulf_noconv(prod, d32r,
2086 d16r, dt, dn, nint,
2087 nlen, dn0);
2088 }
2089 k = 0;
2090 l = 0;
2091 p = 0;
2092 bitcount = 0;
2093 }
2094 }
2095 }
2096 bitind = BIG_CHUNK_SIZE;
2097 }
2098
2099 for (m = 0; m < k; m++) {
2100 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2101 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2102 }
2103 if (p != 0) {
2104 conv_i32_to_d32(d32r, prod, nlen);
2105 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
2106 dt, dn, nint, nlen, dn0);
2107 }
2108 for (m = 0; m < l; m++) {
2109 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2110 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2111 }
2112
2113 #if (BIG_CHUNK_SIZE == 32)
2114 for (i = 0; i < nlen; i++) {
2115 result->value[i] = prod[i];
2116 }
2117 for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
2118 ;
2119 #else
2120 for (i = 0; i < nlen / 2; i++) {
2121 result->value[i] = (uint64_t)(prod[2 * i]) +
2122 (((uint64_t)(prod[2 * i + 1])) << 32);
2123 }
2124 for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
2125 ;
2126 #endif
2127 result->len = i + 1;
2128
2129 ret:
2130 for (i = apowerssize - 1; i >= 0; i--) {
2131 if (apowers[i] != NULL)
2132 big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
2133 }
2134 if (d32r != NULL) {
2135 big_free(d32r, nlen * sizeof (double));
2136 }
2137 if (d16r != NULL) {
2138 big_free(d16r, (2 * nlen + 1) * sizeof (double));
2139 }
2140 if (prod != NULL) {
2141 big_free(prod, (nlen + 1) * sizeof (uint32_t));
2142 }
2143 if (nint != NULL) {
2144 big_free(nint, nlen * sizeof (uint32_t));
2145 }
2146 if (dt != NULL) {
2147 big_free(dt, (4 * nlen + 2) * sizeof (double));
2148 }
2149 if (dn != NULL) {
2150 big_free(dn, nlen * sizeof (double));
2151 }
2152
2153 #ifdef _KERNEL
2154 big_restorefp(fpu);
2155 #endif
2156
2157 return (err);
2158 }
2159
2160 #endif /* USE_FLOATING_POINT */
2161
2162
2163 BIG_ERR_CODE
2164 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr,
2165 big_modexp_ncp_info_t *info)
2166 {
2167 BIGNUM ma, tmp, rr;
2168 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE];
2169 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2170 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
2171 BIG_ERR_CODE err;
2172 BIG_CHUNK_TYPE n0;
2173
2174 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) !=
2175 BIG_OK) {
2176 return (err);
2177 }
2178 ma.len = 1;
2179 ma.value[0] = 0;
2180
2181 if ((err = big_init1(&tmp, 2 * n->len + 1,
2182 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2183 goto ret1;
2184 }
2185
2186 /* set the malloced bit to help cleanup */
2187 rr.malloced = 0;
2188 if (n_rr == NULL) {
2189 if ((err = big_init1(&rr, 2 * n->len + 1,
2190 rrvalue, arraysize(rrvalue))) != BIG_OK) {
2191 goto ret2;
2192 }
2193 if (big_mont_rr(&rr, n) != BIG_OK) {
2194 goto ret;
2195 }
2196 n_rr = &rr;
2197 }
2198
2199 n0 = big_n0(n->value[0]);
2200
2201 if (big_cmp_abs(a, n) > 0) {
2202 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2203 goto ret;
2204 }
2205 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2206 } else {
2207 err = big_mont_conv(&ma, a, n, n0, n_rr);
2208 }
2209 if (err != BIG_OK) {
2210 goto ret;
2211 }
2212
2213 tmp.len = 1;
2214 tmp.value[0] = 1;
2215 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) {
2216 goto ret;
2217 }
2218
2219 if ((info != NULL) && (info->func != NULL)) {
2220 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0,
2221 info->ncp, info->reqp);
2222 } else {
2223 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0);
2224 }
2225 if (err != BIG_OK) {
2226 goto ret;
2227 }
2228
2229 ma.value[0] = 1;
2230 ma.len = 1;
2231 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) {
2232 goto ret;
2233 }
2234 err = big_copy(result, &tmp);
2235
2236 ret:
2237 if (rr.malloced) {
2238 big_finish(&rr);
2239 }
2240 ret2:
2241 big_finish(&tmp);
2242 ret1:
2243 big_finish(&ma);
2244
2245 return (err);
2246 }
2247
2248 BIG_ERR_CODE
2249 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
2250 {
2251 return (big_modexp_ext(result, a, e, n, n_rr, NULL));
2252 }
2253
2254
2255 BIG_ERR_CODE
2256 big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2257 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2258 BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
2259 {
2260 BIGNUM ap, aq, tmp;
2261 int alen, biglen, sign;
2262 BIG_ERR_CODE err;
2263
2264 if (p->len > q->len) {
2265 biglen = p->len;
2266 } else {
2267 biglen = q->len;
2268 }
2269
2270 if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
2271 return (err);
2272 }
2273 if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
2274 goto ret1;
2275 }
2276 if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
2277 goto ret2;
2278 }
2279
2280 /*
2281 * check whether a is too short - to avoid timing attacks
2282 */
2283 alen = a->len;
2284 while ((alen > p->len) && (a->value[alen - 1] == 0)) {
2285 alen--;
2286 }
2287 if (alen < p->len + q->len) {
2288 /*
2289 * a is too short, add p*q to it before
2290 * taking it modulo p and q
2291 * this will also affect timing, but this difference
2292 * does not depend on p or q, only on a
2293 * (in "normal" operation, this path will never be
2294 * taken, so it is not a performance penalty
2295 */
2296 if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
2297 goto ret;
2298 }
2299 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
2300 goto ret;
2301 }
2302 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
2303 goto ret;
2304 }
2305 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2306 goto ret;
2307 }
2308 } else {
2309 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
2310 goto ret;
2311 }
2312 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
2313 goto ret;
2314 }
2315 }
2316
2317 if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
2318 BIG_OK) {
2319 goto ret;
2320 }
2321 if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
2322 BIG_OK) {
2323 goto ret;
2324 }
2325 if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
2326 goto ret;
2327 }
2328 if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
2329 goto ret;
2330 }
2331 sign = tmp.sign;
2332 tmp.sign = 1;
2333 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2334 goto ret;
2335 }
2336 if ((sign == -1) && (!big_is_zero(&aq))) {
2337 (void) big_sub_pos(&aq, q, &aq);
2338 }
2339 if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
2340 goto ret;
2341 }
2342 err = big_add_abs(result, &ap, &tmp);
2343
2344 ret:
2345 big_finish(&tmp);
2346 ret2:
2347 big_finish(&aq);
2348 ret1:
2349 big_finish(&ap);
2350
2351 return (err);
2352 }
2353
2354
2355 BIG_ERR_CODE
2356 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2357 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2358 BIGNUM *p_rr, BIGNUM *q_rr)
2359 {
2360 return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
2361 p, q, pinvmodq, p_rr, q_rr, NULL));
2362 }
2363
2364
2365 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2366 BIGNUM big_One = {1, 1, 1, 0, onearr};
2367
2368 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2369 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2370
2371 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2372 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2373
2374
2375 BIG_ERR_CODE
2376 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2377 {
2378 BIGNUM *high, *low, *mid, *t;
2379 BIGNUM t1, t2, t3, prod;
2380 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2381 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2382 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2383 BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE];
2384 int i, nbits, diff, nrootbits, highbits;
2385 BIG_ERR_CODE err;
2386
2387 nbits = big_numbits(n);
2388
2389 if ((err = big_init1(&t1, n->len + 1,
2390 t1value, arraysize(t1value))) != BIG_OK)
2391 return (err);
2392 if ((err = big_init1(&t2, n->len + 1,
2393 t2value, arraysize(t2value))) != BIG_OK)
2394 goto ret1;
2395 if ((err = big_init1(&t3, n->len + 1,
2396 t3value, arraysize(t3value))) != BIG_OK)
2397 goto ret2;
2398 if ((err = big_init1(&prod, n->len + 1,
2399 prodvalue, arraysize(prodvalue))) != BIG_OK)
2400 goto ret3;
2401
2402 nrootbits = (nbits + 1) / 2;
2403 t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2404 for (i = 0; i < t1.len; i++) {
2405 t1.value[i] = 0;
2406 t2.value[i] = BIG_CHUNK_ALLBITS;
2407 }
2408 highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
2409 if (highbits == BIG_CHUNK_SIZE) {
2410 t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
2411 t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
2412 } else {
2413 t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
2414 t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
2415 }
2416
2417 high = &t2;
2418 low = &t1;
2419 mid = &t3;
2420
2421 if ((err = big_mul(&prod, high, high)) != BIG_OK) {
2422 goto ret;
2423 }
2424 diff = big_cmp_abs(&prod, n);
2425 if (diff <= 0) {
2426 err = big_copy(result, high);
2427 goto ret;
2428 }
2429
2430 (void) big_sub_pos(mid, high, low);
2431 while (big_cmp_abs(&big_One, mid) != 0) {
2432 (void) big_add_abs(mid, high, low);
2433 (void) big_half_pos(mid, mid);
2434 if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
2435 goto ret;
2436 diff = big_cmp_abs(&prod, n);
2437 if (diff > 0) {
2438 t = high;
2439 high = mid;
2440 mid = t;
2441 } else if (diff < 0) {
2442 t = low;
2443 low = mid;
2444 mid = t;
2445 } else {
2446 err = big_copy(result, low);
2447 goto ret;
2448 }
2449 (void) big_sub_pos(mid, high, low);
2450 }
2451
2452 err = big_copy(result, low);
2453 ret:
2454 if (prod.malloced) big_finish(&prod);
2455 ret3:
2456 if (t3.malloced) big_finish(&t3);
2457 ret2:
2458 if (t2.malloced) big_finish(&t2);
2459 ret1:
2460 if (t1.malloced) big_finish(&t1);
2461
2462 return (err);
2463 }
2464
2465
2466 BIG_ERR_CODE
2467 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
2468 {
2469 BIGNUM *t, *tmp2, *m, *n;
2470 BIGNUM t1, t2, t3;
2471 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2472 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2473 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2474 int len, err;
2475
2476 if (big_is_zero(nn) ||
2477 (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
2478 *jac = 0;
2479 return (BIG_OK);
2480 }
2481
2482 if (nn->len > mm->len) {
2483 len = nn->len;
2484 } else {
2485 len = mm->len;
2486 }
2487
2488 if ((err = big_init1(&t1, len,
2489 t1value, arraysize(t1value))) != BIG_OK) {
2490 return (err);
2491 }
2492 if ((err = big_init1(&t2, len,
2493 t2value, arraysize(t2value))) != BIG_OK) {
2494 goto ret1;
2495 }
2496 if ((err = big_init1(&t3, len,
2497 t3value, arraysize(t3value))) != BIG_OK) {
2498 goto ret2;
2499 }
2500
2501 n = &t1;
2502 m = &t2;
2503 tmp2 = &t3;
2504
2505 (void) big_copy(n, nn);
2506 (void) big_copy(m, mm);
2507
2508 *jac = 1;
2509 while (big_cmp_abs(&big_One, m) != 0) {
2510 if (big_is_zero(n)) {
2511 *jac = 0;
2512 goto ret;
2513 }
2514 if ((m->value[0] & 1) == 0) {
2515 if (((n->value[0] & 7) == 3) ||
2516 ((n->value[0] & 7) == 5))
2517 *jac = -*jac;
2518 (void) big_half_pos(m, m);
2519 } else if ((n->value[0] & 1) == 0) {
2520 if (((m->value[0] & 7) == 3) ||
2521 ((m->value[0] & 7) == 5))
2522 *jac = -*jac;
2523 (void) big_half_pos(n, n);
2524 } else {
2525 if (((m->value[0] & 3) == 3) &&
2526 ((n->value[0] & 3) == 3)) {
2527 *jac = -*jac;
2528 }
2529 if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
2530 goto ret;
2531 }
2532 t = tmp2;
2533 tmp2 = m;
2534 m = n;
2535 n = t;
2536 }
2537 }
2538 err = BIG_OK;
2539
2540 ret:
2541 if (t3.malloced) big_finish(&t3);
2542 ret2:
2543 if (t2.malloced) big_finish(&t2);
2544 ret1:
2545 if (t1.malloced) big_finish(&t1);
2546
2547 return (err);
2548 }
2549
2550
2551 BIG_ERR_CODE
2552 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2553 {
2554 int m, w, i;
2555 BIG_CHUNK_TYPE bit;
2556 BIGNUM ki, tmp, tmp2;
2557 BIG_CHUNK_TYPE kivalue[BIGTMPSIZE];
2558 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2559 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
2560 BIG_ERR_CODE err;
2561
2562 if (big_cmp_abs(k, &big_One) == 0) {
2563 (void) big_copy(Lk, p);
2564 (void) big_copy(Lkminus1, &big_Two);
2565 return (BIG_OK);
2566 }
2567
2568 if ((err = big_init1(&ki, k->len + 1,
2569 kivalue, arraysize(kivalue))) != BIG_OK)
2570 return (err);
2571
2572 if ((err = big_init1(&tmp, 2 * n->len +1,
2573 tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2574 goto ret1;
2575
2576 if ((err = big_init1(&tmp2, n->len,
2577 tmp2value, arraysize(tmp2value))) != BIG_OK)
2578 goto ret2;
2579
2580 m = big_numbits(k);
2581 ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2582 w = (m - 1) / BIG_CHUNK_SIZE;
2583 bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2584 for (i = 0; i < ki.len; i++) {
2585 ki.value[i] = 0;
2586 }
2587 ki.value[ki.len - 1] = bit;
2588 if (big_cmp_abs(k, &ki) != 0) {
2589 (void) big_double(&ki, &ki);
2590 }
2591 (void) big_sub_pos(&ki, &ki, k);
2592
2593 (void) big_copy(Lk, p);
2594 (void) big_copy(Lkminus1, &big_Two);
2595
2596 for (i = 0; i < m; i++) {
2597 if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
2598 goto ret;
2599 }
2600 (void) big_add_abs(&tmp, &tmp, n);
2601 (void) big_sub_pos(&tmp, &tmp, p);
2602 if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
2603 goto ret;
2604 }
2605 if ((ki.value[w] & bit) != 0) {
2606 if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
2607 BIG_OK) {
2608 goto ret;
2609 }
2610 (void) big_add_abs(&tmp, &tmp, n);
2611 (void) big_sub_pos(&tmp, &tmp, &big_Two);
2612 if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
2613 BIG_OK) {
2614 goto ret;
2615 }
2616 (void) big_copy(Lk, &tmp2);
2617 } else {
2618 if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
2619 goto ret;
2620 }
2621 (void) big_add_abs(&tmp, &tmp, n);
2622 (void) big_sub_pos(&tmp, &tmp, &big_Two);
2623 if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
2624 goto ret;
2625 }
2626 (void) big_copy(Lkminus1, &tmp2);
2627 }
2628 bit = bit >> 1;
2629 if (bit == 0) {
2630 bit = BIG_CHUNK_HIGHBIT;
2631 w--;
2632 }
2633 }
2634
2635 err = BIG_OK;
2636
2637 ret:
2638 if (tmp2.malloced) big_finish(&tmp2);
2639 ret2:
2640 if (tmp.malloced) big_finish(&tmp);
2641 ret1:
2642 if (ki.malloced) big_finish(&ki);
2643
2644 return (err);
2645 }
2646
2647
2648 BIG_ERR_CODE
2649 big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
2650 {
2651 BIGNUM o, nminus1, tmp, Lkminus1, Lk;
2652 BIG_CHUNK_TYPE ovalue[BIGTMPSIZE];
2653 BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE];
2654 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2655 BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE];
2656 BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE];
2657 BIG_ERR_CODE err;
2658 int e, i, jac;
2659
2660 if (big_cmp_abs(n, &big_One) == 0) {
2661 return (BIG_FALSE);
2662 }
2663 if (big_cmp_abs(n, &big_Two) == 0) {
2664 return (BIG_TRUE);
2665 }
2666 if ((n->value[0] & 1) == 0) {
2667 return (BIG_FALSE);
2668 }
2669
2670 if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
2671 BIG_OK) {
2672 return (err);
2673 }
2674
2675 if ((err = big_init1(&nminus1, n->len,
2676 nminus1value, arraysize(nminus1value))) != BIG_OK) {
2677 goto ret1;
2678 }
2679
2680 if ((err = big_init1(&tmp, 2 * n->len,
2681 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2682 goto ret2;
2683 }
2684
2685 if ((err = big_init1(&Lkminus1, n->len,
2686 Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
2687 goto ret3;
2688 }
2689
2690 if ((err = big_init1(&Lk, n->len,
2691 Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
2692 goto ret4;
2693 }
2694
2695 (void) big_sub_pos(&o, n, &big_One); /* cannot fail */
2696 (void) big_copy(&nminus1, &o); /* cannot fail */
2697 e = 0;
2698 while ((o.value[0] & 1) == 0) {
2699 e++;
2700 (void) big_half_pos(&o, &o); /* cannot fail */
2701 }
2702 if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
2703 BIG_OK) {
2704 goto ret;
2705 }
2706
2707 i = 0;
2708 while ((i < e) &&
2709 (big_cmp_abs(&tmp, &big_One) != 0) &&
2710 (big_cmp_abs(&tmp, &nminus1) != 0)) {
2711 if ((err =
2712 big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
2713 BIG_OK)
2714 goto ret;
2715 i++;
2716 }
2717
2718 if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
2719 ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
2720 err = BIG_FALSE;
2721 goto ret;
2722 }
2723
2724 if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
2725 goto ret;
2726 }
2727
2728 if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
2729 goto ret;
2730 }
2731 if (big_cmp_abs(&tmp, n) == 0) {
2732 err = BIG_FALSE;
2733 goto ret;
2734 }
2735
2736 (void) big_copy(&o, &big_Two);
2737 do {
2738 (void) big_add_abs(&o, &o, &big_One);
2739 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
2740 goto ret;
2741 }
2742 (void) big_sub_pos(&tmp, &tmp, &big_Four);
2743 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
2744 goto ret;
2745 }
2746 } while (jac != -1);
2747
2748 (void) big_add_abs(&tmp, n, &big_One);
2749 if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
2750 goto ret;
2751 }
2752
2753 if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
2754 (big_cmp_abs(&Lk, &big_Two) == 0)) {
2755 err = BIG_TRUE;
2756 } else {
2757 err = BIG_FALSE;
2758 }
2759
2760 ret:
2761 if (Lk.malloced) big_finish(&Lk);
2762 ret4:
2763 if (Lkminus1.malloced) big_finish(&Lkminus1);
2764 ret3:
2765 if (tmp.malloced) big_finish(&tmp);
2766 ret2:
2767 if (nminus1.malloced) big_finish(&nminus1);
2768 ret1:
2769 if (o.malloced) big_finish(&o);
2770
2771 return (err);
2772 }
2773
2774
2775 BIG_ERR_CODE
2776 big_isprime_pos(BIGNUM *n)
2777 {
2778 return (big_isprime_pos_ext(n, NULL));
2779 }
2780
2781
2782 #define SIEVESIZE 1000
2783
2784 uint32_t smallprimes[] =
2785 {
2786 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2787 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97
2788 };
2789
2790
2791 BIG_ERR_CODE
2792 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2793 {
2794 BIG_ERR_CODE err;
2795 int sieve[SIEVESIZE];
2796 int i;
2797 uint32_t off, p;
2798
2799 if ((err = big_copy(result, n)) != BIG_OK) {
2800 return (err);
2801 }
2802 result->value[0] |= 1;
2803 /* CONSTCOND */
2804 while (1) {
2805 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2806 for (i = 0;
2807 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2808 p = smallprimes[i];
2809 off = big_modhalf_pos(result, p);
2810 off = p - off;
2811 if ((off % 2) == 1) {
2812 off = off + p;
2813 }
2814 off = off / 2;
2815 while (off < SIEVESIZE) {
2816 sieve[off] = 1;
2817 off = off + p;
2818 }
2819 }
2820
2821 for (i = 0; i < SIEVESIZE; i++) {
2822 if (sieve[i] == 0) {
2823 err = big_isprime_pos_ext(result, info);
2824 if (err != BIG_FALSE) {
2825 if (err != BIG_TRUE) {
2826 return (err);
2827 } else {
2828 goto out;
2829 }
2830 }
2831
2832 }
2833 if ((err = big_add_abs(result, result, &big_Two)) !=
2834 BIG_OK) {
2835 return (err);
2836 }
2837 }
2838 }
2839
2840 out:
2841 return (BIG_OK);
2842 }
2843
2844
2845 BIG_ERR_CODE
2846 big_nextprime_pos(BIGNUM *result, BIGNUM *n)
2847 {
2848 return (big_nextprime_pos_ext(result, n, NULL));
2849 }
2850
2851
2852 BIG_ERR_CODE
2853 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
2854 {
2855 BIG_ERR_CODE err;
2856
2857
2858 if ((err = big_copy(result, n)) != BIG_OK)
2859 return (err);
2860 result->value[0] |= 1;
2861 while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
2862 if (err != BIG_FALSE)
2863 return (err);
2864 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
2865 return (err);
2866 }
2867 return (BIG_OK);
2868 }
2869
2870
2871 /*
2872 * given m and e, computes the rest in the equation
2873 * gcd(m, e) = cm * m + ce * e
2874 */
2875 BIG_ERR_CODE
2876 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
2877 {
2878 BIGNUM *xi, *ri, *riminus1, *riminus2, *t;
2879 BIGNUM *vmi, *vei, *vmiminus1, *veiminus1;
2880 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp;
2881 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2882 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2883 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2884 BIG_CHUNK_TYPE t4value[BIGTMPSIZE];
2885 BIG_CHUNK_TYPE t5value[BIGTMPSIZE];
2886 BIG_CHUNK_TYPE t6value[BIGTMPSIZE];
2887 BIG_CHUNK_TYPE t7value[BIGTMPSIZE];
2888 BIG_CHUNK_TYPE t8value[BIGTMPSIZE];
2889 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2890 BIG_ERR_CODE err;
2891 int len;
2892
2893 if (big_cmp_abs(m, e) >= 0) {
2894 len = m->len;
2895 } else {
2896 len = e->len;
2897 }
2898
2899 if ((err = big_init1(&t1, len,
2900 t1value, arraysize(t1value))) != BIG_OK) {
2901 return (err);
2902 }
2903 if ((err = big_init1(&t2, len,
2904 t2value, arraysize(t2value))) != BIG_OK) {
2905 goto ret1;
2906 }
2907 if ((err = big_init1(&t3, len,
2908 t3value, arraysize(t3value))) != BIG_OK) {
2909 goto ret2;
2910 }
2911 if ((err = big_init1(&t4, len,
2912 t4value, arraysize(t3value))) != BIG_OK) {
2913 goto ret3;
2914 }
2915 if ((err = big_init1(&t5, len,
2916 t5value, arraysize(t5value))) != BIG_OK) {
2917 goto ret4;
2918 }
2919 if ((err = big_init1(&t6, len,
2920 t6value, arraysize(t6value))) != BIG_OK) {
2921 goto ret5;
2922 }
2923 if ((err = big_init1(&t7, len,
2924 t7value, arraysize(t7value))) != BIG_OK) {
2925 goto ret6;
2926 }
2927 if ((err = big_init1(&t8, len,
2928 t8value, arraysize(t8value))) != BIG_OK) {
2929 goto ret7;
2930 }
2931
2932 if ((err = big_init1(&tmp, 2 * len,
2933 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2934 goto ret8;
2935 }
2936
2937 ri = &t1;
2938 ri->value[0] = 1;
2939 ri->len = 1;
2940 xi = &t2;
2941 riminus1 = &t3;
2942 riminus2 = &t4;
2943 vmi = &t5;
2944 vei = &t6;
2945 vmiminus1 = &t7;
2946 veiminus1 = &t8;
2947
2948 (void) big_copy(vmiminus1, &big_One);
2949 (void) big_copy(vmi, &big_One);
2950 (void) big_copy(veiminus1, &big_One);
2951 (void) big_copy(xi, &big_One);
2952 vei->len = 1;
2953 vei->value[0] = 0;
2954
2955 (void) big_copy(riminus1, m);
2956 (void) big_copy(ri, e);
2957
2958 while (!big_is_zero(ri)) {
2959 t = riminus2;
2960 riminus2 = riminus1;
2961 riminus1 = ri;
2962 ri = t;
2963 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
2964 goto ret;
2965 }
2966 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
2967 goto ret;
2968 }
2969 t = vmiminus1;
2970 vmiminus1 = vmi;
2971 vmi = t;
2972 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
2973 goto ret;
2974 }
2975 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
2976 goto ret;
2977 }
2978 t = veiminus1;
2979 veiminus1 = vei;
2980 vei = t;
2981 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
2982 BIG_OK) {
2983 goto ret;
2984 }
2985 }
2986 if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
2987 goto ret;
2988 }
2989 if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
2990 goto ret;
2991 }
2992 if (ce != NULL) {
2993 err = big_copy(ce, vei);
2994 }
2995 ret:
2996 if (tmp.malloced) big_finish(&tmp);
2997 ret8:
2998 if (t8.malloced) big_finish(&t8);
2999 ret7:
3000 if (t7.malloced) big_finish(&t7);
3001 ret6:
3002 if (t6.malloced) big_finish(&t6);
3003 ret5:
3004 if (t5.malloced) big_finish(&t5);
3005 ret4:
3006 if (t4.malloced) big_finish(&t4);
3007 ret3:
3008 if (t3.malloced) big_finish(&t3);
3009 ret2:
3010 if (t2.malloced) big_finish(&t2);
3011 ret1:
3012 if (t1.malloced) big_finish(&t1);
3013
3014 return (err);
3015 }