1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Configuration guide
  28  * -------------------
  29  *
  30  * There are 4 preprocessor symbols used to configure the bignum
  31  * implementation.  This file contains no logic to configure based on
  32  * processor; we leave that to the Makefiles to specify.
  33  *
  34  * USE_FLOATING_POINT
  35  *   Meaning: There is support for a fast floating-point implementation of
  36  *   Montgomery multiply.
  37  *
  38  * PSR_MUL
  39  *   Meaning: There are processor-specific versions of the low level
  40  *   functions to implement big_mul.  Those functions are: big_mul_set_vec,
  41  *   big_mul_add_vec, big_mul_vec, and big_sqr_vec.  PSR_MUL implies support
  42  *   for all 4 functions.  You cannot pick and choose which subset of these
  43  *   functions to support; that would lead to a rat's nest of #ifdefs.
  44  *
  45  * HWCAP
  46  *   Meaning: Call multiply support functions through a function pointer.
  47  *   On x86, there are multiple implementations for different hardware
  48  *   capabilities, such as MMX, SSE2, etc.  Tests are made at run-time, when
  49  *   a function is first used.  So, the support functions are called through
  50  *   a function pointer.  There is no need for that on Sparc, because there
  51  *   is only one implementation; support functions are called directly.
  52  *   Later, if there were some new VIS instruction, or something, and a
  53  *   run-time test were needed, rather than variant kernel modules and
  54  *   libraries, then HWCAP would be defined for Sparc, as well.
  55  *
  56  * UMUL64
  57  *   Meaning: It is safe to use generic C code that assumes the existence
  58  *   of a 32 x 32 --> 64 bit unsigned multiply.  If this is not defined,
  59  *   then the generic code for big_mul_add_vec() must necessarily be very slow,
  60  *   because it must fall back to using 16 x 16 --> 32 bit multiplication.
  61  *
  62  */
  63 
  64 
  65 #include <sys/types.h>
  66 #include "bignum.h"
  67 
  68 #ifdef  _KERNEL
  69 #include <sys/ddi.h>
  70 #include <sys/mdesc.h>
  71 #include <sys/crypto/common.h>
  72 
  73 #include <sys/kmem.h>
  74 #include <sys/param.h>
  75 #include <sys/sunddi.h>
  76 
  77 #else
  78 #include <stdlib.h>
  79 #include <stdio.h>
  80 #include <assert.h>
  81 #define ASSERT  assert
  82 #endif  /* _KERNEL */
  83 
  84 #ifdef  _LP64 /* truncate 64-bit size_t to 32-bits */
  85 #define UI32(ui)        ((uint32_t)ui)
  86 #else /* size_t already 32-bits */
  87 #define UI32(ui)        (ui)
  88 #endif
  89 
  90 
  91 #ifdef  _KERNEL
  92 #define big_malloc(size)        kmem_alloc(size, KM_NOSLEEP)
  93 #define big_free(ptr, size)     kmem_free(ptr, size)
  94 
  95 void *
  96 big_realloc(void *from, size_t oldsize, size_t newsize)
  97 {
  98         void *rv;
  99 
 100         rv = kmem_alloc(newsize, KM_NOSLEEP);
 101         if (rv != NULL)
 102                 bcopy(from, rv, oldsize);
 103         kmem_free(from, oldsize);
 104         return (rv);
 105 }
 106 
 107 #else   /* _KERNEL */
 108 
 109 #ifndef MALLOC_DEBUG
 110 
 111 #define big_malloc(size)        malloc(size)
 112 #define big_free(ptr, size)     free(ptr)
 113 
 114 #else
 115 
 116 void
 117 big_free(void *ptr, size_t size)
 118 {
 119         printf("freed %d bytes at %p\n", size, ptr);
 120         free(ptr);
 121 }
 122 
 123 void *
 124 big_malloc(size_t size)
 125 {
 126         void *rv;
 127         rv = malloc(size);
 128         printf("malloced %d bytes, addr:%p\n", size, rv);
 129         return (rv);
 130 }
 131 #endif /* MALLOC_DEBUG */
 132 
 133 #define big_realloc(x, y, z) realloc((x), (z))
 134 
 135 
 136 /*
 137  * printbignum()
 138  * Print a BIGNUM type to stdout.
 139  */
 140 void
 141 printbignum(char *aname, BIGNUM *a)
 142 {
 143         int i;
 144 
 145         (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
 146         for (i = a->len - 1; i >= 0; i--) {
 147 #ifdef BIGNUM_CHUNK_32
 148                 (void) printf("%08x ", a->value[i]);
 149                 if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
 150                         (void) printf("\n");
 151                 }
 152 #else
 153                 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
 154                     (uint32_t)((a->value[i]) & 0xffffffff));
 155                 if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
 156                         (void) printf("\n");
 157                 }
 158 #endif
 159         }
 160         (void) printf("\n");
 161 }
 162 
 163 #endif  /* _KERNEL */
 164 
 165 
 166 /*
 167  * big_init()
 168  * Initialize and allocate memory for a BIGNUM type.
 169  *
 170  * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
 171  *
 172  * Note: call big_finish() to free memory allocated by big_init().
 173  *
 174  * Input:
 175  * number       Uninitialized memory for BIGNUM
 176  * size         Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 177  *
 178  * Output:
 179  * number       Initialized BIGNUM
 180  *
 181  * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 182  */
 183 BIG_ERR_CODE
 184 big_init(BIGNUM *number, int size)
 185 {
 186         number->value = big_malloc(BIGNUM_WORDSIZE * size);
 187         if (number->value == NULL) {
 188                 return (BIG_NO_MEM);
 189         }
 190         number->size = size;
 191         number->len = 0;
 192         number->sign = 1;
 193         number->malloced = 1;
 194         return (BIG_OK);
 195 }
 196 
 197 
 198 /*
 199  * big_init1()
 200  * Initialize and, if needed, allocate memory for a BIGNUM type.
 201  * Use the buffer passed, buf, if any, instad of allocating memory
 202  * if it's at least "size" bytes.
 203  *
 204  * Note: call big_finish() to free memory allocated by big_init().
 205  *
 206  * Input:
 207  * number       Uninitialized memory for BIGNUM
 208  * size         Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 209  * buf          Buffer for storing a BIGNUM.
 210  *              If NULL, big_init1() will allocate a buffer
 211  * bufsize      Size, in BIG_CHUNK_SIZE_bit words, of buf
 212  *
 213  * Output:
 214  * number       Initialized BIGNUM
 215  *
 216  * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 217  */
 218 BIG_ERR_CODE
 219 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
 220 {
 221         if ((buf == NULL) || (size > bufsize)) {
 222                 number->value = big_malloc(BIGNUM_WORDSIZE * size);
 223                 if (number->value == NULL) {
 224                         return (BIG_NO_MEM);
 225                 }
 226                 number->size = size;
 227                 number->malloced = 1;
 228         } else {
 229                 number->value = buf;
 230                 number->size = bufsize;
 231                 number->malloced = 0;
 232         }
 233         number->len = 0;
 234         number->sign = 1;
 235 
 236         return (BIG_OK);
 237 }
 238 
 239 
 240 /*
 241  * big_finish()
 242  * Free memory, if any, allocated by big_init() or big_init1().
 243  */
 244 void
 245 big_finish(BIGNUM *number)
 246 {
 247         if (number->malloced == 1) {
 248                 big_free(number->value, BIGNUM_WORDSIZE * number->size);
 249                 number->malloced = 0;
 250         }
 251 }
 252 
 253 
 254 /*
 255  * bn->size should be at least
 256  * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes
 257  * converts from byte-big-endian format to bignum format (words in
 258  * little endian order, but bytes within the words big endian)
 259  */
 260 void
 261 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
 262 {
 263         int             i, j;
 264         uint32_t        offs;
 265         const uint32_t  slen = UI32(len);
 266         BIG_CHUNK_TYPE  word;
 267         uchar_t         *knwordp;
 268 
 269         if (slen == 0) {
 270                 bn->len = 1;
 271                 bn->value[0] = 0;
 272                 return;
 273         }
 274 
 275         offs = slen % BIGNUM_WORDSIZE;
 276         bn->len = slen / BIGNUM_WORDSIZE;
 277 
 278         for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
 279                 knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
 280                 word = knwordp[0];
 281                 for (j = 1; j < BIGNUM_WORDSIZE; j++) {
 282                         word = (word << BITSINBYTE) + knwordp[j];
 283                 }
 284                 bn->value[i] = word;
 285         }
 286         if (offs > 0) {
 287                 word = kn[0];
 288                 for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
 289                 bn->value[bn->len++] = word;
 290         }
 291         while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
 292                 bn->len --;
 293         }
 294 }
 295 
 296 
 297 /*
 298  * copies the least significant len bytes if
 299  * len < bn->len * BIGNUM_WORDSIZE
 300  * converts from bignum format to byte-big-endian format.
 301  * bignum format is words of type  BIG_CHUNK_TYPE in little endian order.
 302  */
 303 void
 304 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
 305 {
 306         int             i, j;
 307         uint32_t        offs;
 308         const uint32_t  slen = UI32(len);
 309         BIG_CHUNK_TYPE  word;
 310 
 311         if (len < BIGNUM_WORDSIZE * bn->len) {
 312                 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
 313                         word = bn->value[i];
 314                         for (j = 0; j < BIGNUM_WORDSIZE; j++) {
 315                                 kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
 316                                     word & 0xff;
 317                                 word = word >> BITSINBYTE;
 318                         }
 319                 }
 320                 offs = slen % BIGNUM_WORDSIZE;
 321                 if (offs > 0) {
 322                         word = bn->value[slen / BIGNUM_WORDSIZE];
 323                         for (i =  slen % BIGNUM_WORDSIZE; i > 0; i --) {
 324                                 kn[i - 1] = word & 0xff;
 325                                 word = word >> BITSINBYTE;
 326                         }
 327                 }
 328         } else {
 329                 for (i = 0; i < bn->len; i++) {
 330                         word = bn->value[i];
 331                         for (j = 0; j < BIGNUM_WORDSIZE; j++) {
 332                                 kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
 333                                     word & 0xff;
 334                                 word = word >> BITSINBYTE;
 335                         }
 336                 }
 337                 for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {
 338                         kn[i] = 0;
 339                 }
 340         }
 341 }
 342 
 343 
 344 int
 345 big_bitlength(BIGNUM *a)
 346 {
 347         int             l = 0, b = 0;
 348         BIG_CHUNK_TYPE  c;
 349 
 350         l = a->len - 1;
 351         while ((l > 0) && (a->value[l] == 0)) {
 352                 l--;
 353         }
 354         b = BIG_CHUNK_SIZE;
 355         c = a->value[l];
 356         while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
 357                 c = c << 1;
 358                 b--;
 359         }
 360 
 361         return (l * BIG_CHUNK_SIZE + b);
 362 }
 363 
 364 
 365 BIG_ERR_CODE
 366 big_copy(BIGNUM *dest, BIGNUM *src)
 367 {
 368         BIG_CHUNK_TYPE  *newptr;
 369         int             i, len;
 370 
 371         len = src->len;
 372         while ((len > 1) && (src->value[len - 1] == 0)) {
 373                 len--;
 374         }
 375         src->len = len;
 376         if (dest->size < len) {
 377                 if (dest->malloced == 1) {
 378                         newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
 379                             BIGNUM_WORDSIZE * dest->size,
 380                             BIGNUM_WORDSIZE * len);
 381                 } else {
 382                         newptr = (BIG_CHUNK_TYPE *)
 383                             big_malloc(BIGNUM_WORDSIZE * len);
 384                         if (newptr != NULL) {
 385                                 dest->malloced = 1;
 386                         }
 387                 }
 388                 if (newptr == NULL) {
 389                         return (BIG_NO_MEM);
 390                 }
 391                 dest->value = newptr;
 392                 dest->size = len;
 393         }
 394         dest->len = len;
 395         dest->sign = src->sign;
 396         for (i = 0; i < len; i++) {
 397                 dest->value[i] = src->value[i];
 398         }
 399 
 400         return (BIG_OK);
 401 }
 402 
 403 
 404 BIG_ERR_CODE
 405 big_extend(BIGNUM *number, int size)
 406 {
 407         BIG_CHUNK_TYPE  *newptr;
 408         int             i;
 409 
 410         if (number->size >= size)
 411                 return (BIG_OK);
 412         if (number->malloced) {
 413                 number->value = big_realloc(number->value,
 414                     BIGNUM_WORDSIZE * number->size,
 415                     BIGNUM_WORDSIZE * size);
 416         } else {
 417                 newptr = big_malloc(BIGNUM_WORDSIZE * size);
 418                 if (newptr != NULL) {
 419                         for (i = 0; i < number->size; i++) {
 420                                 newptr[i] = number->value[i];
 421                         }
 422                 }
 423                 number->value = newptr;
 424         }
 425 
 426         if (number->value == NULL) {
 427                 return (BIG_NO_MEM);
 428         }
 429 
 430         number->size = size;
 431         number->malloced = 1;
 432         return (BIG_OK);
 433 }
 434 
 435 
 436 /* returns 1 if n == 0 */
 437 int
 438 big_is_zero(BIGNUM *n)
 439 {
 440         int     i, result;
 441 
 442         result = 1;
 443         for (i = 0; i < n->len; i++) {
 444                 if (n->value[i] != 0) {
 445                         result = 0;
 446                 }
 447         }
 448         return (result);
 449 }
 450 
 451 
 452 BIG_ERR_CODE
 453 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 454 {
 455         int             i, shorter, longer;
 456         BIG_CHUNK_TYPE  cy, ai;
 457         BIG_CHUNK_TYPE  *r, *a, *b, *c;
 458         BIG_ERR_CODE    err;
 459         BIGNUM          *longerarg;
 460 
 461         if (aa->len > bb->len) {
 462                 shorter = bb->len;
 463                 longer = aa->len;
 464                 longerarg = aa;
 465         } else {
 466                 shorter = aa->len;
 467                 longer = bb->len;
 468                 longerarg = bb;
 469         }
 470         if (result->size < longer + 1) {
 471                 err = big_extend(result, longer + 1);
 472                 if (err != BIG_OK) {
 473                         return (err);
 474                 }
 475         }
 476 
 477         r = result->value;
 478         a = aa->value;
 479         b = bb->value;
 480         c = longerarg->value;
 481         cy = 0;
 482         for (i = 0; i < shorter; i++) {
 483                 ai = a[i];
 484                 r[i] = ai + b[i] + cy;
 485                 if (r[i] > ai) {
 486                         cy = 0;
 487                 } else if (r[i] < ai) {
 488                         cy = 1;
 489                 }
 490         }
 491         for (; i < longer; i++) {
 492                 ai = c[i];
 493                 r[i] = ai + cy;
 494                 if (r[i] >= ai) {
 495                         cy = 0;
 496                 }
 497         }
 498         if (cy == 1) {
 499                 r[i] = cy;
 500                 result->len = longer + 1;
 501         } else {
 502                 result->len = longer;
 503         }
 504         result->sign = 1;
 505         return (BIG_OK);
 506 }
 507 
 508 
 509 /* caller must make sure that result has at least len words allocated */
 510 void
 511 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
 512 {
 513         int             i;
 514         BIG_CHUNK_TYPE  cy, ai;
 515 
 516         cy = 1;
 517         for (i = 0; i < len; i++) {
 518                 ai = a[i];
 519                 r[i] = ai + (~b[i]) + cy;
 520                 if (r[i] > ai) {
 521                         cy = 0;
 522                 } else if (r[i] < ai) {
 523                         cy = 1;
 524                 }
 525         }
 526 }
 527 
 528 
 529 /* result=aa-bb  it is assumed that aa>=bb */
 530 BIG_ERR_CODE
 531 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 532 {
 533         int             i, shorter;
 534         BIG_CHUNK_TYPE  cy = 1, ai;
 535         BIG_CHUNK_TYPE  *r, *a, *b;
 536         BIG_ERR_CODE    err = BIG_OK;
 537 
 538         if (aa->len > bb->len) {
 539                 shorter = bb->len;
 540         } else {
 541                 shorter = aa->len;
 542         }
 543         if (result->size < aa->len) {
 544                 err = big_extend(result, aa->len);
 545                 if (err != BIG_OK) {
 546                         return (err);
 547                 }
 548         }
 549 
 550         r = result->value;
 551         a = aa->value;
 552         b = bb->value;
 553         result->len = aa->len;
 554         cy = 1;
 555         for (i = 0; i < shorter; i++) {
 556                 ai = a[i];
 557                 r[i] = ai + (~b[i]) + cy;
 558                 if (r[i] > ai) {
 559                         cy = 0;
 560                 } else if (r[i] < ai) {
 561                         cy = 1;
 562                 }
 563         }
 564         for (; i < aa->len; i++) {
 565                 ai = a[i];
 566                 r[i] = ai + (~0) + cy;
 567                 if (r[i] < ai) {
 568                         cy = 1;
 569                 }
 570         }
 571         result->sign = 1;
 572 
 573         if (cy == 0) {
 574                 return (BIG_INVALID_ARGS);
 575         } else {
 576                 return (BIG_OK);
 577         }
 578 }
 579 
 580 
 581 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
 582 int
 583 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
 584 {
 585         int     i;
 586 
 587         if (aa->len > bb->len) {
 588                 for (i = aa->len - 1; i > bb->len - 1; i--) {
 589                         if (aa->value[i] > 0) {
 590                                 return (1);
 591                         }
 592                 }
 593         } else if (aa->len < bb->len) {
 594                 for (i = bb->len - 1; i > aa->len - 1; i--) {
 595                         if (bb->value[i] > 0) {
 596                                 return (-1);
 597                         }
 598                 }
 599         } else {
 600                 i = aa->len - 1;
 601         }
 602         for (; i >= 0; i--) {
 603                 if (aa->value[i] > bb->value[i]) {
 604                         return (1);
 605                 } else if (aa->value[i] < bb->value[i]) {
 606                         return (-1);
 607                 }
 608         }
 609 
 610         return (0);
 611 }
 612 
 613 
 614 BIG_ERR_CODE
 615 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 616 {
 617         BIG_ERR_CODE    err;
 618 
 619         if ((bb->sign == -1) && (aa->sign == 1)) {
 620                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
 621                         return (err);
 622                 }
 623                 result->sign = 1;
 624         } else if ((aa->sign == -1) && (bb->sign == 1)) {
 625                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
 626                         return (err);
 627                 }
 628                 result->sign = -1;
 629         } else if ((aa->sign == 1) && (bb->sign == 1)) {
 630                 if (big_cmp_abs(aa, bb) >= 0) {
 631                         if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
 632                                 return (err);
 633                         }
 634                         result->sign = 1;
 635                 } else {
 636                         if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
 637                                 return (err);
 638                         }
 639                         result->sign = -1;
 640                 }
 641         } else {
 642                 if (big_cmp_abs(aa, bb) >= 0) {
 643                         if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
 644                                 return (err);
 645                         }
 646                         result->sign = -1;
 647                 } else {
 648                         if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
 649                                 return (err);
 650                         }
 651                         result->sign = 1;
 652                 }
 653         }
 654         return (BIG_OK);
 655 }
 656 
 657 
 658 BIG_ERR_CODE
 659 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 660 {
 661         BIG_ERR_CODE    err;
 662 
 663         if ((bb->sign == -1) && (aa->sign == -1)) {
 664                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
 665                         return (err);
 666                 }
 667                 result->sign = -1;
 668         } else if ((aa->sign == 1) && (bb->sign == 1)) {
 669                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
 670                         return (err);
 671                 }
 672                 result->sign = 1;
 673         } else if ((aa->sign == 1) && (bb->sign == -1)) {
 674                 if (big_cmp_abs(aa, bb) >= 0) {
 675                         if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
 676                                 return (err);
 677                         }
 678                         result->sign = 1;
 679                 } else {
 680                         if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
 681                                 return (err);
 682                         }
 683                         result->sign = -1;
 684                 }
 685         } else {
 686                 if (big_cmp_abs(aa, bb) >= 0) {
 687                         if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
 688                                 return (err);
 689                         }
 690                         result->sign = -1;
 691                 } else {
 692                         if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
 693                                 return (err);
 694                         }
 695                         result->sign = 1;
 696                 }
 697         }
 698         return (BIG_OK);
 699 }
 700 
 701 
 702 /* result = aa/2 */
 703 BIG_ERR_CODE
 704 big_half_pos(BIGNUM *result, BIGNUM *aa)
 705 {
 706         BIG_ERR_CODE    err;
 707         int             i;
 708         BIG_CHUNK_TYPE  cy, cy1;
 709         BIG_CHUNK_TYPE  *a, *r;
 710 
 711         if (result->size < aa->len) {
 712                 err = big_extend(result, aa->len);
 713                 if (err != BIG_OK) {
 714                         return (err);
 715                 }
 716         }
 717 
 718         result->len = aa->len;
 719         a = aa->value;
 720         r = result->value;
 721         cy = 0;
 722         for (i = aa->len - 1; i >= 0; i--) {
 723                 cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
 724                 r[i] = (cy | (a[i] >> 1));
 725                 cy = cy1;
 726         }
 727         if (r[result->len - 1] == 0) {
 728                 result->len--;
 729         }
 730 
 731         return (BIG_OK);
 732 }
 733 
 734 /* result  =  aa*2 */
 735 BIG_ERR_CODE
 736 big_double(BIGNUM *result, BIGNUM *aa)
 737 {
 738         BIG_ERR_CODE    err;
 739         int             i, rsize;
 740         BIG_CHUNK_TYPE  cy, cy1;
 741         BIG_CHUNK_TYPE  *a, *r;
 742 
 743         if ((aa->len > 0) &&
 744             ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
 745                 rsize = aa->len + 1;
 746         } else {
 747                 rsize = aa->len;
 748         }
 749 
 750         if (result->size < rsize) {
 751                 err = big_extend(result, rsize);
 752                 if (err != BIG_OK)
 753                         return (err);
 754         }
 755 
 756         a = aa->value;
 757         r = result->value;
 758         if (rsize == aa->len + 1) {
 759                 r[rsize - 1] = 1;
 760         }
 761         cy = 0;
 762         for (i = 0; i < aa->len; i++) {
 763                 cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
 764                 r[i] = (cy | (a[i] << 1));
 765                 cy = cy1;
 766         }
 767         result->len = rsize;
 768         return (BIG_OK);
 769 }
 770 
 771 
 772 /*
 773  * returns aa mod b, aa must be nonneg, b must be a max
 774  * (BIG_CHUNK_SIZE / 2)-bit integer
 775  */
 776 static uint32_t
 777 big_modhalf_pos(BIGNUM *aa, uint32_t b)
 778 {
 779         int             i;
 780         BIG_CHUNK_TYPE  rem;
 781 
 782         if (aa->len == 0) {
 783                 return (0);
 784         }
 785         rem = aa->value[aa->len - 1] % b;
 786         for (i = aa->len - 2; i >= 0; i--) {
 787                 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
 788                     (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
 789                 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
 790                     (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
 791         }
 792 
 793         return ((uint32_t)rem);
 794 }
 795 
 796 
 797 /*
 798  * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
 799  * result->size should be at least aa->len at entry
 800  * aa, bb, and result should be positive
 801  */
 802 void
 803 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 804 {
 805         int i, lendiff;
 806         BIGNUM res1, aa1;
 807 
 808         lendiff = aa->len - bb->len;
 809         res1.size = result->size - lendiff;
 810         res1.malloced = 0;
 811         res1.value = result->value + lendiff;
 812         aa1.size = aa->size - lendiff;
 813         aa1.value = aa->value + lendiff;
 814         aa1.len = bb->len;
 815         aa1.sign = 1;
 816         (void) big_sub_pos(&res1, &aa1, bb);
 817         if (result->value != aa->value) {
 818                 for (i = 0; i < lendiff; i++) {
 819                         result->value[i] = aa->value[i];
 820                 }
 821         }
 822         result->len = aa->len;
 823 }
 824 
 825 
 826 /*
 827  * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
 828  *                                      (2^BIG_CHUNK_SIZE)^lendiff * |bb|
 829  * aa->len should be >= bb->len
 830  */
 831 int
 832 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
 833 {
 834         int             lendiff;
 835         BIGNUM          aa1;
 836 
 837         lendiff = aa->len - bb->len;
 838         aa1.len = bb->len;
 839         aa1.size = aa->size - lendiff;
 840         aa1.malloced = 0;
 841         aa1.value = aa->value + lendiff;
 842         return (big_cmp_abs(&aa1, bb));
 843 }
 844 
 845 
 846 /*
 847  * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
 848  * result should have enough space allocated.
 849  */
 850 static void
 851 big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
 852 {
 853         int             i;
 854         BIG_CHUNK_TYPE  t1, t2, ai, cy;
 855         BIG_CHUNK_TYPE  *a, *r;
 856 
 857         a = aa->value;
 858         r = result->value;
 859         cy = 0;
 860         for (i = 0; i < aa->len; i++) {
 861                 ai = a[i];
 862                 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
 863                 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
 864                     (t1 >> (BIG_CHUNK_SIZE / 2));
 865                 r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
 866                     (t2 << (BIG_CHUNK_SIZE / 2));
 867                 cy = t2 >> (BIG_CHUNK_SIZE / 2);
 868         }
 869         r[i] = cy;
 870         result->len = aa->len + 1;
 871         result->sign = aa->sign;
 872 }
 873 
 874 
 875 /*
 876  * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
 877  * (BIG_CHUNK_SIZE / 2)-bit positive integer.
 878  * result should have enough space allocated.
 879  */
 880 static void
 881 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
 882 {
 883         int             i;
 884         BIG_CHUNK_TYPE  t1, t2, ai, cy, ri;
 885         BIG_CHUNK_TYPE  *a, *r;
 886 
 887         a = aa->value;
 888         r = result->value;
 889         cy = 0;
 890         ri = 0;
 891         for (i = 0; i < aa->len; i++) {
 892                 ai = a[i];
 893                 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
 894                 t2 = (ai >>  (BIG_CHUNK_SIZE / 2)) * b +
 895                     (t1 >>  (BIG_CHUNK_SIZE / 2));
 896                 r[i] = (t1 <<  (BIG_CHUNK_SIZE / 2)) + ri;
 897                 ri = t2 & BIG_CHUNK_LOWHALFBITS;
 898                 cy = t2 >> (BIG_CHUNK_SIZE / 2);
 899         }
 900         r[i] = (cy <<  (BIG_CHUNK_SIZE / 2)) + ri;
 901         result->len = aa->len + 1;
 902         result->sign = aa->sign;
 903 }
 904 
 905 
 906 /* it is assumed that result->size is big enough */
 907 void
 908 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
 909 {
 910         int             i;
 911         BIG_CHUNK_TYPE  cy, ai;
 912 
 913         if (offs == 0) {
 914                 if (result != aa) {
 915                         (void) big_copy(result, aa);
 916                 }
 917                 return;
 918         }
 919         cy = 0;
 920         for (i = 0; i < aa->len; i++) {
 921                 ai = aa->value[i];
 922                 result->value[i] = (ai << offs) | cy;
 923                 cy = ai >> (BIG_CHUNK_SIZE - offs);
 924         }
 925         if (cy != 0) {
 926                 result->len = aa->len + 1;
 927                 result->value[result->len - 1] = cy;
 928         } else {
 929                 result->len = aa->len;
 930         }
 931         result->sign = aa->sign;
 932 }
 933 
 934 
 935 /* it is assumed that result->size is big enough */
 936 void
 937 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
 938 {
 939         int              i;
 940         BIG_CHUNK_TYPE  cy, ai;
 941 
 942         if (offs == 0) {
 943                 if (result != aa) {
 944                         (void) big_copy(result, aa);
 945                 }
 946                 return;
 947         }
 948         cy = aa->value[0] >> offs;
 949         for (i = 1; i < aa->len; i++) {
 950                 ai = aa->value[i];
 951                 result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
 952                 cy = ai >> offs;
 953         }
 954         result->len = aa->len;
 955         result->value[result->len - 1] = cy;
 956         result->sign = aa->sign;
 957 }
 958 
 959 
 960 /*
 961  * result = aa/bb   remainder = aa mod bb
 962  * it is assumed that aa and bb are positive
 963  */
 964 BIG_ERR_CODE
 965 big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
 966 {
 967         BIG_ERR_CODE    err = BIG_OK;
 968         int             i, alen, blen, tlen, rlen, offs;
 969         BIG_CHUNK_TYPE  higha, highb, coeff;
 970         BIG_CHUNK_TYPE  *a, *b;
 971         BIGNUM          bbhigh, bblow, tresult, tmp1, tmp2;
 972         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
 973         BIG_CHUNK_TYPE  tmp2value[BIGTMPSIZE];
 974         BIG_CHUNK_TYPE  tresultvalue[BIGTMPSIZE];
 975         BIG_CHUNK_TYPE  bblowvalue[BIGTMPSIZE];
 976         BIG_CHUNK_TYPE  bbhighvalue[BIGTMPSIZE];
 977 
 978         a = aa->value;
 979         b = bb->value;
 980         alen = aa->len;
 981         blen = bb->len;
 982         while ((alen > 1) && (a[alen - 1] == 0)) {
 983                 alen = alen - 1;
 984         }
 985         aa->len = alen;
 986         while ((blen > 1) && (b[blen - 1] == 0)) {
 987                 blen = blen - 1;
 988         }
 989         bb->len = blen;
 990         if ((blen == 1) && (b[0] == 0)) {
 991                 return (BIG_DIV_BY_0);
 992         }
 993 
 994         if (big_cmp_abs(aa, bb) < 0) {
 995                 if ((remainder != NULL) &&
 996                     ((err = big_copy(remainder, aa)) != BIG_OK)) {
 997                         return (err);
 998                 }
 999                 if (result != NULL) {
1000                         result->len = 1;
1001                         result->sign = 1;
1002                         result->value[0] = 0;
1003                 }
1004                 return (BIG_OK);
1005         }
1006 
1007         if ((err = big_init1(&bblow, blen + 1,
1008             bblowvalue, arraysize(bblowvalue))) != BIG_OK)
1009                 return (err);
1010 
1011         if ((err = big_init1(&bbhigh, blen + 1,
1012             bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
1013                 goto ret1;
1014 
1015         if ((err = big_init1(&tmp1, alen + 2,
1016             tmp1value, arraysize(tmp1value))) != BIG_OK)
1017                 goto ret2;
1018 
1019         if ((err = big_init1(&tmp2, blen + 2,
1020             tmp2value, arraysize(tmp2value))) != BIG_OK)
1021                 goto ret3;
1022 
1023         if ((err = big_init1(&tresult, alen - blen + 2,
1024             tresultvalue, arraysize(tresultvalue))) != BIG_OK)
1025                 goto ret4;
1026 
1027         offs = 0;
1028         highb = b[blen - 1];
1029         if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
1030                 highb = highb >> (BIG_CHUNK_SIZE / 2);
1031                 offs = (BIG_CHUNK_SIZE / 2);
1032         }
1033         while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
1034                 highb = highb << 1;
1035                 offs++;
1036         }
1037 
1038         big_shiftleft(&bblow, bb, offs);
1039 
1040         if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
1041                 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1042         } else {
1043                 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1044         }
1045         if (bbhigh.value[bbhigh.len - 1] == 0) {
1046                 bbhigh.len--;
1047         } else {
1048                 bbhigh.value[bbhigh.len] = 0;
1049         }
1050 
1051         highb = bblow.value[bblow.len - 1];
1052 
1053         big_shiftleft(&tmp1, aa, offs);
1054         rlen = tmp1.len - bblow.len + 1;
1055         tresult.len = rlen;
1056 
1057         tmp1.len++;
1058         tlen = tmp1.len;
1059         tmp1.value[tmp1.len - 1] = 0;
1060         for (i = 0; i < rlen; i++) {
1061                 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
1062                     (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
1063                 coeff = higha / (highb + 1);
1064                 big_mulhalf_high(&tmp2, &bblow, coeff);
1065                 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1066                 bbhigh.len++;
1067                 while (tmp1.value[tlen - 1] > 0) {
1068                         big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1069                         coeff++;
1070                 }
1071                 bbhigh.len--;
1072                 tlen--;
1073                 tmp1.len--;
1074                 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
1075                         big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1076                         coeff++;
1077                 }
1078                 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
1079                 higha = tmp1.value[tlen - 1];
1080                 coeff = higha / (highb + 1);
1081                 big_mulhalf_low(&tmp2, &bblow, coeff);
1082                 tmp2.len--;
1083                 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1084                 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
1085                         big_sub_pos_high(&tmp1, &tmp1, &bblow);
1086                         coeff++;
1087                 }
1088                 tresult.value[rlen - i - 1] =
1089                     tresult.value[rlen - i - 1] + coeff;
1090         }
1091 
1092         big_shiftright(&tmp1, &tmp1, offs);
1093 
1094         err = BIG_OK;
1095 
1096         if ((remainder != NULL) &&
1097             ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1098                 goto ret;
1099 
1100         if (result != NULL)
1101                 err = big_copy(result, &tresult);
1102 
1103 ret:
1104         big_finish(&tresult);
1105 ret4:
1106         big_finish(&tmp1);
1107 ret3:
1108         big_finish(&tmp2);
1109 ret2:
1110         big_finish(&bbhigh);
1111 ret1:
1112         big_finish(&bblow);
1113         return (err);
1114 }
1115 
1116 
1117 /*
1118  * If there is no processor-specific integer implementation of
1119  * the lower level multiply functions, then this code is provided
1120  * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1121  * big_sqr_vec().
1122  *
1123  * There are two generic implementations.  One that assumes that
1124  * there is hardware and C compiler support for a 32 x 32 --> 64
1125  * bit unsigned multiply, but otherwise is not specific to any
1126  * processor, platform, or ISA.
1127  *
1128  * The other makes very few assumptions about hardware capabilities.
1129  * It does not even assume that there is any implementation of a
1130  * 32 x 32 --> 64 bit multiply that is accessible to C code and
1131  * appropriate to use.  It falls constructs 32 x 32 --> 64 bit
1132  * multiplies from 16 x 16 --> 32 bit multiplies.
1133  *
1134  */
1135 
1136 #if !defined(PSR_MUL)
1137 
1138 #ifdef UMUL64
1139 
1140 #if (BIG_CHUNK_SIZE == 32)
1141 
1142 #define UNROLL8
1143 
1144 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1145         p = pf * d; \
1146         pf = (uint64_t)a[R + 1]; \
1147         t = p + cy; \
1148         r[R] = (uint32_t)t; \
1149         cy = t >> 32
1150 
1151 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1152         p = pf * d; \
1153         t = p + cy; \
1154         r[R] = (uint32_t)t; \
1155         cy = t >> 32
1156 
1157 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1158         t = (uint64_t)r[R]; \
1159         p = pf * d; \
1160         pf = (uint64_t)a[R + 1]; \
1161         t = p + t + cy; \
1162         r[R] = (uint32_t)t; \
1163         cy = t >> 32
1164 
1165 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1166         t = (uint64_t)r[R]; \
1167         p = pf * d; \
1168         t = p + t + cy; \
1169         r[R] = (uint32_t)t; \
1170         cy = t >> 32
1171 
1172 #ifdef UNROLL8
1173 
1174 #define UNROLL 8
1175 
1176 /*
1177  * r = a * b
1178  * where r and a are vectors; b is a single 32-bit digit
1179  */
1180 
1181 uint32_t
1182 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1183 {
1184         uint64_t d, pf, p, t, cy;
1185 
1186         if (len == 0)
1187                 return (0);
1188         cy = 0;
1189         d = (uint64_t)b;
1190         pf = (uint64_t)a[0];
1191         while (len > UNROLL) {
1192                 MUL_SET_VEC_ROUND_PREFETCH(0);
1193                 MUL_SET_VEC_ROUND_PREFETCH(1);
1194                 MUL_SET_VEC_ROUND_PREFETCH(2);
1195                 MUL_SET_VEC_ROUND_PREFETCH(3);
1196                 MUL_SET_VEC_ROUND_PREFETCH(4);
1197                 MUL_SET_VEC_ROUND_PREFETCH(5);
1198                 MUL_SET_VEC_ROUND_PREFETCH(6);
1199                 MUL_SET_VEC_ROUND_PREFETCH(7);
1200                 r += UNROLL;
1201                 a += UNROLL;
1202                 len -= UNROLL;
1203         }
1204         if (len == UNROLL) {
1205                 MUL_SET_VEC_ROUND_PREFETCH(0);
1206                 MUL_SET_VEC_ROUND_PREFETCH(1);
1207                 MUL_SET_VEC_ROUND_PREFETCH(2);
1208                 MUL_SET_VEC_ROUND_PREFETCH(3);
1209                 MUL_SET_VEC_ROUND_PREFETCH(4);
1210                 MUL_SET_VEC_ROUND_PREFETCH(5);
1211                 MUL_SET_VEC_ROUND_PREFETCH(6);
1212                 MUL_SET_VEC_ROUND_NOPREFETCH(7);
1213                 return ((uint32_t)cy);
1214         }
1215         while (len > 1) {
1216                 MUL_SET_VEC_ROUND_PREFETCH(0);
1217                 ++r;
1218                 ++a;
1219                 --len;
1220         }
1221         if (len > 0) {
1222                 MUL_SET_VEC_ROUND_NOPREFETCH(0);
1223         }
1224         return ((uint32_t)cy);
1225 }
1226 
1227 /*
1228  * r += a * b
1229  * where r and a are vectors; b is a single 32-bit digit
1230  */
1231 
1232 uint32_t
1233 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1234 {
1235         uint64_t d, pf, p, t, cy;
1236 
1237         if (len == 0)
1238                 return (0);
1239         cy = 0;
1240         d = (uint64_t)b;
1241         pf = (uint64_t)a[0];
1242         while (len > 8) {
1243                 MUL_ADD_VEC_ROUND_PREFETCH(0);
1244                 MUL_ADD_VEC_ROUND_PREFETCH(1);
1245                 MUL_ADD_VEC_ROUND_PREFETCH(2);
1246                 MUL_ADD_VEC_ROUND_PREFETCH(3);
1247                 MUL_ADD_VEC_ROUND_PREFETCH(4);
1248                 MUL_ADD_VEC_ROUND_PREFETCH(5);
1249                 MUL_ADD_VEC_ROUND_PREFETCH(6);
1250                 MUL_ADD_VEC_ROUND_PREFETCH(7);
1251                 r += 8;
1252                 a += 8;
1253                 len -= 8;
1254         }
1255         if (len == 8) {
1256                 MUL_ADD_VEC_ROUND_PREFETCH(0);
1257                 MUL_ADD_VEC_ROUND_PREFETCH(1);
1258                 MUL_ADD_VEC_ROUND_PREFETCH(2);
1259                 MUL_ADD_VEC_ROUND_PREFETCH(3);
1260                 MUL_ADD_VEC_ROUND_PREFETCH(4);
1261                 MUL_ADD_VEC_ROUND_PREFETCH(5);
1262                 MUL_ADD_VEC_ROUND_PREFETCH(6);
1263                 MUL_ADD_VEC_ROUND_NOPREFETCH(7);
1264                 return ((uint32_t)cy);
1265         }
1266         while (len > 1) {
1267                 MUL_ADD_VEC_ROUND_PREFETCH(0);
1268                 ++r;
1269                 ++a;
1270                 --len;
1271         }
1272         if (len > 0) {
1273                 MUL_ADD_VEC_ROUND_NOPREFETCH(0);
1274         }
1275         return ((uint32_t)cy);
1276 }
1277 #endif /* UNROLL8 */
1278 
1279 void
1280 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1281 {
1282         uint32_t        *tr, *ta;
1283         int             tlen, row, col;
1284         uint64_t        p, s, t, t2, cy;
1285         uint32_t        d;
1286 
1287         tr = r + 1;
1288         ta = a;
1289         tlen = len - 1;
1290         tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
1291         while (--tlen > 0) {
1292                 tr += 2;
1293                 ++ta;
1294                 tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
1295         }
1296         s = (uint64_t)a[0];
1297         s = s * s;
1298         r[0] = (uint32_t)s;
1299         cy = s >> 32;
1300         p = ((uint64_t)r[1] << 1) + cy;
1301         r[1] = (uint32_t)p;
1302         cy = p >> 32;
1303         row = 1;
1304         col = 2;
1305         while (row < len) {
1306                 s = (uint64_t)a[row];
1307                 s = s * s;
1308                 p = (uint64_t)r[col] << 1;
1309                 t = p + s;
1310                 d = (uint32_t)t;
1311                 t2 = (uint64_t)d + cy;
1312                 r[col] = (uint32_t)t2;
1313                 cy = (t >> 32) + (t2 >> 32);
1314                 if (row == len - 1)
1315                         break;
1316                 p = ((uint64_t)r[col + 1] << 1) + cy;
1317                 r[col + 1] = (uint32_t)p;
1318                 cy = p >> 32;
1319                 ++row;
1320                 col += 2;
1321         }
1322         r[col + 1] = (uint32_t)cy;
1323 }
1324 
1325 #else /* BIG_CHUNK_SIZE == 64 */
1326 
1327 /*
1328  * r = r + a * digit, r and a are vectors of length len
1329  * returns the carry digit
1330  */
1331 BIG_CHUNK_TYPE
1332 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1333     BIG_CHUNK_TYPE digit)
1334 {
1335         BIG_CHUNK_TYPE  cy, cy1, retcy, dlow, dhigh;
1336         int             i;
1337 
1338         cy1 = 0;
1339         dlow = digit & BIG_CHUNK_LOWHALFBITS;
1340         dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1341         for (i = 0; i < len; i++) {
1342                 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1343                     dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1344                     (r[i] & BIG_CHUNK_LOWHALFBITS);
1345                 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1346                     dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1347                     (r[i] >> (BIG_CHUNK_SIZE / 2));
1348                 r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
1349                     (cy1 << (BIG_CHUNK_SIZE / 2));
1350         }
1351         retcy = cy1 >> (BIG_CHUNK_SIZE / 2);
1352 
1353         cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
1354         for (i = 0; i < len - 1; i++) {
1355                 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1356                     dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1357                     (r[i] >> (BIG_CHUNK_SIZE / 2));
1358                 r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1359                     (cy << (BIG_CHUNK_SIZE / 2));
1360                 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1361                     dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1362                     (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
1363         }
1364         cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1365             dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
1366             (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
1367         r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1368             (cy << (BIG_CHUNK_SIZE / 2));
1369         retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
1370             dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;
1371 
1372         return (retcy);
1373 }
1374 
1375 
1376 /*
1377  * r = a * digit, r and a are vectors of length len
1378  * returns the carry digit
1379  */
1380 BIG_CHUNK_TYPE
1381 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1382     BIG_CHUNK_TYPE digit)
1383 {
1384         int     i;
1385 
1386         ASSERT(r != a);
1387         for (i = 0; i < len; i++) {
1388                 r[i] = 0;
1389         }
1390         return (big_mul_add_vec(r, a, len, digit));
1391 }
1392 
1393 void
1394 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1395 {
1396         int i;
1397 
1398         ASSERT(r != a);
1399         r[len] = big_mul_set_vec(r, a, len, a[0]);
1400         for (i = 1; i < len; ++i)
1401                 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1402 }
1403 
1404 #endif /* BIG_CHUNK_SIZE == 32/64 */
1405 
1406 
1407 #else /* ! UMUL64 */
1408 
1409 #if (BIG_CHUNK_SIZE != 32)
1410 #error Don't use 64-bit chunks without defining UMUL64
1411 #endif
1412 
1413 
1414 /*
1415  * r = r + a * digit, r and a are vectors of length len
1416  * returns the carry digit
1417  */
1418 uint32_t
1419 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1420 {
1421         uint32_t cy, cy1, retcy, dlow, dhigh;
1422         int i;
1423 
1424         cy1 = 0;
1425         dlow = digit & 0xffff;
1426         dhigh = digit >> 16;
1427         for (i = 0; i < len; i++) {
1428                 cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
1429                 cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
1430                 r[i] = (cy & 0xffff) | (cy1 << 16);
1431         }
1432         retcy = cy1 >> 16;
1433 
1434         cy1 = r[0] & 0xffff;
1435         for (i = 0; i < len - 1; i++) {
1436                 cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
1437                 r[i] = (cy1 & 0xffff) | (cy << 16);
1438                 cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
1439         }
1440         cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
1441         r[len - 1] = (cy1 & 0xffff) | (cy << 16);
1442         retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;
1443 
1444         return (retcy);
1445 }
1446 
1447 
1448 /*
1449  * r = a * digit, r and a are vectors of length len
1450  * returns the carry digit
1451  */
1452 uint32_t
1453 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1454 {
1455         int     i;
1456 
1457         ASSERT(r != a);
1458         for (i = 0; i < len; i++) {
1459                 r[i] = 0;
1460         }
1461 
1462         return (big_mul_add_vec(r, a, len, digit));
1463 }
1464 
1465 void
1466 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1467 {
1468         int i;
1469 
1470         ASSERT(r != a);
1471         r[len] = big_mul_set_vec(r, a, len, a[0]);
1472         for (i = 1; i < len; ++i)
1473                 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1474 }
1475 
1476 #endif /* UMUL64 */
1477 
1478 void
1479 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1480     BIG_CHUNK_TYPE *b, int blen)
1481 {
1482         int i;
1483 
1484         r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1485         for (i = 1; i < blen; ++i)
1486                 r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
1487 }
1488 
1489 
1490 #endif /* ! PSR_MUL */
1491 
1492 
1493 /*
1494  * result = aa * bb  result->value should be big enough to hold the result
1495  *
1496  * Implementation: Standard grammar school algorithm
1497  *
1498  */
1499 BIG_ERR_CODE
1500 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1501 {
1502         BIGNUM          tmp1;
1503         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
1504         BIG_CHUNK_TYPE  *r, *t, *a, *b;
1505         BIG_ERR_CODE    err;
1506         int             i, alen, blen, rsize, sign, diff;
1507 
1508         if (aa == bb) {
1509                 diff = 0;
1510         } else {
1511                 diff = big_cmp_abs(aa, bb);
1512                 if (diff < 0) {
1513                         BIGNUM *tt;
1514                         tt = aa;
1515                         aa = bb;
1516                         bb = tt;
1517                 }
1518         }
1519         a = aa->value;
1520         b = bb->value;
1521         alen = aa->len;
1522         blen = bb->len;
1523         while ((alen > 1) && (a[alen - 1] == 0)) {
1524                 alen--;
1525         }
1526         aa->len = alen;
1527         while ((blen > 1) && (b[blen - 1] == 0)) {
1528                 blen--;
1529         }
1530         bb->len = blen;
1531 
1532         rsize = alen + blen;
1533         ASSERT(rsize > 0);
1534         if (result->size < rsize) {
1535                 err = big_extend(result, rsize);
1536                 if (err != BIG_OK) {
1537                         return (err);
1538                 }
1539                 /* aa or bb might be an alias to result */
1540                 a = aa->value;
1541                 b = bb->value;
1542         }
1543         r = result->value;
1544 
1545         if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1546                 result->len = 1;
1547                 result->sign = 1;
1548                 r[0] = 0;
1549                 return (BIG_OK);
1550         }
1551         sign = aa->sign * bb->sign;
1552         if ((alen == 1) && (a[0] == 1)) {
1553                 for (i = 0; i < blen; i++) {
1554                         r[i] = b[i];
1555                 }
1556                 result->len = blen;
1557                 result->sign = sign;
1558                 return (BIG_OK);
1559         }
1560         if ((blen == 1) && (b[0] == 1)) {
1561                 for (i = 0; i < alen; i++) {
1562                         r[i] = a[i];
1563                 }
1564                 result->len = alen;
1565                 result->sign = sign;
1566                 return (BIG_OK);
1567         }
1568 
1569         if ((err = big_init1(&tmp1, rsize,
1570             tmp1value, arraysize(tmp1value))) != BIG_OK) {
1571                 return (err);
1572         }
1573         (void) big_copy(&tmp1, aa);
1574         t = tmp1.value;
1575 
1576         for (i = 0; i < rsize; i++) {
1577                 t[i] = 0;
1578         }
1579 
1580         if (diff == 0 && alen > 2) {
1581                 BIG_SQR_VEC(t, a, alen);
1582         } else if (blen > 0) {
1583                 BIG_MUL_VEC(t, a, alen, b, blen);
1584         }
1585 
1586         if (t[rsize - 1] == 0) {
1587                 tmp1.len = rsize - 1;
1588         } else {
1589                 tmp1.len = rsize;
1590         }
1591 
1592         err = big_copy(result, &tmp1);
1593 
1594         result->sign = sign;
1595 
1596         big_finish(&tmp1);
1597 
1598         return (err);
1599 }
1600 
1601 
1602 /*
1603  * caller must ensure that  a < n,  b < n  and  ret->size >=  2 * n->len + 1
1604  * and that ret is not n
1605  */
1606 BIG_ERR_CODE
1607 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1608 {
1609         int     i, j, nlen, needsubtract;
1610         BIG_CHUNK_TYPE  *nn, *rr;
1611         BIG_CHUNK_TYPE  digit, c;
1612         BIG_ERR_CODE    err;
1613 
1614         nlen = n->len;
1615         nn = n->value;
1616 
1617         rr = ret->value;
1618 
1619         if ((err = big_mul(ret, a, b)) != BIG_OK) {
1620                 return (err);
1621         }
1622 
1623         rr = ret->value;
1624         for (i = ret->len; i < 2 * nlen + 1; i++) {
1625                 rr[i] = 0;
1626         }
1627         for (i = 0; i < nlen; i++) {
1628                 digit = rr[i];
1629                 digit = digit * n0;
1630 
1631                 c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit);
1632                 j = i + nlen;
1633                 rr[j] += c;
1634                 while (rr[j] < c) {
1635                         rr[j + 1] += 1;
1636                         j++;
1637                         c = 1;
1638                 }
1639         }
1640 
1641         needsubtract = 0;
1642         if ((rr[2 * nlen]  != 0))
1643                 needsubtract = 1;
1644         else {
1645                 for (i = 2 * nlen - 1; i >= nlen; i--) {
1646                         if (rr[i] > nn[i - nlen]) {
1647                                 needsubtract = 1;
1648                                 break;
1649                         } else if (rr[i] < nn[i - nlen]) {
1650                                 break;
1651                         }
1652                 }
1653         }
1654         if (needsubtract)
1655                 big_sub_vec(rr, rr + nlen, nn, nlen);
1656         else {
1657                 for (i = 0; i < nlen; i++) {
1658                         rr[i] = rr[i + nlen];
1659                 }
1660         }
1661 
1662         /* Remove leading zeros, but keep at least 1 digit: */
1663         for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
1664                 ;
1665         ret->len = i + 1;
1666 
1667         return (BIG_OK);
1668 }
1669 
1670 
1671 BIG_CHUNK_TYPE
1672 big_n0(BIG_CHUNK_TYPE n)
1673 {
1674         int             i;
1675         BIG_CHUNK_TYPE  result, tmp;
1676 
1677         result = 0;
1678         tmp = BIG_CHUNK_ALLBITS;
1679         for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1680                 if ((tmp & 1) == 1) {
1681                         result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1682                         tmp = tmp - n;
1683                 } else {
1684                         result = (result >> 1);
1685                 }
1686                 tmp = tmp >> 1;
1687         }
1688 
1689         return (result);
1690 }
1691 
1692 
1693 int
1694 big_numbits(BIGNUM *n)
1695 {
1696         int             i, j;
1697         BIG_CHUNK_TYPE  t;
1698 
1699         for (i = n->len - 1; i > 0; i--) {
1700                 if (n->value[i] != 0) {
1701                         break;
1702                 }
1703         }
1704         t = n->value[i];
1705         for (j = BIG_CHUNK_SIZE; j > 0; j--) {
1706                 if ((t & BIG_CHUNK_HIGHBIT) == 0) {
1707                         t = t << 1;
1708                 } else {
1709                         return (BIG_CHUNK_SIZE * i + j);
1710                 }
1711         }
1712         return (0);
1713 }
1714 
1715 
1716 /* caller must make sure that a < n */
1717 BIG_ERR_CODE
1718 big_mont_rr(BIGNUM *result, BIGNUM *n)
1719 {
1720         BIGNUM          rr;
1721         BIG_CHUNK_TYPE  rrvalue[BIGTMPSIZE];
1722         int             len, i;
1723         BIG_ERR_CODE    err;
1724 
1725         rr.malloced = 0;
1726         len = n->len;
1727 
1728         if ((err = big_init1(&rr, 2 * len + 1,
1729             rrvalue, arraysize(rrvalue))) != BIG_OK) {
1730                 return (err);
1731         }
1732 
1733         for (i = 0; i < 2 * len; i++) {
1734                 rr.value[i] = 0;
1735         }
1736         rr.value[2 * len] = 1;
1737         rr.len = 2 * len + 1;
1738         if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1739                 goto ret;
1740         }
1741         err = big_copy(result, &rr);
1742 ret:
1743         big_finish(&rr);
1744         return (err);
1745 }
1746 
1747 
1748 /* caller must make sure that a < n */
1749 BIG_ERR_CODE
1750 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
1751     BIGNUM *n_rr)
1752 {
1753         BIGNUM          rr;
1754         BIG_CHUNK_TYPE  rrvalue[BIGTMPSIZE];
1755         int             len, i;
1756         BIG_ERR_CODE    err;
1757 
1758         rr.malloced = 0;
1759         len = n->len;
1760 
1761         if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
1762             != BIG_OK) {
1763                 return (err);
1764         }
1765 
1766         if (n_rr == NULL) {
1767                 for (i = 0; i < 2 * len; i++) {
1768                         rr.value[i] = 0;
1769                 }
1770                 rr.value[2 * len] = 1;
1771                 rr.len = 2 * len + 1;
1772                 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1773                         goto ret;
1774                 }
1775                 n_rr = &rr;
1776         }
1777 
1778         if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
1779                 goto ret;
1780         }
1781         err = big_copy(result, &rr);
1782 
1783 ret:
1784         big_finish(&rr);
1785         return (err);
1786 }
1787 
1788 
1789 #ifdef  USE_FLOATING_POINT
1790 #define big_modexp_ncp_float    big_modexp_ncp_sw
1791 #else
1792 #define big_modexp_ncp_int      big_modexp_ncp_sw
1793 #endif
1794 
1795 #define MAX_EXP_BIT_GROUP_SIZE 6
1796 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1797 
1798 /* ARGSUSED */
1799 static BIG_ERR_CODE
1800 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1801     BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1802 
1803 {
1804         BIGNUM          apowers[APOWERS_MAX_SIZE];
1805         BIGNUM          tmp1;
1806         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
1807         int             i, j, k, l, m, p;
1808         uint32_t        bit, bitind, bitcount, groupbits, apowerssize;
1809         uint32_t        nbits;
1810         BIG_ERR_CODE    err;
1811 
1812         nbits = big_numbits(e);
1813         if (nbits < 50) {
1814                 groupbits = 1;
1815                 apowerssize = 1;
1816         } else {
1817                 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1818                 apowerssize = 1 << (groupbits - 1);
1819         }
1820 
1821 
1822         if ((err = big_init1(&tmp1, 2 * n->len + 1,
1823             tmp1value, arraysize(tmp1value))) != BIG_OK) {
1824                 return (err);
1825         }
1826 
1827         /* clear the malloced bit to help cleanup */
1828         for (i = 0; i < apowerssize; i++) {
1829                 apowers[i].malloced = 0;
1830         }
1831 
1832         for (i = 0; i < apowerssize; i++) {
1833                 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1834                     BIG_OK) {
1835                         goto ret;
1836                 }
1837         }
1838 
1839         (void) big_copy(&(apowers[0]), ma);
1840 
1841         if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1842                 goto ret;
1843         }
1844         (void) big_copy(ma, &tmp1);
1845 
1846         for (i = 1; i < apowerssize; i++) {
1847                 if ((err = big_mont_mul(&tmp1, ma,
1848                     &(apowers[i - 1]), n, n0)) != BIG_OK) {
1849                         goto ret;
1850                 }
1851                 (void) big_copy(&apowers[i], &tmp1);
1852         }
1853 
1854         bitind = nbits % BIG_CHUNK_SIZE;
1855         k = 0;
1856         l = 0;
1857         p = 0;
1858         bitcount = 0;
1859         for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1860                 for (j = bitind - 1; j >= 0; j--) {
1861                         bit = (e->value[i] >> j) & 1;
1862                         if ((bitcount == 0) && (bit == 0)) {
1863                                 if ((err = big_mont_mul(tmp,
1864                                     tmp, tmp, n, n0)) != BIG_OK) {
1865                                         goto ret;
1866                                 }
1867                         } else {
1868                                 bitcount++;
1869                                 p = p * 2 + bit;
1870                                 if (bit == 1) {
1871                                         k = k + l + 1;
1872                                         l = 0;
1873                                 } else {
1874                                         l++;
1875                                 }
1876                                 if (bitcount == groupbits) {
1877                                         for (m = 0; m < k; m++) {
1878                                                 if ((err = big_mont_mul(tmp,
1879                                                     tmp, tmp, n, n0)) !=
1880                                                     BIG_OK) {
1881                                                         goto ret;
1882                                                 }
1883                                         }
1884                                         if ((err = big_mont_mul(tmp, tmp,
1885                                             &(apowers[p >> (l + 1)]),
1886                                             n, n0)) != BIG_OK) {
1887                                                 goto ret;
1888                                         }
1889                                         for (m = 0; m < l; m++) {
1890                                                 if ((err = big_mont_mul(tmp,
1891                                                     tmp, tmp, n, n0)) !=
1892                                                     BIG_OK) {
1893                                                         goto ret;
1894                                                 }
1895                                         }
1896                                         k = 0;
1897                                         l = 0;
1898                                         p = 0;
1899                                         bitcount = 0;
1900                                 }
1901                         }
1902                 }
1903                 bitind = BIG_CHUNK_SIZE;
1904         }
1905 
1906         for (m = 0; m < k; m++) {
1907                 if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
1908                         goto ret;
1909                 }
1910         }
1911         if (p != 0) {
1912                 if ((err = big_mont_mul(tmp, tmp,
1913                     &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
1914                         goto ret;
1915                 }
1916         }
1917         for (m = 0; m < l; m++) {
1918                 if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
1919                         goto ret;
1920                 }
1921         }
1922 
1923 ret:
1924         for (i = apowerssize - 1; i >= 0; i--) {
1925                 big_finish(&(apowers[i]));
1926         }
1927         big_finish(&tmp1);
1928 
1929         return (err);
1930 }
1931 
1932 
1933 #ifdef USE_FLOATING_POINT
1934 
1935 #ifdef _KERNEL
1936 
1937 #include <sys/sysmacros.h>
1938 #include <sys/regset.h>
1939 #include <sys/fpu/fpusystm.h>
1940 
1941 /* the alignment for block stores to save fp registers */
1942 #define FPR_ALIGN       (64)
1943 
1944 extern void big_savefp(kfpu_t *);
1945 extern void big_restorefp(kfpu_t *);
1946 
1947 #endif /* _KERNEL */
1948 
1949 /*
1950  * This version makes use of floating point for performance
1951  */
1952 static BIG_ERR_CODE
1953 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1954     BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1955 {
1956 
1957         int             i, j, k, l, m, p;
1958         uint32_t        bit, bitind, bitcount, nlen;
1959         double          dn0;
1960         double          *dn, *dt, *d16r, *d32r;
1961         uint32_t        *nint, *prod;
1962         double          *apowers[APOWERS_MAX_SIZE];
1963         uint32_t        nbits, groupbits, apowerssize;
1964         BIG_ERR_CODE    err = BIG_OK;
1965 
1966 #ifdef _KERNEL
1967         uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1968         kfpu_t *fpu;
1969 
1970 #ifdef DEBUG
1971         if (!fpu_exists)
1972                 return (BIG_GENERAL_ERR);
1973 #endif
1974 
1975         fpu =  (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1976         big_savefp(fpu);
1977 
1978 #endif /* _KERNEL */
1979 
1980         nbits = big_numbits(e);
1981         if (nbits < 50) {
1982                 groupbits = 1;
1983                 apowerssize = 1;
1984         } else {
1985                 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1986                 apowerssize = 1 << (groupbits - 1);
1987         }
1988 
1989         nlen = (BIG_CHUNK_SIZE / 32) * n->len;
1990         dn0 = (double)(n0 & 0xffff);
1991 
1992         dn = dt = d16r = d32r = NULL;
1993         nint = prod = NULL;
1994         for (i = 0; i < apowerssize; i++) {
1995                 apowers[i] = NULL;
1996         }
1997 
1998         if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
1999                 err = BIG_NO_MEM;
2000                 goto ret;
2001         }
2002         if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
2003                 err = BIG_NO_MEM;
2004                 goto ret;
2005         }
2006         if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
2007                 err = BIG_NO_MEM;
2008                 goto ret;
2009         }
2010         if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
2011                 err = BIG_NO_MEM;
2012                 goto ret;
2013         }
2014         if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
2015                 err = BIG_NO_MEM;
2016                 goto ret;
2017         }
2018         if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
2019                 err = BIG_NO_MEM;
2020                 goto ret;
2021         }
2022         for (i = 0; i < apowerssize; i++) {
2023                 if ((apowers[i] = big_malloc((2 * nlen + 1) *
2024                     sizeof (double))) == NULL) {
2025                         err = BIG_NO_MEM;
2026                         goto ret;
2027                 }
2028         }
2029 
2030 #if (BIG_CHUNK_SIZE == 32)
2031         for (i = 0; i < ma->len; i++) {
2032                 nint[i] = ma->value[i];
2033         }
2034         for (; i < nlen; i++) {
2035                 nint[i] = 0;
2036         }
2037 #else
2038         for (i = 0; i < ma->len; i++) {
2039                 nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
2040                 nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
2041         }
2042         for (i = ma->len * 2; i < nlen; i++) {
2043                 nint[i] = 0;
2044         }
2045 #endif
2046         conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);
2047 
2048 #if (BIG_CHUNK_SIZE == 32)
2049         for (i = 0; i < n->len; i++) {
2050                 nint[i] = n->value[i];
2051         }
2052         for (; i < nlen; i++) {
2053                 nint[i] = 0;
2054         }
2055 #else
2056         for (i = 0; i < n->len; i++) {
2057                 nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
2058                 nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
2059         }
2060         for (i = n->len * 2; i < nlen; i++) {
2061                 nint[i] = 0;
2062         }
2063 #endif
2064         conv_i32_to_d32(dn, nint, nlen);
2065 
2066         mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
2067         conv_i32_to_d32(d32r, prod, nlen);
2068         for (i = 1; i < apowerssize; i++) {
2069                 mont_mulf_noconv(prod, d32r, apowers[i - 1],
2070                     dt, dn, nint, nlen, dn0);
2071                 conv_i32_to_d16(apowers[i], prod, nlen);
2072         }
2073 
2074 #if (BIG_CHUNK_SIZE == 32)
2075         for (i = 0; i < tmp->len; i++) {
2076                 prod[i] = tmp->value[i];
2077         }
2078         for (; i < nlen + 1; i++) {
2079                 prod[i] = 0;
2080         }
2081 #else
2082         for (i = 0; i < tmp->len; i++) {
2083                 prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
2084                 prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
2085         }
2086         for (i = tmp->len * 2; i < nlen + 1; i++) {
2087                 prod[i] = 0;
2088         }
2089 #endif
2090 
2091         bitind = nbits % BIG_CHUNK_SIZE;
2092         k = 0;
2093         l = 0;
2094         p = 0;
2095         bitcount = 0;
2096         for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
2097                 for (j = bitind - 1; j >= 0; j--) {
2098                         bit = (e->value[i] >> j) & 1;
2099                         if ((bitcount == 0) && (bit == 0)) {
2100                                 conv_i32_to_d32_and_d16(d32r, d16r,
2101                                     prod, nlen);
2102                                 mont_mulf_noconv(prod, d32r, d16r,
2103                                     dt, dn, nint, nlen, dn0);
2104                         } else {
2105                                 bitcount++;
2106                                 p = p * 2 + bit;
2107                                 if (bit == 1) {
2108                                         k = k + l + 1;
2109                                         l = 0;
2110                                 } else {
2111                                         l++;
2112                                 }
2113                                 if (bitcount == groupbits) {
2114                                         for (m = 0; m < k; m++) {
2115                                                 conv_i32_to_d32_and_d16(d32r,
2116                                                     d16r, prod, nlen);
2117                                                 mont_mulf_noconv(prod, d32r,
2118                                                     d16r, dt, dn, nint,
2119                                                     nlen, dn0);
2120                                         }
2121                                         conv_i32_to_d32(d32r, prod, nlen);
2122                                         mont_mulf_noconv(prod, d32r,
2123                                             apowers[p >> (l + 1)],
2124                                             dt, dn, nint, nlen, dn0);
2125                                         for (m = 0; m < l; m++) {
2126                                                 conv_i32_to_d32_and_d16(d32r,
2127                                                     d16r, prod, nlen);
2128                                                 mont_mulf_noconv(prod, d32r,
2129                                                     d16r, dt, dn, nint,
2130                                                     nlen, dn0);
2131                                         }
2132                                         k = 0;
2133                                         l = 0;
2134                                         p = 0;
2135                                         bitcount = 0;
2136                                 }
2137                         }
2138                 }
2139                 bitind = BIG_CHUNK_SIZE;
2140         }
2141 
2142         for (m = 0; m < k; m++) {
2143                 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2144                 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2145         }
2146         if (p != 0) {
2147                 conv_i32_to_d32(d32r, prod, nlen);
2148                 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
2149                     dt, dn, nint, nlen, dn0);
2150         }
2151         for (m = 0; m < l; m++) {
2152                 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2153                 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2154         }
2155 
2156 #if (BIG_CHUNK_SIZE == 32)
2157         for (i = 0; i < nlen; i++) {
2158                 result->value[i] = prod[i];
2159         }
2160         for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
2161                 ;
2162 #else
2163         for (i = 0; i < nlen / 2; i++) {
2164                 result->value[i] = (uint64_t)(prod[2 * i]) +
2165                     (((uint64_t)(prod[2 * i + 1])) << 32);
2166         }
2167         for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
2168                 ;
2169 #endif
2170         result->len = i + 1;
2171 
2172 ret:
2173         for (i = apowerssize - 1; i >= 0; i--) {
2174                 if (apowers[i] != NULL)
2175                         big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
2176         }
2177         if (d32r != NULL) {
2178                 big_free(d32r, nlen * sizeof (double));
2179         }
2180         if (d16r != NULL) {
2181                 big_free(d16r, (2 * nlen + 1) * sizeof (double));
2182         }
2183         if (prod != NULL) {
2184                 big_free(prod, (nlen + 1) * sizeof (uint32_t));
2185         }
2186         if (nint != NULL) {
2187                 big_free(nint, nlen * sizeof (uint32_t));
2188         }
2189         if (dt != NULL) {
2190                 big_free(dt, (4 * nlen + 2) * sizeof (double));
2191         }
2192         if (dn != NULL) {
2193                 big_free(dn, nlen * sizeof (double));
2194         }
2195 
2196 #ifdef _KERNEL
2197         big_restorefp(fpu);
2198 #endif
2199 
2200         return (err);
2201 }
2202 
2203 #endif /* USE_FLOATING_POINT */
2204 
2205 
2206 BIG_ERR_CODE
2207 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr,
2208     big_modexp_ncp_info_t *info)
2209 {
2210         BIGNUM          ma, tmp, rr;
2211         BIG_CHUNK_TYPE  mavalue[BIGTMPSIZE];
2212         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2213         BIG_CHUNK_TYPE  rrvalue[BIGTMPSIZE];
2214         BIG_ERR_CODE    err;
2215         BIG_CHUNK_TYPE  n0;
2216 
2217         if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue)))  !=
2218             BIG_OK) {
2219                 return (err);
2220         }
2221         ma.len = 1;
2222         ma.value[0] = 0;
2223 
2224         if ((err = big_init1(&tmp, 2 * n->len + 1,
2225             tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2226                 goto ret1;
2227         }
2228 
2229         /* clear the malloced bit to help cleanup */
2230         rr.malloced = 0;
2231 
2232         if (n_rr == NULL) {
2233                 if ((err = big_init1(&rr, 2 * n->len + 1,
2234                     rrvalue, arraysize(rrvalue))) != BIG_OK) {
2235                         goto ret2;
2236                 }
2237                 if (big_mont_rr(&rr, n) != BIG_OK) {
2238                         goto ret;
2239                 }
2240                 n_rr = &rr;
2241         }
2242 
2243         n0 = big_n0(n->value[0]);
2244 
2245         if (big_cmp_abs(a, n) > 0) {
2246                 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2247                         goto ret;
2248                 }
2249                 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2250         } else {
2251                 err = big_mont_conv(&ma, a, n, n0, n_rr);
2252         }
2253         if (err != BIG_OK) {
2254                 goto ret;
2255         }
2256 
2257         tmp.len = 1;
2258         tmp.value[0] = 1;
2259         if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) {
2260                 goto ret;
2261         }
2262 
2263         if ((info != NULL) && (info->func != NULL)) {
2264                 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0,
2265                     info->ncp, info->reqp);
2266         } else {
2267                 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0);
2268         }
2269         if (err != BIG_OK) {
2270                 goto ret;
2271         }
2272 
2273         ma.value[0] = 1;
2274         ma.len = 1;
2275         if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) {
2276                 goto ret;
2277         }
2278         err = big_copy(result, &tmp);
2279 
2280 ret:
2281         if (rr.malloced) {
2282                 big_finish(&rr);
2283         }
2284 ret2:
2285         big_finish(&tmp);
2286 ret1:
2287         big_finish(&ma);
2288 
2289         return (err);
2290 }
2291 
2292 BIG_ERR_CODE
2293 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
2294 {
2295         return (big_modexp_ext(result, a, e, n, n_rr, NULL));
2296 }
2297 
2298 
2299 BIG_ERR_CODE
2300 big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2301     BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2302     BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
2303 {
2304         BIGNUM          ap, aq, tmp;
2305         int             alen, biglen, sign;
2306         BIG_ERR_CODE    err;
2307 
2308         if (p->len > q->len) {
2309                 biglen = p->len;
2310         } else {
2311                 biglen = q->len;
2312         }
2313 
2314         if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
2315                 return (err);
2316         }
2317         if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
2318                 goto ret1;
2319         }
2320         if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
2321                 goto ret2;
2322         }
2323 
2324         /*
2325          * check whether a is too short - to avoid timing attacks
2326          */
2327         alen = a->len;
2328         while ((alen > p->len) && (a->value[alen - 1] == 0)) {
2329                 alen--;
2330         }
2331         if (alen < p->len + q->len) {
2332                 /*
2333                  * a is too short, add p*q to it before
2334                  * taking it modulo p and q
2335                  * this will also affect timing, but this difference
2336                  * does not depend on p or q, only on a
2337                  * (in "normal" operation, this path will never be
2338                  * taken, so it is not a performance penalty
2339                  */
2340                 if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
2341                         goto ret;
2342                 }
2343                 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
2344                         goto ret;
2345                 }
2346                 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
2347                         goto ret;
2348                 }
2349                 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2350                         goto ret;
2351                 }
2352         } else {
2353                 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
2354                         goto ret;
2355                 }
2356                 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
2357                         goto ret;
2358                 }
2359         }
2360 
2361         if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
2362             BIG_OK) {
2363                 goto ret;
2364         }
2365         if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
2366             BIG_OK) {
2367                 goto ret;
2368         }
2369         if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
2370                 goto ret;
2371         }
2372         if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
2373                 goto ret;
2374         }
2375         sign = tmp.sign;
2376         tmp.sign = 1;
2377         if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2378                 goto ret;
2379         }
2380         if ((sign == -1) && (!big_is_zero(&aq))) {
2381                 (void) big_sub_pos(&aq, q, &aq);
2382         }
2383         if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
2384                 goto ret;
2385         }
2386         err = big_add_abs(result, &ap, &tmp);
2387 
2388 ret:
2389         big_finish(&tmp);
2390 ret2:
2391         big_finish(&aq);
2392 ret1:
2393         big_finish(&ap);
2394 
2395         return (err);
2396 }
2397 
2398 
2399 BIG_ERR_CODE
2400 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2401     BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2402     BIGNUM *p_rr, BIGNUM *q_rr)
2403 {
2404         return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
2405             p, q, pinvmodq, p_rr, q_rr, NULL));
2406 }
2407 
2408 
2409 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2410 BIGNUM big_One = {1, 1, 1, 0, onearr};
2411 
2412 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2413 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2414 
2415 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2416 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2417 
2418 
2419 BIG_ERR_CODE
2420 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2421 {
2422         BIGNUM          *high, *low, *mid, *t;
2423         BIGNUM          t1, t2, t3, prod;
2424         BIG_CHUNK_TYPE  t1value[BIGTMPSIZE];
2425         BIG_CHUNK_TYPE  t2value[BIGTMPSIZE];
2426         BIG_CHUNK_TYPE  t3value[BIGTMPSIZE];
2427         BIG_CHUNK_TYPE  prodvalue[BIGTMPSIZE];
2428         int             i, diff;
2429         uint32_t        nbits, nrootbits, highbits;
2430         BIG_ERR_CODE    err;
2431 
2432         nbits = big_numbits(n);
2433 
2434         if ((err = big_init1(&t1, n->len + 1,
2435             t1value, arraysize(t1value))) != BIG_OK)
2436                 return (err);
2437         if ((err = big_init1(&t2, n->len + 1,
2438             t2value, arraysize(t2value))) != BIG_OK)
2439                 goto ret1;
2440         if ((err = big_init1(&t3, n->len + 1,
2441             t3value, arraysize(t3value))) != BIG_OK)
2442                 goto ret2;
2443         if ((err = big_init1(&prod, n->len + 1,
2444             prodvalue, arraysize(prodvalue))) != BIG_OK)
2445                 goto ret3;
2446 
2447         nrootbits = (nbits + 1) / 2;
2448         t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2449         for (i = 0; i < t1.len; i++) {
2450                 t1.value[i] = 0;
2451                 t2.value[i] = BIG_CHUNK_ALLBITS;
2452         }
2453         highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
2454         if (highbits == BIG_CHUNK_SIZE) {
2455                 t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
2456                 t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
2457         } else {
2458                 t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
2459                 t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
2460         }
2461 
2462         high = &t2;
2463         low = &t1;
2464         mid = &t3;
2465 
2466         if ((err = big_mul(&prod, high, high)) != BIG_OK) {
2467                 goto ret;
2468         }
2469         diff = big_cmp_abs(&prod, n);
2470         if (diff <= 0) {
2471                 err = big_copy(result, high);
2472                 goto ret;
2473         }
2474 
2475         (void) big_sub_pos(mid, high, low);
2476         while (big_cmp_abs(&big_One, mid) != 0) {
2477                 (void) big_add_abs(mid, high, low);
2478                 (void) big_half_pos(mid, mid);
2479                 if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
2480                         goto ret;
2481                 diff = big_cmp_abs(&prod, n);
2482                 if (diff > 0) {
2483                         t = high;
2484                         high = mid;
2485                         mid = t;
2486                 } else if (diff < 0) {
2487                         t = low;
2488                         low = mid;
2489                         mid = t;
2490                 } else {
2491                         err = big_copy(result, low);
2492                         goto ret;
2493                 }
2494                 (void) big_sub_pos(mid, high, low);
2495         }
2496 
2497         err = big_copy(result, low);
2498 ret:
2499         if (prod.malloced) big_finish(&prod);
2500 ret3:
2501         if (t3.malloced) big_finish(&t3);
2502 ret2:
2503         if (t2.malloced) big_finish(&t2);
2504 ret1:
2505         if (t1.malloced) big_finish(&t1);
2506 
2507         return (err);
2508 }
2509 
2510 
2511 BIG_ERR_CODE
2512 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
2513 {
2514         BIGNUM          *t, *tmp2, *m, *n;
2515         BIGNUM          t1, t2, t3;
2516         BIG_CHUNK_TYPE  t1value[BIGTMPSIZE];
2517         BIG_CHUNK_TYPE  t2value[BIGTMPSIZE];
2518         BIG_CHUNK_TYPE  t3value[BIGTMPSIZE];
2519         int             len, err;
2520 
2521         if (big_is_zero(nn) ||
2522             (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
2523                 *jac = 0;
2524                 return (BIG_OK);
2525         }
2526 
2527         if (nn->len > mm->len) {
2528                 len = nn->len;
2529         } else {
2530                 len = mm->len;
2531         }
2532 
2533         if ((err = big_init1(&t1, len,
2534             t1value, arraysize(t1value))) != BIG_OK) {
2535                 return (err);
2536         }
2537         if ((err = big_init1(&t2, len,
2538             t2value, arraysize(t2value))) != BIG_OK) {
2539                 goto ret1;
2540         }
2541         if ((err = big_init1(&t3, len,
2542             t3value, arraysize(t3value))) != BIG_OK) {
2543                 goto ret2;
2544         }
2545 
2546         n = &t1;
2547         m = &t2;
2548         tmp2 = &t3;
2549 
2550         (void) big_copy(n, nn);
2551         (void) big_copy(m, mm);
2552 
2553         *jac = 1;
2554         while (big_cmp_abs(&big_One, m) != 0) {
2555                 if (big_is_zero(n)) {
2556                         *jac = 0;
2557                         goto ret;
2558                 }
2559                 if ((m->value[0] & 1) == 0) {
2560                         if (((n->value[0] & 7) == 3) ||
2561                             ((n->value[0] & 7) == 5))
2562                                 *jac = -*jac;
2563                         (void) big_half_pos(m, m);
2564                 } else if ((n->value[0] & 1) == 0) {
2565                         if (((m->value[0] & 7) == 3) ||
2566                             ((m->value[0] & 7) == 5))
2567                                 *jac = -*jac;
2568                         (void) big_half_pos(n, n);
2569                 } else {
2570                         if (((m->value[0] & 3) == 3) &&
2571                             ((n->value[0] & 3) == 3)) {
2572                                 *jac = -*jac;
2573                         }
2574                         if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
2575                                 goto ret;
2576                         }
2577                         t = tmp2;
2578                         tmp2 = m;
2579                         m = n;
2580                         n = t;
2581                 }
2582         }
2583         err = BIG_OK;
2584 
2585 ret:
2586         if (t3.malloced) big_finish(&t3);
2587 ret2:
2588         if (t2.malloced) big_finish(&t2);
2589 ret1:
2590         if (t1.malloced) big_finish(&t1);
2591 
2592         return (err);
2593 }
2594 
2595 
2596 BIG_ERR_CODE
2597 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2598 {
2599         int             i;
2600         uint32_t        m, w;
2601         BIG_CHUNK_TYPE  bit;
2602         BIGNUM          ki, tmp, tmp2;
2603         BIG_CHUNK_TYPE  kivalue[BIGTMPSIZE];
2604         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2605         BIG_CHUNK_TYPE  tmp2value[BIGTMPSIZE];
2606         BIG_ERR_CODE    err;
2607 
2608         if (big_cmp_abs(k, &big_One) == 0) {
2609                 (void) big_copy(Lk, p);
2610                 (void) big_copy(Lkminus1, &big_Two);
2611                 return (BIG_OK);
2612         }
2613 
2614         if ((err = big_init1(&ki, k->len + 1,
2615             kivalue, arraysize(kivalue))) != BIG_OK)
2616                 return (err);
2617 
2618         if ((err = big_init1(&tmp, 2 * n->len + 1,
2619             tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2620                 goto ret1;
2621 
2622         if ((err = big_init1(&tmp2, n->len,
2623             tmp2value, arraysize(tmp2value))) != BIG_OK)
2624                 goto ret2;
2625 
2626         m = big_numbits(k);
2627         ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2628         w = (m - 1) / BIG_CHUNK_SIZE;
2629         bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2630         for (i = 0; i < ki.len; i++) {
2631                 ki.value[i] = 0;
2632         }
2633         ki.value[ki.len - 1] = bit;
2634         if (big_cmp_abs(k, &ki) != 0) {
2635                 (void) big_double(&ki, &ki);
2636         }
2637         (void) big_sub_pos(&ki, &ki, k);
2638 
2639         (void) big_copy(Lk, p);
2640         (void) big_copy(Lkminus1, &big_Two);
2641 
2642         for (i = 0; i < m; i++) {
2643                 if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
2644                         goto ret;
2645                 }
2646                 (void) big_add_abs(&tmp, &tmp, n);
2647                 (void) big_sub_pos(&tmp, &tmp, p);
2648                 if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
2649                         goto ret;
2650                 }
2651                 if ((ki.value[w] & bit) != 0) {
2652                         if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
2653                             BIG_OK) {
2654                                 goto ret;
2655                         }
2656                         (void) big_add_abs(&tmp, &tmp, n);
2657                         (void) big_sub_pos(&tmp, &tmp, &big_Two);
2658                         if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
2659                             BIG_OK) {
2660                                 goto ret;
2661                         }
2662                         (void) big_copy(Lk, &tmp2);
2663                 } else {
2664                         if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
2665                                 goto ret;
2666                         }
2667                         (void) big_add_abs(&tmp, &tmp, n);
2668                         (void) big_sub_pos(&tmp, &tmp, &big_Two);
2669                         if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
2670                                 goto ret;
2671                         }
2672                         (void) big_copy(Lkminus1, &tmp2);
2673                 }
2674                 bit = bit >> 1;
2675                 if (bit == 0) {
2676                         bit = BIG_CHUNK_HIGHBIT;
2677                         w--;
2678                 }
2679         }
2680 
2681         err = BIG_OK;
2682 
2683 ret:
2684         if (tmp2.malloced) big_finish(&tmp2);
2685 ret2:
2686         if (tmp.malloced) big_finish(&tmp);
2687 ret1:
2688         if (ki.malloced) big_finish(&ki);
2689 
2690         return (err);
2691 }
2692 
2693 
2694 BIG_ERR_CODE
2695 big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
2696 {
2697         BIGNUM          o, nminus1, tmp, Lkminus1, Lk;
2698         BIG_CHUNK_TYPE  ovalue[BIGTMPSIZE];
2699         BIG_CHUNK_TYPE  nminus1value[BIGTMPSIZE];
2700         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2701         BIG_CHUNK_TYPE  Lkminus1value[BIGTMPSIZE];
2702         BIG_CHUNK_TYPE  Lkvalue[BIGTMPSIZE];
2703         BIG_ERR_CODE    err;
2704         int             e, i, jac;
2705 
2706         if (big_cmp_abs(n, &big_One) == 0) {
2707                 return (BIG_FALSE);
2708         }
2709         if (big_cmp_abs(n, &big_Two) == 0) {
2710                 return (BIG_TRUE);
2711         }
2712         if ((n->value[0] & 1) == 0) {
2713                 return (BIG_FALSE);
2714         }
2715 
2716         if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
2717             BIG_OK) {
2718                 return (err);
2719         }
2720 
2721         if ((err = big_init1(&nminus1, n->len,
2722             nminus1value, arraysize(nminus1value))) != BIG_OK) {
2723                 goto ret1;
2724         }
2725 
2726         if ((err = big_init1(&tmp, 2 * n->len,
2727             tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2728                 goto ret2;
2729         }
2730 
2731         if ((err = big_init1(&Lkminus1, n->len,
2732             Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
2733                 goto ret3;
2734         }
2735 
2736         if ((err = big_init1(&Lk, n->len,
2737             Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
2738                 goto ret4;
2739         }
2740 
2741         (void) big_sub_pos(&o, n, &big_One);    /* cannot fail */
2742         (void) big_copy(&nminus1, &o);          /* cannot fail */
2743         e = 0;
2744         while ((o.value[0] & 1) == 0) {
2745                 e++;
2746                 (void) big_half_pos(&o, &o);  /* cannot fail */
2747         }
2748         if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
2749             BIG_OK) {
2750                 goto ret;
2751         }
2752 
2753         i = 0;
2754         while ((i < e) &&
2755             (big_cmp_abs(&tmp, &big_One) != 0) &&
2756             (big_cmp_abs(&tmp, &nminus1) != 0)) {
2757                 if ((err =
2758                     big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
2759                     BIG_OK)
2760                         goto ret;
2761                 i++;
2762         }
2763 
2764         if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
2765             ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
2766                 err = BIG_FALSE;
2767                 goto ret;
2768         }
2769 
2770         if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
2771                 goto ret;
2772         }
2773 
2774         if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
2775                 goto ret;
2776         }
2777         if (big_cmp_abs(&tmp, n) == 0) {
2778                 err = BIG_FALSE;
2779                 goto ret;
2780         }
2781 
2782         (void) big_copy(&o, &big_Two);
2783         do {
2784                 (void) big_add_abs(&o, &o, &big_One);
2785                 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
2786                         goto ret;
2787                 }
2788                 (void) big_sub_pos(&tmp, &tmp, &big_Four);
2789                 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
2790                         goto ret;
2791                 }
2792         } while (jac != -1);
2793 
2794         (void) big_add_abs(&tmp, n, &big_One);
2795         if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
2796                 goto ret;
2797         }
2798 
2799         if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
2800             (big_cmp_abs(&Lk, &big_Two) == 0)) {
2801                 err = BIG_TRUE;
2802         } else {
2803                 err = BIG_FALSE;
2804         }
2805 
2806 ret:
2807         if (Lk.malloced) big_finish(&Lk);
2808 ret4:
2809         if (Lkminus1.malloced) big_finish(&Lkminus1);
2810 ret3:
2811         if (tmp.malloced) big_finish(&tmp);
2812 ret2:
2813         if (nminus1.malloced) big_finish(&nminus1);
2814 ret1:
2815         if (o.malloced) big_finish(&o);
2816 
2817         return (err);
2818 }
2819 
2820 
2821 BIG_ERR_CODE
2822 big_isprime_pos(BIGNUM *n)
2823 {
2824         return (big_isprime_pos_ext(n, NULL));
2825 }
2826 
2827 
2828 #define SIEVESIZE 1000
2829 
2830 
2831 BIG_ERR_CODE
2832 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2833 {
2834         static const uint32_t smallprimes[] = {
2835             3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2836             51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 };
2837         BIG_ERR_CODE    err;
2838         int             sieve[SIEVESIZE];
2839         int             i;
2840         uint32_t        off, p;
2841 
2842         if ((err = big_copy(result, n)) != BIG_OK) {
2843                 return (err);
2844         }
2845         result->value[0] |= 1;
2846         /* CONSTCOND */
2847         while (1) {
2848                 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2849                 for (i = 0;
2850                     i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2851                         p = smallprimes[i];
2852                         off = big_modhalf_pos(result, p);
2853                         off = p - off;
2854                         if ((off % 2) == 1) {
2855                                 off = off + p;
2856                         }
2857                         off = off / 2;
2858                         while (off < SIEVESIZE) {
2859                                 sieve[off] = 1;
2860                                 off = off + p;
2861                         }
2862                 }
2863 
2864                 for (i = 0; i < SIEVESIZE; i++) {
2865                         if (sieve[i] == 0) {
2866                                 err = big_isprime_pos_ext(result, info);
2867                                 if (err != BIG_FALSE) {
2868                                         if (err != BIG_TRUE) {
2869                                                 return (err);
2870                                         } else {
2871                                                 goto out;
2872                                         }
2873                                 }
2874 
2875                         }
2876                         if ((err = big_add_abs(result, result, &big_Two)) !=
2877                             BIG_OK) {
2878                                 return (err);
2879                         }
2880                 }
2881         }
2882 
2883 out:
2884         return (BIG_OK);
2885 }
2886 
2887 
2888 BIG_ERR_CODE
2889 big_nextprime_pos(BIGNUM *result, BIGNUM *n)
2890 {
2891         return (big_nextprime_pos_ext(result, n, NULL));
2892 }
2893 
2894 
2895 BIG_ERR_CODE
2896 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
2897 {
2898         BIG_ERR_CODE err;
2899 
2900 
2901         if ((err = big_copy(result, n)) != BIG_OK)
2902                 return (err);
2903         result->value[0] |= 1;
2904         while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
2905                 if (err != BIG_FALSE)
2906                         return (err);
2907                 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
2908                         return (err);
2909         }
2910         return (BIG_OK);
2911 }
2912 
2913 
2914 /*
2915  * given m and e, computes the rest in the equation
2916  * gcd(m, e) = cm * m + ce * e
2917  */
2918 BIG_ERR_CODE
2919 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
2920 {
2921         BIGNUM          *xi, *ri, *riminus1, *riminus2, *t;
2922         BIGNUM          *vmi, *vei, *vmiminus1, *veiminus1;
2923         BIGNUM          t1, t2, t3, t4, t5, t6, t7, t8, tmp;
2924         BIG_CHUNK_TYPE  t1value[BIGTMPSIZE];
2925         BIG_CHUNK_TYPE  t2value[BIGTMPSIZE];
2926         BIG_CHUNK_TYPE  t3value[BIGTMPSIZE];
2927         BIG_CHUNK_TYPE  t4value[BIGTMPSIZE];
2928         BIG_CHUNK_TYPE  t5value[BIGTMPSIZE];
2929         BIG_CHUNK_TYPE  t6value[BIGTMPSIZE];
2930         BIG_CHUNK_TYPE  t7value[BIGTMPSIZE];
2931         BIG_CHUNK_TYPE  t8value[BIGTMPSIZE];
2932         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2933         BIG_ERR_CODE    err;
2934         int             len;
2935 
2936         if (big_cmp_abs(m, e) >= 0) {
2937                 len = m->len;
2938         } else {
2939                 len = e->len;
2940         }
2941 
2942         if ((err = big_init1(&t1, len,
2943             t1value, arraysize(t1value))) != BIG_OK) {
2944                 return (err);
2945         }
2946         if ((err = big_init1(&t2, len,
2947             t2value, arraysize(t2value))) != BIG_OK) {
2948                 goto ret1;
2949         }
2950         if ((err = big_init1(&t3, len,
2951             t3value, arraysize(t3value))) != BIG_OK) {
2952                 goto ret2;
2953         }
2954         if ((err = big_init1(&t4, len,
2955             t4value, arraysize(t3value))) != BIG_OK) {
2956                 goto ret3;
2957         }
2958         if ((err = big_init1(&t5, len,
2959             t5value, arraysize(t5value))) != BIG_OK) {
2960                 goto ret4;
2961         }
2962         if ((err = big_init1(&t6, len,
2963             t6value, arraysize(t6value))) != BIG_OK) {
2964                 goto ret5;
2965         }
2966         if ((err = big_init1(&t7, len,
2967             t7value, arraysize(t7value))) != BIG_OK) {
2968                 goto ret6;
2969         }
2970         if ((err = big_init1(&t8, len,
2971             t8value, arraysize(t8value))) != BIG_OK) {
2972                 goto ret7;
2973         }
2974 
2975         if ((err = big_init1(&tmp, 2 * len,
2976             tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2977                 goto ret8;
2978         }
2979 
2980         ri = &t1;
2981         ri->value[0] = 1;
2982         ri->len = 1;
2983         xi = &t2;
2984         riminus1 = &t3;
2985         riminus2 = &t4;
2986         vmi = &t5;
2987         vei = &t6;
2988         vmiminus1 = &t7;
2989         veiminus1 = &t8;
2990 
2991         (void) big_copy(vmiminus1, &big_One);
2992         (void) big_copy(vmi, &big_One);
2993         (void) big_copy(veiminus1, &big_One);
2994         (void) big_copy(xi, &big_One);
2995         vei->len = 1;
2996         vei->value[0] = 0;
2997 
2998         (void) big_copy(riminus1, m);
2999         (void) big_copy(ri, e);
3000 
3001         while (!big_is_zero(ri)) {
3002                 t = riminus2;
3003                 riminus2 = riminus1;
3004                 riminus1 = ri;
3005                 ri = t;
3006                 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
3007                         goto ret;
3008                 }
3009                 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
3010                         goto ret;
3011                 }
3012                 t = vmiminus1;
3013                 vmiminus1 = vmi;
3014                 vmi = t;
3015                 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
3016                         goto ret;
3017                 }
3018                 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
3019                         goto ret;
3020                 }
3021                 t = veiminus1;
3022                 veiminus1 = vei;
3023                 vei = t;
3024                 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
3025                     BIG_OK) {
3026                         goto ret;
3027                 }
3028         }
3029         if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
3030                 goto ret;
3031         }
3032         if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
3033                 goto ret;
3034         }
3035         if (ce != NULL) {
3036                 err = big_copy(ce, vei);
3037         }
3038 ret:
3039         if (tmp.malloced) big_finish(&tmp);
3040 ret8:
3041         if (t8.malloced) big_finish(&t8);
3042 ret7:
3043         if (t7.malloced) big_finish(&t7);
3044 ret6:
3045         if (t6.malloced) big_finish(&t6);
3046 ret5:
3047         if (t5.malloced) big_finish(&t5);
3048 ret4:
3049         if (t4.malloced) big_finish(&t4);
3050 ret3:
3051         if (t3.malloced) big_finish(&t3);
3052 ret2:
3053         if (t2.malloced) big_finish(&t2);
3054 ret1:
3055         if (t1.malloced) big_finish(&t1);
3056 
3057         return (err);
3058 }