1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 #define big_div_pos_fast big_div_pos
  29 
  30 #include "bignum.h"
  31 
  32 /*
  33  * Configuration guide
  34  * -------------------
  35  *
  36  * There are 4 preprocessor symbols used to configure the bignum
  37  * implementation.  This file contains no logic to configure based on
  38  * processor; we leave that to the Makefiles to specify.
  39  *
  40  * USE_FLOATING_POINT
  41  *   Meaning: There is support for a fast floating-point implementation of
  42  *   Montgomery multiply.
  43  *
  44  * PSR_MUL
  45  *   Meaning: There are processor-specific versions of the low level
  46  *   functions to implement big_mul.  Those functions are: big_mul_set_vec,
  47  *   big_mul_add_vec, big_mul_vec, and big_sqr_vec.  PSR_MUL implies support
  48  *   for all 4 functions.  You cannot pick and choose which subset of these
  49  *   functions to support; that would lead to a rat's nest of #ifdefs.
  50  *
  51  * HWCAP
  52  *   Meaning: Call multiply support functions through a function pointer.
  53  *   On x86, there are multiple implementations for differnt hardware
  54  *   capabilities, such as MMX, SSE2, etc.  Tests are made at run-time, when
  55  *   a function is first used.  So, the support functions are called through
  56  *   a function pointer.  There is no need for that on Sparc, because there
  57  *   is only one implementation; support functions are called directly.
  58  *   Later, if there were some new VIS instruction, or something, and a
  59  *   run-time test were needed, rather than variant kernel modules and
  60  *   libraries, then HWCAP would be defined for Sparc, as well.
  61  *
  62  * UMUL64
  63  *   Meaning: It is safe to use generic C code that assumes the existence
  64  *   of a 32 x 32 --> 64 bit unsigned multiply.  If this is not defined,
  65  *   then the generic code for big_mul_add_vec() must necessarily be very slow,
  66  *   because it must fall back to using 16 x 16 --> 32 bit multiplication.
  67  *
  68  */
  69 
  70 
  71 #ifdef  _KERNEL
  72 #include <sys/ddi.h>
  73 #include <sys/mdesc.h>
  74 #include <sys/crypto/common.h>
  75 
  76 #include <sys/types.h>
  77 #include <sys/kmem.h>
  78 #include <sys/param.h>
  79 #include <sys/sunddi.h>
  80 
  81 #define big_malloc(size)        kmem_alloc(size, KM_NOSLEEP)
  82 #define big_free(ptr, size)     kmem_free(ptr, size)
  83 
  84 void *
  85 big_realloc(void *from, size_t oldsize, size_t newsize)
  86 {
  87         void *rv;
  88 
  89         rv = kmem_alloc(newsize, KM_NOSLEEP);
  90         if (rv != NULL)
  91                 bcopy(from, rv, oldsize);
  92         kmem_free(from, oldsize);
  93         return (rv);
  94 }
  95 
  96 #else   /* _KERNEL */
  97 
  98 #include <stdlib.h>
  99 #include <stdio.h>
 100 #include <assert.h>
 101 #define ASSERT  assert
 102 
 103 #ifndef MALLOC_DEBUG
 104 
 105 #define big_malloc(size)        malloc(size)
 106 #define big_free(ptr, size)     free(ptr)
 107 
 108 #else
 109 
 110 void
 111 big_free(void *ptr, size_t size)
 112 {
 113         printf("freed %d bytes at %p\n", size, ptr);
 114         free(ptr);
 115 }
 116 
 117 void *
 118 big_malloc(size_t size)
 119 {
 120         void *rv;
 121         rv = malloc(size);
 122         printf("malloced %d bytes, addr:%p\n", size, rv);
 123         return (rv);
 124 }
 125 #endif /* MALLOC_DEBUG */
 126 
 127 #define big_realloc(x, y, z) realloc((x), (z))
 128 
 129 void
 130 printbignum(char *aname, BIGNUM *a)
 131 {
 132         int i;
 133 
 134         (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
 135         for (i = a->len - 1; i >= 0; i--) {
 136 #ifdef BIGNUM_CHUNK_32
 137                 (void) printf("%08x ", a->value[i]);
 138                 if ((i % 8 == 0) && (i != 0)) {
 139                         (void) printf("\n");
 140                 }
 141 #else
 142                 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
 143                     (uint32_t)((a->value[i]) & 0xffffffff));
 144                 if ((i % 4 == 0) && (i != 0)) {
 145                         (void) printf("\n");
 146                 }
 147 #endif
 148         }
 149         (void) printf("\n");
 150 }
 151 
 152 #endif  /* _KERNEL */
 153 
 154 
 155 /* size in BIG_CHUNK_SIZE-bit words */
 156 BIG_ERR_CODE
 157 big_init(BIGNUM *number, int size)
 158 {
 159         number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
 160         if (number->value == NULL) {
 161                 return (BIG_NO_MEM);
 162         }
 163         number->size = size;
 164         number->len = 0;
 165         number->sign = 1;
 166         number->malloced = 1;
 167         return (BIG_OK);
 168 }
 169 
 170 /* size in BIG_CHUNK_SIZE-bit words */
 171 BIG_ERR_CODE
 172 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
 173 {
 174         if ((buf == NULL) || (size > bufsize)) {
 175                 number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
 176                 if (number->value == NULL) {
 177                         return (BIG_NO_MEM);
 178                 }
 179                 number->size = size;
 180                 number->malloced = 1;
 181         } else {
 182                 number->value = buf;
 183                 number->size = bufsize;
 184                 number->malloced = 0;
 185         }
 186                 number->len = 0;
 187                 number->sign = 1;
 188 
 189         return (BIG_OK);
 190 }
 191 
 192 void
 193 big_finish(BIGNUM *number)
 194 {
 195         if (number->malloced == 1) {
 196                 big_free(number->value,
 197                     sizeof (BIG_CHUNK_TYPE) * number->size);
 198                 number->malloced = 0;
 199         }
 200 }
 201 
 202 
 203 /*
 204  *  bn->size should be at least
 205  * (len + sizeof (BIG_CHUNK_TYPE) - 1) / sizeof (BIG_CHUNK_TYPE) bytes
 206  * converts from byte-big-endian format to bignum format (words in
 207  * little endian order, but bytes within the words big endian)
 208  */
 209 void
 210 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
 211 {
 212         int             i, j, offs;
 213         BIG_CHUNK_TYPE  word;
 214         uchar_t         *knwordp;
 215 
 216 #ifdef  _LP64
 217         offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
 218         bn->len = (uint32_t)len / sizeof (BIG_CHUNK_TYPE);
 219 
 220         for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
 221 #else   /* !_LP64 */
 222         offs = len % sizeof (BIG_CHUNK_TYPE);
 223         bn->len = len / sizeof (BIG_CHUNK_TYPE);
 224         for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
 225 #endif  /* _LP64 */
 226                 knwordp = &(kn[len - sizeof (BIG_CHUNK_TYPE) * (i + 1)]);
 227                 word = knwordp[0];
 228                 for (j = 1; j < sizeof (BIG_CHUNK_TYPE); j++) {
 229                         word = (word << 8)+ knwordp[j];
 230                 }
 231                 bn->value[i] = word;
 232         }
 233         if (offs > 0) {
 234                 word = kn[0];
 235                 for (i = 1; i < offs; i++) word = (word << 8) + kn[i];
 236                 bn->value[bn->len++] = word;
 237         }
 238         while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) {
 239                 bn->len --;
 240         }
 241 }
 242 
 243 /*
 244  * copies the least significant len bytes if
 245  * len < bn->len * sizeof (BIG_CHUNK_TYPE)
 246  * converts from bignum format to byte-big-endian format.
 247  * bignum format is words of type  BIG_CHUNK_TYPE in little endian order.
 248  */
 249 void
 250 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
 251 {
 252         int             i, j, offs;
 253         BIG_CHUNK_TYPE  word;
 254 
 255         if (len < sizeof (BIG_CHUNK_TYPE) * bn->len) {
 256 #ifdef  _LP64
 257                 for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
 258 #else   /* !_LP64 */
 259                 for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
 260 #endif  /* _LP64 */
 261                         word = bn->value[i];
 262                         for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
 263                                 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
 264                                     word & 0xff;
 265                                 word = word >> 8;
 266                         }
 267                 }
 268 #ifdef  _LP64
 269                 offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
 270 #else   /* !_LP64 */
 271                 offs = len % sizeof (BIG_CHUNK_TYPE);
 272 #endif  /* _LP64 */
 273                 if (offs > 0) {
 274                         word = bn->value[len / sizeof (BIG_CHUNK_TYPE)];
 275 #ifdef  _LP64
 276                         for (i =  (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
 277                             i > 0; i --) {
 278 #else   /* !_LP64 */
 279                         for (i = len % sizeof (BIG_CHUNK_TYPE);
 280                             i > 0; i --) {
 281 #endif  /* _LP64 */
 282                                 kn[i - 1] = word & 0xff;
 283                                 word = word >> 8;
 284                         }
 285                 }
 286         } else {
 287                 for (i = 0; i < bn->len; i++) {
 288                         word = bn->value[i];
 289                         for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
 290                                 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
 291                                     word & 0xff;
 292                                 word = word >> 8;
 293                         }
 294                 }
 295 #ifdef  _LP64
 296                 for (i = 0;
 297                     i < (uint32_t)len - sizeof (BIG_CHUNK_TYPE) * bn->len;
 298                     i++) {
 299 #else   /* !_LP64 */
 300                 for (i = 0; i < len - sizeof (BIG_CHUNK_TYPE) * bn->len; i++) {
 301 #endif  /* _LP64 */
 302                         kn[i] = 0;
 303                 }
 304         }
 305 }
 306 
 307 
 308 int
 309 big_bitlength(BIGNUM *a)
 310 {
 311         int             l = 0, b = 0;
 312         BIG_CHUNK_TYPE  c;
 313 
 314         l = a->len - 1;
 315         while ((l > 0) && (a->value[l] == 0)) {
 316                 l--;
 317         }
 318         b = sizeof (BIG_CHUNK_TYPE) * BITSINBYTE;
 319         c = a->value[l];
 320         while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
 321                 c = c << 1;
 322                 b--;
 323         }
 324 
 325         return (l * sizeof (BIG_CHUNK_TYPE) * BITSINBYTE + b);
 326 }
 327 
 328 
 329 BIG_ERR_CODE
 330 big_copy(BIGNUM *dest, BIGNUM *src)
 331 {
 332         BIG_CHUNK_TYPE  *newptr;
 333         int             i, len;
 334 
 335         len = src->len;
 336         while ((len > 1) && (src->value[len - 1] == 0)) {
 337                 len--;
 338         }
 339         src->len = len;
 340         if (dest->size < len) {
 341                 if (dest->malloced == 1) {
 342                         newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
 343                             sizeof (BIG_CHUNK_TYPE) * dest->size,
 344                             sizeof (BIG_CHUNK_TYPE) * len);
 345                 } else {
 346                         newptr = (BIG_CHUNK_TYPE *)
 347                             big_malloc(sizeof (BIG_CHUNK_TYPE) * len);
 348                         if (newptr != NULL) {
 349                                 dest->malloced = 1;
 350                         }
 351                 }
 352                 if (newptr == NULL) {
 353                         return (BIG_NO_MEM);
 354                 }
 355                 dest->value = newptr;
 356                 dest->size = len;
 357         }
 358         dest->len = len;
 359         dest->sign = src->sign;
 360         for (i = 0; i < len; i++) {
 361                 dest->value[i] = src->value[i];
 362         }
 363 
 364         return (BIG_OK);
 365 }
 366 
 367 
 368 BIG_ERR_CODE
 369 big_extend(BIGNUM *number, int size)
 370 {
 371         BIG_CHUNK_TYPE  *newptr;
 372         int             i;
 373 
 374         if (number->size >= size)
 375                 return (BIG_OK);
 376         if (number->malloced) {
 377                 number->value = big_realloc(number->value,
 378                     sizeof (BIG_CHUNK_TYPE) * number->size,
 379                     sizeof (BIG_CHUNK_TYPE) * size);
 380         } else {
 381                 newptr = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
 382                 if (newptr != NULL) {
 383                         for (i = 0; i < number->size; i++) {
 384                                 newptr[i] = number->value[i];
 385                         }
 386                 }
 387                 number->value = newptr;
 388         }
 389 
 390         if (number->value == NULL) {
 391                 return (BIG_NO_MEM);
 392         }
 393 
 394         number->size = size;
 395         number->malloced = 1;
 396         return (BIG_OK);
 397 }
 398 
 399 
 400 /* returns 1 if n == 0 */
 401 int
 402 big_is_zero(BIGNUM *n)
 403 {
 404         int     i, result;
 405 
 406         result = 1;
 407         for (i = 0; i < n->len; i++) {
 408                 if (n->value[i] != 0) {
 409                         result = 0;
 410                 }
 411         }
 412         return (result);
 413 }
 414 
 415 
 416 BIG_ERR_CODE
 417 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 418 {
 419         int             i, shorter, longer;
 420         BIG_CHUNK_TYPE  cy, ai;
 421         BIG_CHUNK_TYPE  *r, *a, *b, *c;
 422         BIG_ERR_CODE    err;
 423         BIGNUM          *longerarg;
 424 
 425         if (aa->len > bb->len) {
 426                 shorter = bb->len;
 427                 longer = aa->len;
 428                 longerarg = aa;
 429         } else {
 430                 shorter = aa->len;
 431                 longer = bb->len;
 432                 longerarg = bb;
 433         }
 434         if (result->size < longer + 1) {
 435                 err = big_extend(result, longer + 1);
 436                 if (err != BIG_OK) {
 437                         return (err);
 438                 }
 439         }
 440 
 441         r = result->value;
 442         a = aa->value;
 443         b = bb->value;
 444         c = longerarg->value;
 445         cy = 0;
 446         for (i = 0; i < shorter; i++) {
 447                 ai = a[i];
 448                 r[i] = ai + b[i] + cy;
 449                 if (r[i] > ai) {
 450                         cy = 0;
 451                 } else if (r[i] < ai) {
 452                         cy = 1;
 453                 }
 454         }
 455         for (; i < longer; i++) {
 456                 ai = c[i];
 457                 r[i] = ai + cy;
 458                 if (r[i] >= ai) {
 459                         cy = 0;
 460                 }
 461         }
 462         if (cy == 1) {
 463                 r[i] = cy;
 464                 result->len = longer + 1;
 465         } else {
 466                 result->len = longer;
 467         }
 468         result->sign = 1;
 469         return (BIG_OK);
 470 }
 471 
 472 
 473 /* caller must make sure that result has at least len words allocated */
 474 void
 475 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
 476 {
 477         int             i;
 478         BIG_CHUNK_TYPE  cy, ai;
 479 
 480         cy = 1;
 481         for (i = 0; i < len; i++) {
 482                 ai = a[i];
 483                 r[i] = ai + (~b[i]) + cy;
 484                 if (r[i] > ai) {
 485                         cy = 0;
 486                 } else if (r[i] < ai) {
 487                         cy = 1;
 488                 }
 489         }
 490 }
 491 
 492 
 493 /* result=aa-bb  it is assumed that aa>=bb */
 494 BIG_ERR_CODE
 495 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 496 {
 497         int             i, shorter;
 498         BIG_CHUNK_TYPE  cy = 1, ai;
 499         BIG_CHUNK_TYPE  *r, *a, *b;
 500         BIG_ERR_CODE    err = BIG_OK;
 501 
 502         if (aa->len > bb->len) {
 503                 shorter = bb->len;
 504         } else {
 505                 shorter = aa->len;
 506         }
 507         if (result->size < aa->len) {
 508                 err = big_extend(result, aa->len);
 509                 if (err != BIG_OK) {
 510                         return (err);
 511                 }
 512         }
 513 
 514         r = result->value;
 515         a = aa->value;
 516         b = bb->value;
 517         result->len = aa->len;
 518         cy = 1;
 519         for (i = 0; i < shorter; i++) {
 520                 ai = a[i];
 521                 r[i] = ai + (~b[i]) + cy;
 522                 if (r[i] > ai) {
 523                         cy = 0;
 524                 } else if (r[i] < ai) {
 525                         cy = 1;
 526                 }
 527         }
 528         for (; i < aa->len; i++) {
 529                 ai = a[i];
 530                 r[i] = ai + (~0) + cy;
 531                 if (r[i] < ai) {
 532                         cy = 1;
 533                 }
 534         }
 535         result->sign = 1;
 536 
 537         if (cy == 0) {
 538                 return (BIG_INVALID_ARGS);
 539         } else {
 540                 return (BIG_OK);
 541         }
 542 }
 543 
 544 
 545 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
 546 int
 547 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
 548 {
 549         int     i;
 550 
 551         if (aa->len > bb->len) {
 552                 for (i = aa->len - 1; i > bb->len - 1; i--) {
 553                         if (aa->value[i] > 0) {
 554                                 return (1);
 555                         }
 556                 }
 557         } else if (aa->len < bb->len) {
 558                 for (i = bb->len - 1; i > aa->len - 1; i--) {
 559                         if (bb->value[i] > 0) {
 560                                 return (-1);
 561                         }
 562                 }
 563         } else {
 564                 i = aa->len-1;
 565         }
 566         for (; i >= 0; i--) {
 567                 if (aa->value[i] > bb->value[i]) {
 568                         return (1);
 569                 } else if (aa->value[i] < bb->value[i]) {
 570                         return (-1);
 571                 }
 572         }
 573 
 574         return (0);
 575 }
 576 
 577 
 578 BIG_ERR_CODE
 579 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 580 {
 581         BIG_ERR_CODE    err;
 582 
 583         if ((bb->sign == -1) && (aa->sign == 1)) {
 584                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
 585                         return (err);
 586                 }
 587                 result->sign = 1;
 588         } else if ((aa->sign == -1) && (bb->sign == 1)) {
 589                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
 590                         return (err);
 591                 }
 592                 result->sign = -1;
 593         } else if ((aa->sign == 1) && (bb->sign == 1)) {
 594                 if (big_cmp_abs(aa, bb) >= 0) {
 595                         if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
 596                                 return (err);
 597                         }
 598                         result->sign = 1;
 599                 } else {
 600                         if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
 601                                 return (err);
 602                         }
 603                         result->sign = -1;
 604                 }
 605         } else {
 606                 if (big_cmp_abs(aa, bb) >= 0) {
 607                         if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
 608                                 return (err);
 609                         }
 610                         result->sign = -1;
 611                 } else {
 612                         if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
 613                                 return (err);
 614                         }
 615                         result->sign = 1;
 616                 }
 617         }
 618         return (BIG_OK);
 619 }
 620 
 621 
 622 BIG_ERR_CODE
 623 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 624 {
 625         BIG_ERR_CODE    err;
 626 
 627         if ((bb->sign == -1) && (aa->sign == -1)) {
 628                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
 629                         return (err);
 630                 }
 631                 result->sign = -1;
 632         } else if ((aa->sign == 1) && (bb->sign == 1)) {
 633                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
 634                         return (err);
 635                 }
 636                 result->sign = 1;
 637         } else if ((aa->sign == 1) && (bb->sign == -1)) {
 638                 if (big_cmp_abs(aa, bb) >= 0) {
 639                         if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
 640                                 return (err);
 641                         }
 642                         result->sign = 1;
 643                 } else {
 644                         if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
 645                                 return (err);
 646                         }
 647                         result->sign = -1;
 648                 }
 649         } else {
 650                 if (big_cmp_abs(aa, bb) >= 0) {
 651                         if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
 652                                 return (err);
 653                         }
 654                         result->sign = -1;
 655                 } else {
 656                         if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
 657                                 return (err);
 658                         }
 659                         result->sign = 1;
 660                 }
 661         }
 662         return (BIG_OK);
 663 }
 664 
 665 
 666 /* result = aa/2 */
 667 BIG_ERR_CODE
 668 big_half_pos(BIGNUM *result, BIGNUM *aa)
 669 {
 670         BIG_ERR_CODE    err;
 671         int             i;
 672         BIG_CHUNK_TYPE  cy, cy1;
 673         BIG_CHUNK_TYPE  *a, *r;
 674 
 675         if (result->size < aa->len) {
 676                 err = big_extend(result, aa->len);
 677                 if (err != BIG_OK) {
 678                         return (err);
 679                 }
 680         }
 681 
 682         result->len = aa->len;
 683         a = aa->value;
 684         r = result->value;
 685         cy = 0;
 686         for (i = aa->len - 1; i >= 0; i--) {
 687                 cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
 688                 r[i] = (cy | (a[i] >> 1));
 689                 cy = cy1;
 690         }
 691         if (r[result->len - 1] == 0) {
 692                 result->len--;
 693         }
 694 
 695         return (BIG_OK);
 696 }
 697 
 698 /* result  =  aa*2 */
 699 BIG_ERR_CODE
 700 big_double(BIGNUM *result, BIGNUM *aa)
 701 {
 702         BIG_ERR_CODE    err;
 703         int             i, rsize;
 704         BIG_CHUNK_TYPE  cy, cy1;
 705         BIG_CHUNK_TYPE  *a, *r;
 706 
 707         if ((aa->len > 0) &&
 708             ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
 709                 rsize = aa->len + 1;
 710         } else {
 711                 rsize = aa->len;
 712         }
 713 
 714         if (result->size < rsize) {
 715                 err = big_extend(result, rsize);
 716                 if (err != BIG_OK)
 717                         return (err);
 718         }
 719 
 720         a = aa->value;
 721         r = result->value;
 722         if (rsize == aa->len + 1) {
 723                 r[rsize - 1] = 1;
 724         }
 725         cy = 0;
 726         for (i = 0; i < aa->len; i++) {
 727                 cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
 728                 r[i] = (cy | (a[i] << 1));
 729                 cy = cy1;
 730         }
 731         result->len = rsize;
 732         return (BIG_OK);
 733 }
 734 
 735 
 736 /*
 737  * returns aa mod b, aa must be nonneg, b must be a max
 738  * (BIG_CHUNK_SIZE / 2)-bit integer
 739  */
 740 static uint32_t
 741 big_modhalf_pos(BIGNUM *aa, uint32_t b)
 742 {
 743         int             i;
 744         BIG_CHUNK_TYPE  rem;
 745 
 746         if (aa->len == 0) {
 747                 return (0);
 748         }
 749         rem = aa->value[aa->len - 1] % b;
 750         for (i = aa->len - 2; i >= 0; i--) {
 751                 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
 752                     (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
 753                 rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
 754                     (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
 755         }
 756 
 757         return ((uint32_t)rem);
 758 }
 759 
 760 
 761 /*
 762  * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
 763  * result->size should be at least aa->len at entry
 764  * aa, bb, and result should be positive
 765  */
 766 void
 767 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 768 {
 769         int i, lendiff;
 770         BIGNUM res1, aa1;
 771 
 772         lendiff = aa->len - bb->len;
 773         res1.size = result->size - lendiff;
 774         res1.malloced = 0;
 775         res1.value = result->value + lendiff;
 776         aa1.size = aa->size - lendiff;
 777         aa1.value = aa->value + lendiff;
 778         aa1.len = bb->len;
 779         aa1.sign = 1;
 780         (void) big_sub_pos(&res1, &aa1, bb);
 781         if (result->value != aa->value) {
 782                 for (i = 0; i < lendiff; i++) {
 783                         result->value[i] = aa->value[i];
 784                 }
 785         }
 786         result->len = aa->len;
 787 }
 788 
 789 
 790 /*
 791  * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
 792  *                                      (2^BIG_CHUNK_SIZE)^lendiff * |bb|
 793  * aa->len should be >= bb->len
 794  */
 795 int
 796 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
 797 {
 798         int lendiff;
 799         BIGNUM aa1;
 800 
 801         lendiff = aa->len - bb->len;
 802         aa1.len = bb->len;
 803         aa1.size = aa->size - lendiff;
 804         aa1.malloced = 0;
 805         aa1.value = aa->value + lendiff;
 806         return (big_cmp_abs(&aa1, bb));
 807 }
 808 
 809 
 810 /*
 811  * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
 812  * result should have enough space allocated.
 813  */
 814 static void
 815 big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
 816 {
 817         int             i;
 818         BIG_CHUNK_TYPE  t1, t2, ai, cy;
 819         BIG_CHUNK_TYPE  *a, *r;
 820 
 821         a = aa->value;
 822         r = result->value;
 823         cy = 0;
 824         for (i = 0; i < aa->len; i++) {
 825                 ai = a[i];
 826                 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
 827                 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
 828                     (t1 >> (BIG_CHUNK_SIZE / 2));
 829                 r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
 830                     (t2 << (BIG_CHUNK_SIZE / 2));
 831                 cy = t2 >> (BIG_CHUNK_SIZE / 2);
 832         }
 833         r[i] = cy;
 834         result->len = aa->len + 1;
 835         result->sign = aa->sign;
 836 }
 837 
 838 
 839 /*
 840  * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
 841  * (BIG_CHUNK_SIZE / 2)-bit positive integer.
 842  * result should have enough space allocated.
 843  */
 844 static void
 845 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
 846 {
 847         int             i;
 848         BIG_CHUNK_TYPE  t1, t2, ai, cy, ri;
 849         BIG_CHUNK_TYPE  *a, *r;
 850 
 851         a = aa->value;
 852         r = result->value;
 853         cy = 0;
 854         ri = 0;
 855         for (i = 0; i < aa->len; i++) {
 856                 ai = a[i];
 857                 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
 858                 t2 = (ai >>  (BIG_CHUNK_SIZE / 2)) * b +
 859                     (t1 >>  (BIG_CHUNK_SIZE / 2));
 860                 r[i] = (t1 <<  (BIG_CHUNK_SIZE / 2)) + ri;
 861                 ri = t2 & BIG_CHUNK_LOWHALFBITS;
 862                 cy = t2 >> (BIG_CHUNK_SIZE / 2);
 863         }
 864         r[i] = (cy <<  (BIG_CHUNK_SIZE / 2)) + ri;
 865         result->len = aa->len + 1;
 866         result->sign = aa->sign;
 867 }
 868 
 869 
 870 /* it is assumed that result->size is big enough */
 871 void
 872 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
 873 {
 874         int             i;
 875         BIG_CHUNK_TYPE  cy, ai;
 876 
 877         if (offs == 0) {
 878                 if (result != aa) {
 879                         (void) big_copy(result, aa);
 880                 }
 881                 return;
 882         }
 883         cy = 0;
 884         for (i = 0; i < aa->len; i++) {
 885                 ai = aa->value[i];
 886                 result->value[i] = (ai << offs) | cy;
 887                 cy = ai >> (BIG_CHUNK_SIZE - offs);
 888         }
 889         if (cy != 0) {
 890                 result->len = aa->len + 1;
 891                 result->value[result->len - 1] = cy;
 892         } else {
 893                 result->len = aa->len;
 894         }
 895         result->sign = aa->sign;
 896 }
 897 
 898 
 899 /* it is assumed that result->size is big enough */
 900 void
 901 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
 902 {
 903         int              i;
 904         BIG_CHUNK_TYPE  cy, ai;
 905 
 906         if (offs == 0) {
 907                 if (result != aa) {
 908                         (void) big_copy(result, aa);
 909                 }
 910                 return;
 911         }
 912         cy = aa->value[0] >> offs;
 913         for (i = 1; i < aa->len; i++) {
 914                 ai = aa->value[i];
 915                 result->value[i-1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
 916                 cy = ai >> offs;
 917         }
 918         result->len = aa->len;
 919         result->value[result->len - 1] = cy;
 920         result->sign = aa->sign;
 921 }
 922 
 923 
 924 /*
 925  * result = aa/bb   remainder = aa mod bb
 926  * it is assumed that aa and bb are positive
 927  */
 928 BIG_ERR_CODE
 929 big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
 930 {
 931         BIG_ERR_CODE    err = BIG_OK;
 932         int             i, alen, blen, tlen, rlen, offs;
 933         BIG_CHUNK_TYPE  higha, highb, coeff;
 934         BIG_CHUNK_TYPE  *a, *b;
 935         BIGNUM          bbhigh, bblow, tresult, tmp1, tmp2;
 936         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
 937         BIG_CHUNK_TYPE  tmp2value[BIGTMPSIZE];
 938         BIG_CHUNK_TYPE  tresultvalue[BIGTMPSIZE];
 939         BIG_CHUNK_TYPE  bblowvalue[BIGTMPSIZE];
 940         BIG_CHUNK_TYPE  bbhighvalue[BIGTMPSIZE];
 941 
 942         a = aa->value;
 943         b = bb->value;
 944         alen = aa->len;
 945         blen = bb->len;
 946         while ((alen > 1) && (a[alen - 1] == 0)) {
 947                 alen = alen - 1;
 948         }
 949         aa->len = alen;
 950         while ((blen > 1) && (b[blen - 1] == 0)) {
 951                 blen = blen - 1;
 952         }
 953         bb->len = blen;
 954         if ((blen == 1) && (b[0] == 0)) {
 955                 return (BIG_DIV_BY_0);
 956         }
 957 
 958         if (big_cmp_abs(aa, bb) < 0) {
 959                 if ((remainder != NULL) &&
 960                     ((err = big_copy(remainder, aa)) != BIG_OK)) {
 961                         return (err);
 962                 }
 963                 if (result != NULL) {
 964                         result->len = 1;
 965                         result->sign = 1;
 966                         result->value[0] = 0;
 967                 }
 968                 return (BIG_OK);
 969         }
 970 
 971         if ((err = big_init1(&bblow, blen + 1,
 972             bblowvalue, arraysize(bblowvalue))) != BIG_OK)
 973                 return (err);
 974 
 975         if ((err = big_init1(&bbhigh, blen + 1,
 976             bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
 977                 goto ret1;
 978 
 979         if ((err = big_init1(&tmp1, alen + 2,
 980             tmp1value, arraysize(tmp1value))) != BIG_OK)
 981                 goto ret2;
 982 
 983         if ((err = big_init1(&tmp2, blen + 2,
 984             tmp2value, arraysize(tmp2value))) != BIG_OK)
 985                 goto ret3;
 986 
 987         if ((err = big_init1(&tresult, alen - blen + 2,
 988             tresultvalue, arraysize(tresultvalue))) != BIG_OK)
 989                 goto ret4;
 990 
 991         offs = 0;
 992         highb = b[blen - 1];
 993         if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
 994                 highb = highb >> (BIG_CHUNK_SIZE / 2);
 995                 offs = (BIG_CHUNK_SIZE / 2);
 996         }
 997         while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
 998                 highb = highb << 1;
 999                 offs++;
1000         }
1001 
1002         big_shiftleft(&bblow, bb, offs);
1003 
1004         if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
1005                 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1006         } else {
1007                 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
1008         }
1009         if (bbhigh.value[bbhigh.len - 1] == 0) {
1010                 bbhigh.len--;
1011         } else {
1012                 bbhigh.value[bbhigh.len] = 0;
1013         }
1014 
1015         highb = bblow.value[bblow.len - 1];
1016 
1017         big_shiftleft(&tmp1, aa, offs);
1018         rlen = tmp1.len - bblow.len + 1;
1019         tresult.len = rlen;
1020 
1021         tmp1.len++;
1022         tlen = tmp1.len;
1023         tmp1.value[tmp1.len - 1] = 0;
1024         for (i = 0; i < rlen; i++) {
1025                 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
1026                     (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
1027                 coeff = higha / (highb + 1);
1028                 big_mulhalf_high(&tmp2, &bblow, coeff);
1029                 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1030                 bbhigh.len++;
1031                 while (tmp1.value[tlen - 1] > 0) {
1032                         big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1033                         coeff++;
1034                 }
1035                 bbhigh.len--;
1036                 tlen--;
1037                 tmp1.len--;
1038                 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
1039                         big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
1040                         coeff++;
1041                 }
1042                 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
1043                 higha = tmp1.value[tlen - 1];
1044                 coeff = higha / (highb + 1);
1045                 big_mulhalf_low(&tmp2, &bblow, coeff);
1046                 tmp2.len--;
1047                 big_sub_pos_high(&tmp1, &tmp1, &tmp2);
1048                 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
1049                         big_sub_pos_high(&tmp1, &tmp1, &bblow);
1050                         coeff++;
1051                 }
1052                 tresult.value[rlen - i - 1] =
1053                     tresult.value[rlen - i - 1] + coeff;
1054         }
1055 
1056         big_shiftright(&tmp1, &tmp1, offs);
1057 
1058         err = BIG_OK;
1059 
1060         if ((remainder != NULL) &&
1061             ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1062                 goto ret;
1063 
1064         if (result != NULL)
1065                 err = big_copy(result, &tresult);
1066 
1067 ret:
1068         big_finish(&tresult);
1069 ret4:
1070         big_finish(&tmp1);
1071 ret3:
1072         big_finish(&tmp2);
1073 ret2:
1074         big_finish(&bbhigh);
1075 ret1:
1076         big_finish(&bblow);
1077         return (err);
1078 }
1079 
1080 /*
1081  * If there is no processor-specific integer implementation of
1082  * the lower level multiply functions, then this code is provided
1083  * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1084  * big_sqr_vec().
1085  *
1086  * There are two generic implementations.  One that assumes that
1087  * there is hardware and C compiler support for a 32 x 32 --> 64
1088  * bit unsigned multiply, but otherwise is not specific to any
1089  * processor, platform, or ISA.
1090  *
1091  * The other makes very few assumptions about hardware capabilities.
1092  * It does not even assume that there is any implementation of a
1093  * 32 x 32 --> 64 bit multiply that is accessible to C code and
1094  * appropriate to use.  It falls constructs 32 x 32 --> 64 bit
1095  * multiplies from 16 x 16 --> 32 bit multiplies.
1096  *
1097  */
1098 
1099 #if !defined(PSR_MUL)
1100 
1101 #ifdef UMUL64
1102 
1103 #if (BIG_CHUNK_SIZE == 32)
1104 
1105 #define UNROLL8
1106 
1107 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1108         p = pf * d; \
1109         pf = (uint64_t)a[R+1]; \
1110         t = p + cy; \
1111         r[R] = (uint32_t)t; \
1112         cy = t >> 32
1113 
1114 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1115         p = pf * d; \
1116         t = p + cy; \
1117         r[R] = (uint32_t)t; \
1118         cy = t >> 32
1119 
1120 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1121         t = (uint64_t)r[R]; \
1122         p = pf * d; \
1123         pf = (uint64_t)a[R+1]; \
1124         t = p + t + cy; \
1125         r[R] = (uint32_t)t; \
1126         cy = t >> 32
1127 
1128 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1129         t = (uint64_t)r[R]; \
1130         p = pf * d; \
1131         t = p + t + cy; \
1132         r[R] = (uint32_t)t; \
1133         cy = t >> 32
1134 
1135 #ifdef UNROLL8
1136 
1137 #define UNROLL 8
1138 
1139 /*
1140  * r = a * b
1141  * where r and a are vectors; b is a single 32-bit digit
1142  */
1143 
1144 uint32_t
1145 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1146 {
1147         uint64_t d, pf, p, t, cy;
1148 
1149         if (len == 0)
1150                 return (0);
1151         cy = 0;
1152         d = (uint64_t)b;
1153         pf = (uint64_t)a[0];
1154         while (len > UNROLL) {
1155                 MUL_SET_VEC_ROUND_PREFETCH(0);
1156                 MUL_SET_VEC_ROUND_PREFETCH(1);
1157                 MUL_SET_VEC_ROUND_PREFETCH(2);
1158                 MUL_SET_VEC_ROUND_PREFETCH(3);
1159                 MUL_SET_VEC_ROUND_PREFETCH(4);
1160                 MUL_SET_VEC_ROUND_PREFETCH(5);
1161                 MUL_SET_VEC_ROUND_PREFETCH(6);
1162                 MUL_SET_VEC_ROUND_PREFETCH(7);
1163                 r += UNROLL;
1164                 a += UNROLL;
1165                 len -= UNROLL;
1166         }
1167         if (len == UNROLL) {
1168                 MUL_SET_VEC_ROUND_PREFETCH(0);
1169                 MUL_SET_VEC_ROUND_PREFETCH(1);
1170                 MUL_SET_VEC_ROUND_PREFETCH(2);
1171                 MUL_SET_VEC_ROUND_PREFETCH(3);
1172                 MUL_SET_VEC_ROUND_PREFETCH(4);
1173                 MUL_SET_VEC_ROUND_PREFETCH(5);
1174                 MUL_SET_VEC_ROUND_PREFETCH(6);
1175                 MUL_SET_VEC_ROUND_NOPREFETCH(7);
1176                 return ((uint32_t)cy);
1177         }
1178         while (len > 1) {
1179                 MUL_SET_VEC_ROUND_PREFETCH(0);
1180                 ++r;
1181                 ++a;
1182                 --len;
1183         }
1184         if (len > 0) {
1185                 MUL_SET_VEC_ROUND_NOPREFETCH(0);
1186         }
1187         return ((uint32_t)cy);
1188 }
1189 
1190 /*
1191  * r += a * b
1192  * where r and a are vectors; b is a single 32-bit digit
1193  */
1194 
1195 uint32_t
1196 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1197 {
1198         uint64_t d, pf, p, t, cy;
1199 
1200         if (len == 0)
1201                 return (0);
1202         cy = 0;
1203         d = (uint64_t)b;
1204         pf = (uint64_t)a[0];
1205         while (len > 8) {
1206                 MUL_ADD_VEC_ROUND_PREFETCH(0);
1207                 MUL_ADD_VEC_ROUND_PREFETCH(1);
1208                 MUL_ADD_VEC_ROUND_PREFETCH(2);
1209                 MUL_ADD_VEC_ROUND_PREFETCH(3);
1210                 MUL_ADD_VEC_ROUND_PREFETCH(4);
1211                 MUL_ADD_VEC_ROUND_PREFETCH(5);
1212                 MUL_ADD_VEC_ROUND_PREFETCH(6);
1213                 MUL_ADD_VEC_ROUND_PREFETCH(7);
1214                 r += 8;
1215                 a += 8;
1216                 len -= 8;
1217         }
1218         if (len == 8) {
1219                 MUL_ADD_VEC_ROUND_PREFETCH(0);
1220                 MUL_ADD_VEC_ROUND_PREFETCH(1);
1221                 MUL_ADD_VEC_ROUND_PREFETCH(2);
1222                 MUL_ADD_VEC_ROUND_PREFETCH(3);
1223                 MUL_ADD_VEC_ROUND_PREFETCH(4);
1224                 MUL_ADD_VEC_ROUND_PREFETCH(5);
1225                 MUL_ADD_VEC_ROUND_PREFETCH(6);
1226                 MUL_ADD_VEC_ROUND_NOPREFETCH(7);
1227                 return ((uint32_t)cy);
1228         }
1229         while (len > 1) {
1230                 MUL_ADD_VEC_ROUND_PREFETCH(0);
1231                 ++r;
1232                 ++a;
1233                 --len;
1234         }
1235         if (len > 0) {
1236                 MUL_ADD_VEC_ROUND_NOPREFETCH(0);
1237         }
1238         return ((uint32_t)cy);
1239 }
1240 #endif /* UNROLL8 */
1241 
1242 void
1243 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1244 {
1245         uint32_t *tr, *ta;
1246         int tlen, row, col;
1247         uint64_t p, s, t, t2, cy;
1248         uint32_t d;
1249 
1250         tr = r + 1;
1251         ta = a;
1252         tlen = len - 1;
1253         tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
1254         while (--tlen > 0) {
1255                 tr += 2;
1256                 ++ta;
1257                 tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
1258         }
1259         s = (uint64_t)a[0];
1260         s = s * s;
1261         r[0] = (uint32_t)s;
1262         cy = s >> 32;
1263         p = ((uint64_t)r[1] << 1) + cy;
1264         r[1] = (uint32_t)p;
1265         cy = p >> 32;
1266         row = 1;
1267         col = 2;
1268         while (row < len) {
1269                 s = (uint64_t)a[row];
1270                 s = s * s;
1271                 p = (uint64_t)r[col] << 1;
1272                 t = p + s;
1273                 d = (uint32_t)t;
1274                 t2 = (uint64_t)d + cy;
1275                 r[col] = (uint32_t)t2;
1276                 cy = (t >> 32) + (t2 >> 32);
1277                 if (row == len - 1)
1278                         break;
1279                 p = ((uint64_t)r[col+1] << 1) + cy;
1280                 r[col+1] = (uint32_t)p;
1281                 cy = p >> 32;
1282                 ++row;
1283                 col += 2;
1284         }
1285         r[col+1] = (uint32_t)cy;
1286 }
1287 
1288 #else /* BIG_CHUNK_SIZE == 64 */
1289 
1290 /*
1291  * r = r + a * digit, r and a are vectors of length len
1292  * returns the carry digit
1293  */
1294 BIG_CHUNK_TYPE
1295 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1296     BIG_CHUNK_TYPE digit)
1297 {
1298         BIG_CHUNK_TYPE  cy, cy1, retcy, dlow, dhigh;
1299         int             i;
1300 
1301         cy1 = 0;
1302         dlow = digit & BIG_CHUNK_LOWHALFBITS;
1303         dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1304         for (i = 0; i < len; i++) {
1305                 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1306                     dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1307                     (r[i] & BIG_CHUNK_LOWHALFBITS);
1308                 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1309                     dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1310                     (r[i] >> (BIG_CHUNK_SIZE / 2));
1311                 r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
1312                     (cy1 << (BIG_CHUNK_SIZE / 2));
1313         }
1314         retcy = cy1 >> (BIG_CHUNK_SIZE / 2);
1315 
1316         cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
1317         for (i = 0; i < len - 1; i++) {
1318                 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1319                     dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
1320                     (r[i] >> (BIG_CHUNK_SIZE / 2));
1321                 r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1322                     (cy << (BIG_CHUNK_SIZE / 2));
1323                 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
1324                     dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
1325                     (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
1326         }
1327         cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1328             dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
1329             (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
1330         r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
1331             (cy << (BIG_CHUNK_SIZE / 2));
1332         retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
1333             dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;
1334 
1335         return (retcy);
1336 }
1337 
1338 
1339 /*
1340  * r = a * digit, r and a are vectors of length len
1341  * returns the carry digit
1342  */
1343 BIG_CHUNK_TYPE
1344 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1345     BIG_CHUNK_TYPE digit)
1346 {
1347         int     i;
1348 
1349         ASSERT(r != a);
1350         for (i = 0; i < len; i++) {
1351                 r[i] = 0;
1352         }
1353         return (big_mul_add_vec(r, a, len, digit));
1354 }
1355 
1356 void
1357 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1358 {
1359         int i;
1360 
1361         ASSERT(r != a);
1362         r[len] = big_mul_set_vec(r, a, len, a[0]);
1363         for (i = 1; i < len; ++i)
1364                 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1365 }
1366 
1367 #endif /* BIG_CHUNK_SIZE == 32/64 */
1368 
1369 #else /* ! UMUL64 */
1370 
1371 #if (BIG_CHUNK_SIZE != 32)
1372 #error Don't use 64-bit chunks without defining UMUL64
1373 #endif
1374 
1375 
1376 /*
1377  * r = r + a * digit, r and a are vectors of length len
1378  * returns the carry digit
1379  */
1380 uint32_t
1381 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1382 {
1383         uint32_t cy, cy1, retcy, dlow, dhigh;
1384         int i;
1385 
1386         cy1 = 0;
1387         dlow = digit & 0xffff;
1388         dhigh = digit >> 16;
1389         for (i = 0; i < len; i++) {
1390                 cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
1391                 cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
1392                 r[i] = (cy & 0xffff) | (cy1 << 16);
1393         }
1394         retcy = cy1 >> 16;
1395 
1396         cy1 = r[0] & 0xffff;
1397         for (i = 0; i < len - 1; i++) {
1398                 cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
1399                 r[i] = (cy1 & 0xffff) | (cy << 16);
1400                 cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
1401         }
1402         cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
1403         r[len - 1] = (cy1 & 0xffff) | (cy << 16);
1404         retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;
1405 
1406         return (retcy);
1407 }
1408 
1409 
1410 /*
1411  * r = a * digit, r and a are vectors of length len
1412  * returns the carry digit
1413  */
1414 uint32_t
1415 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1416 {
1417         int     i;
1418 
1419         ASSERT(r != a);
1420         for (i = 0; i < len; i++) {
1421                 r[i] = 0;
1422         }
1423 
1424         return (big_mul_add_vec(r, a, len, digit));
1425 }
1426 
1427 void
1428 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1429 {
1430         int i;
1431 
1432         ASSERT(r != a);
1433         r[len] = big_mul_set_vec(r, a, len, a[0]);
1434         for (i = 1; i < len; ++i)
1435                 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1436 }
1437 
1438 #endif /* UMUL64 */
1439 
1440 void
1441 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1442     BIG_CHUNK_TYPE *b, int blen)
1443 {
1444         int i;
1445 
1446         r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1447         for (i = 1; i < blen; ++i)
1448                 r[alen + i] = big_mul_add_vec(r+i, a, alen, b[i]);
1449 }
1450 
1451 
1452 #endif /* ! PSR_MUL */
1453 
1454 
1455 /*
1456  * result = aa * bb  result->value should be big enough to hold the result
1457  *
1458  * Implementation: Standard grammar school algorithm
1459  *
1460  */
1461 BIG_ERR_CODE
1462 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1463 {
1464         BIGNUM          tmp1;
1465         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
1466         BIG_CHUNK_TYPE  *r, *t, *a, *b;
1467         BIG_ERR_CODE    err;
1468         int             i, alen, blen, rsize, sign, diff;
1469 
1470         if (aa == bb) {
1471                 diff = 0;
1472         } else {
1473                 diff = big_cmp_abs(aa, bb);
1474                 if (diff < 0) {
1475                         BIGNUM *tt;
1476                         tt = aa;
1477                         aa = bb;
1478                         bb = tt;
1479                 }
1480         }
1481         a = aa->value;
1482         b = bb->value;
1483         alen = aa->len;
1484         blen = bb->len;
1485         while ((alen > 1) && (a[alen - 1] == 0)) {
1486                 alen--;
1487         }
1488         aa->len = alen;
1489         while ((blen > 1) && (b[blen - 1] == 0)) {
1490                 blen--;
1491         }
1492         bb->len = blen;
1493 
1494         rsize = alen + blen;
1495         if (result->size < rsize) {
1496                 err = big_extend(result, rsize);
1497                 if (err != BIG_OK) {
1498                         return (err);
1499                 }
1500                 /* aa or bb might be an alias to result */
1501                 a = aa->value;
1502                 b = bb->value;
1503         }
1504         r = result->value;
1505 
1506         if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1507                 result->len = 1;
1508                 result->sign = 1;
1509                 r[0] = 0;
1510                 return (BIG_OK);
1511         }
1512         sign = aa->sign * bb->sign;
1513         if ((alen == 1) && (a[0] == 1)) {
1514                 for (i = 0; i < blen; i++) {
1515                         r[i] = b[i];
1516                 }
1517                 result->len = blen;
1518                 result->sign = sign;
1519                 return (BIG_OK);
1520         }
1521         if ((blen == 1) && (b[0] == 1)) {
1522                 for (i = 0; i < alen; i++) {
1523                         r[i] = a[i];
1524                 }
1525                 result->len = alen;
1526                 result->sign = sign;
1527                 return (BIG_OK);
1528         }
1529 
1530         if ((err = big_init1(&tmp1, rsize,
1531             tmp1value, arraysize(tmp1value))) != BIG_OK) {
1532                 return (err);
1533         }
1534         (void) big_copy(&tmp1, aa);
1535         t = tmp1.value;
1536 
1537         for (i = 0; i < rsize; i++) {
1538                 t[i] = 0;
1539         }
1540 
1541         if (diff == 0 && alen > 2) {
1542                 BIG_SQR_VEC(t, a, alen);
1543         } else if (blen > 0) {
1544                 BIG_MUL_VEC(t, a, alen, b, blen);
1545         }
1546 
1547         if (t[rsize - 1] == 0) {
1548                 tmp1.len = rsize - 1;
1549         } else {
1550                 tmp1.len = rsize;
1551         }
1552         if ((err = big_copy(result, &tmp1)) != BIG_OK) {
1553                 return (err);
1554         }
1555         result->sign = sign;
1556 
1557         big_finish(&tmp1);
1558 
1559         return (BIG_OK);
1560 }
1561 
1562 
1563 /*
1564  * caller must ensure that  a < n,  b < n  and  ret->size >=  2 * n->len + 1
1565  * and that ret is not n
1566  */
1567 BIG_ERR_CODE
1568 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1569 {
1570         int     i, j, nlen, needsubtract;
1571         BIG_CHUNK_TYPE  *nn, *rr;
1572         BIG_CHUNK_TYPE  digit, c;
1573         BIG_ERR_CODE    err;
1574 
1575         nlen = n->len;
1576         nn = n->value;
1577 
1578         rr = ret->value;
1579 
1580         if ((err = big_mul(ret, a, b)) != BIG_OK) {
1581                 return (err);
1582         }
1583 
1584         rr = ret->value;
1585         for (i = ret->len; i < 2 * nlen + 1; i++) {
1586                 rr[i] = 0;
1587         }
1588         for (i = 0; i < nlen; i++) {
1589                 digit = rr[i];
1590                 digit = digit * n0;
1591 
1592                 c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit);
1593                 j = i + nlen;
1594                 rr[j] += c;
1595                 while (rr[j] < c) {
1596                         rr[j + 1] += 1;
1597                         j++;
1598                         c = 1;
1599                 }
1600         }
1601 
1602         needsubtract = 0;
1603         if ((rr[2 * nlen]  != 0))
1604                 needsubtract = 1;
1605         else {
1606                 for (i = 2 * nlen - 1; i >= nlen; i--) {
1607                         if (rr[i] > nn[i - nlen]) {
1608                                 needsubtract = 1;
1609                                 break;
1610                         } else if (rr[i] < nn[i - nlen]) {
1611                                 break;
1612                         }
1613                 }
1614         }
1615         if (needsubtract)
1616                 big_sub_vec(rr, rr + nlen, nn, nlen);
1617         else {
1618                 for (i = 0; i < nlen; i++) {
1619                         rr[i] = rr[i + nlen];
1620                 }
1621         }
1622         for (i = nlen - 1; (i >= 0) && (rr[i] == 0); i--)
1623                 ;
1624         ret->len = i+1;
1625 
1626         return (BIG_OK);
1627 }
1628 
1629 
1630 BIG_CHUNK_TYPE
1631 big_n0(BIG_CHUNK_TYPE n)
1632 {
1633         int             i;
1634         BIG_CHUNK_TYPE  result, tmp;
1635 
1636         result = 0;
1637         tmp = BIG_CHUNK_ALLBITS;
1638         for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1639                 if ((tmp & 1) == 1) {
1640                         result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1641                         tmp = tmp - n;
1642                 } else {
1643                         result = (result >> 1);
1644                 }
1645                 tmp = tmp >> 1;
1646         }
1647 
1648         return (result);
1649 }
1650 
1651 
1652 int
1653 big_numbits(BIGNUM *n)
1654 {
1655         int             i, j;
1656         BIG_CHUNK_TYPE  t;
1657 
1658         for (i = n->len - 1; i > 0; i--) {
1659                 if (n->value[i] != 0) {
1660                         break;
1661                 }
1662         }
1663         t = n->value[i];
1664         for (j = BIG_CHUNK_SIZE; j > 0; j--) {
1665                 if ((t & BIG_CHUNK_HIGHBIT) == 0) {
1666                         t = t << 1;
1667                 } else {
1668                         return (BIG_CHUNK_SIZE * i + j);
1669                 }
1670         }
1671         return (0);
1672 }
1673 
1674 
1675 /* caller must make sure that a < n */
1676 BIG_ERR_CODE
1677 big_mont_rr(BIGNUM *result, BIGNUM *n)
1678 {
1679         BIGNUM          rr;
1680         BIG_CHUNK_TYPE  rrvalue[BIGTMPSIZE];
1681         int             len, i;
1682         BIG_ERR_CODE    err;
1683 
1684         rr.malloced = 0;
1685         len = n->len;
1686 
1687         if ((err = big_init1(&rr, 2 * len + 1,
1688             rrvalue, arraysize(rrvalue))) != BIG_OK) {
1689                 return (err);
1690         }
1691 
1692         for (i = 0; i < 2 * len; i++) {
1693                 rr.value[i] = 0;
1694         }
1695         rr.value[2 * len] = 1;
1696         rr.len = 2 * len + 1;
1697         if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1698                 goto ret;
1699         }
1700         err = big_copy(result, &rr);
1701 ret:
1702         big_finish(&rr);
1703         return (err);
1704 }
1705 
1706 
1707 /* caller must make sure that a < n */
1708 BIG_ERR_CODE
1709 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
1710     BIGNUM *n_rr)
1711 {
1712         BIGNUM          rr;
1713         BIG_CHUNK_TYPE  rrvalue[BIGTMPSIZE];
1714         int             len, i;
1715         BIG_ERR_CODE    err;
1716 
1717         rr.malloced = 0;
1718         len = n->len;
1719 
1720         if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
1721             != BIG_OK) {
1722                 return (err);
1723         }
1724 
1725         if (n_rr == NULL) {
1726                 for (i = 0; i < 2 * len; i++) {
1727                         rr.value[i] = 0;
1728                 }
1729                 rr.value[2 * len] = 1;
1730                 rr.len = 2 * len + 1;
1731                 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1732                         goto ret;
1733                 }
1734                 n_rr = &rr;
1735         }
1736 
1737         if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
1738                 goto ret;
1739         }
1740         err = big_copy(result, &rr);
1741 
1742 ret:
1743         big_finish(&rr);
1744         return (err);
1745 }
1746 
1747 
1748 #ifdef  USE_FLOATING_POINT
1749 #define big_modexp_ncp_float    big_modexp_ncp_sw
1750 #else
1751 #define big_modexp_ncp_int      big_modexp_ncp_sw
1752 #endif
1753 
1754 #define MAX_EXP_BIT_GROUP_SIZE 6
1755 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1756 
1757 /* ARGSUSED */
1758 static BIG_ERR_CODE
1759 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1760     BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1761 
1762 {
1763         BIGNUM          apowers[APOWERS_MAX_SIZE];
1764         BIGNUM          tmp1;
1765         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
1766         int             i, j, k, l, m, p;
1767         int             bit, bitind, bitcount, groupbits, apowerssize;
1768         int             nbits;
1769         BIG_ERR_CODE    err;
1770 
1771         nbits = big_numbits(e);
1772         if (nbits < 50) {
1773                 groupbits = 1;
1774                 apowerssize = 1;
1775         } else {
1776                 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1777                 apowerssize = 1 << (groupbits - 1);
1778         }
1779 
1780 
1781         if ((err = big_init1(&tmp1, 2 * n->len + 1,
1782             tmp1value, arraysize(tmp1value))) != BIG_OK) {
1783                 return (err);
1784         }
1785 
1786         /* set the malloced bit to help cleanup */
1787         for (i = 0; i < apowerssize; i++) {
1788                 apowers[i].malloced = 0;
1789         }
1790         for (i = 0; i < apowerssize; i++) {
1791                 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1792                     BIG_OK) {
1793                         goto ret;
1794                 }
1795         }
1796 
1797         (void) big_copy(&(apowers[0]), ma);
1798 
1799         if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1800                 goto ret;
1801         }
1802         (void) big_copy(ma, &tmp1);
1803 
1804         for (i = 1; i < apowerssize; i++) {
1805                 if ((err = big_mont_mul(&tmp1, ma,
1806                     &(apowers[i-1]), n, n0)) != BIG_OK) {
1807                         goto ret;
1808                 }
1809                 (void) big_copy(&apowers[i], &tmp1);
1810         }
1811 
1812         bitind = nbits % BIG_CHUNK_SIZE;
1813         k = 0;
1814         l = 0;
1815         p = 0;
1816         bitcount = 0;
1817         for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1818                 for (j = bitind - 1; j >= 0; j--) {
1819                         bit = (e->value[i] >> j) & 1;
1820                         if ((bitcount == 0) && (bit == 0)) {
1821                                 if ((err = big_mont_mul(tmp,
1822                                     tmp, tmp, n, n0)) != BIG_OK) {
1823                                         goto ret;
1824                                 }
1825                         } else {
1826                                 bitcount++;
1827                                 p = p * 2 + bit;
1828                                 if (bit == 1) {
1829                                         k = k + l + 1;
1830                                         l = 0;
1831                                 } else {
1832                                         l++;
1833                                 }
1834                                 if (bitcount == groupbits) {
1835                                         for (m = 0; m < k; m++) {
1836                                                 if ((err = big_mont_mul(tmp,
1837                                                     tmp, tmp, n, n0)) !=
1838                                                     BIG_OK) {
1839                                                         goto ret;
1840                                                 }
1841                                         }
1842                                         if ((err = big_mont_mul(tmp, tmp,
1843                                             &(apowers[p >> (l + 1)]),
1844                                             n, n0)) != BIG_OK) {
1845                                                 goto ret;
1846                                         }
1847                                         for (m = 0; m < l; m++) {
1848                                                 if ((err = big_mont_mul(tmp,
1849                                                     tmp, tmp, n, n0)) !=
1850                                                     BIG_OK) {
1851                                                         goto ret;
1852                                                 }
1853                                         }
1854                                         k = 0;
1855                                         l = 0;
1856                                         p = 0;
1857                                         bitcount = 0;
1858                                 }
1859                         }
1860                 }
1861                 bitind = BIG_CHUNK_SIZE;
1862         }
1863 
1864         for (m = 0; m < k; m++) {
1865                 if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
1866                         goto ret;
1867                 }
1868         }
1869         if (p != 0) {
1870                 if ((err = big_mont_mul(tmp, tmp,
1871                     &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
1872                         goto ret;
1873                 }
1874         }
1875         for (m = 0; m < l; m++) {
1876                 if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
1877                         goto ret;
1878                 }
1879         }
1880 
1881 ret:
1882         for (i = apowerssize - 1; i >= 0; i--) {
1883                 big_finish(&(apowers[i]));
1884         }
1885         big_finish(&tmp1);
1886 
1887         return (err);
1888 }
1889 
1890 
1891 #ifdef USE_FLOATING_POINT
1892 
1893 #ifdef _KERNEL
1894 
1895 #include <sys/sysmacros.h>
1896 #include <sys/regset.h>
1897 #include <sys/fpu/fpusystm.h>
1898 
1899 /* the alignment for block stores to save fp registers */
1900 #define FPR_ALIGN       (64)
1901 
1902 extern void big_savefp(kfpu_t *);
1903 extern void big_restorefp(kfpu_t *);
1904 
1905 #endif /* _KERNEL */
1906 
1907 /*
1908  * This version makes use of floating point for performance
1909  */
1910 static BIG_ERR_CODE
1911 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1912     BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1913 {
1914 
1915         int             i, j, k, l, m, p, bit, bitind, bitcount, nlen;
1916         double          dn0;
1917         double          *dn, *dt, *d16r, *d32r;
1918         uint32_t        *nint, *prod;
1919         double          *apowers[APOWERS_MAX_SIZE];
1920         int             nbits, groupbits, apowerssize;
1921         BIG_ERR_CODE    err = BIG_OK;
1922 
1923 #ifdef _KERNEL
1924         uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1925         kfpu_t *fpu;
1926 
1927 #ifdef DEBUG
1928         if (!fpu_exists)
1929                 return (BIG_GENERAL_ERR);
1930 #endif
1931 
1932         fpu =  (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1933         big_savefp(fpu);
1934 
1935 #endif /* _KERNEL */
1936 
1937         nbits = big_numbits(e);
1938         if (nbits < 50) {
1939                 groupbits = 1;
1940                 apowerssize = 1;
1941         } else {
1942                 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1943                 apowerssize = 1 << (groupbits - 1);
1944         }
1945 
1946         nlen = (BIG_CHUNK_SIZE / 32) * n->len;
1947         dn0 = (double)(n0 & 0xffff);
1948 
1949         dn = dt = d16r = d32r = NULL;
1950         nint = prod = NULL;
1951         for (i = 0; i < apowerssize; i++) {
1952                 apowers[i] = NULL;
1953         }
1954 
1955         if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
1956                 err = BIG_NO_MEM;
1957                 goto ret;
1958         }
1959         if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
1960                 err = BIG_NO_MEM;
1961                 goto ret;
1962         }
1963         if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
1964                 err = BIG_NO_MEM;
1965                 goto ret;
1966         }
1967         if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
1968                 err = BIG_NO_MEM;
1969                 goto ret;
1970         }
1971         if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
1972                 err = BIG_NO_MEM;
1973                 goto ret;
1974         }
1975         if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
1976                 err = BIG_NO_MEM;
1977                 goto ret;
1978         }
1979         for (i = 0; i < apowerssize; i++) {
1980                 if ((apowers[i] = big_malloc((2 * nlen + 1) *
1981                     sizeof (double))) == NULL) {
1982                         err = BIG_NO_MEM;
1983                         goto ret;
1984                 }
1985         }
1986 
1987 #if (BIG_CHUNK_SIZE == 32)
1988         for (i = 0; i < ma->len; i++) {
1989                 nint[i] = ma->value[i];
1990         }
1991         for (; i < nlen; i++) {
1992                 nint[i] = 0;
1993         }
1994 #else
1995         for (i = 0; i < ma->len; i++) {
1996                 nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
1997                 nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
1998         }
1999         for (i = ma->len * 2; i < nlen; i++) {
2000                 nint[i] = 0;
2001         }
2002 #endif
2003         conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);
2004 
2005 #if (BIG_CHUNK_SIZE == 32)
2006         for (i = 0; i < n->len; i++) {
2007                 nint[i] = n->value[i];
2008         }
2009         for (; i < nlen; i++) {
2010                 nint[i] = 0;
2011         }
2012 #else
2013         for (i = 0; i < n->len; i++) {
2014                 nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
2015                 nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
2016         }
2017         for (i = n->len * 2; i < nlen; i++) {
2018                 nint[i] = 0;
2019         }
2020 #endif
2021         conv_i32_to_d32(dn, nint, nlen);
2022 
2023         mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
2024         conv_i32_to_d32(d32r, prod, nlen);
2025         for (i = 1; i < apowerssize; i++) {
2026                 mont_mulf_noconv(prod, d32r, apowers[i - 1],
2027                     dt, dn, nint, nlen, dn0);
2028                 conv_i32_to_d16(apowers[i], prod, nlen);
2029         }
2030 
2031 #if (BIG_CHUNK_SIZE == 32)
2032         for (i = 0; i < tmp->len; i++) {
2033                 prod[i] = tmp->value[i];
2034         }
2035         for (; i < nlen + 1; i++) {
2036                 prod[i] = 0;
2037         }
2038 #else
2039         for (i = 0; i < tmp->len; i++) {
2040                 prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
2041                 prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
2042         }
2043         for (i = tmp->len * 2; i < nlen + 1; i++) {
2044                 prod[i] = 0;
2045         }
2046 #endif
2047 
2048         bitind = nbits % BIG_CHUNK_SIZE;
2049         k = 0;
2050         l = 0;
2051         p = 0;
2052         bitcount = 0;
2053         for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
2054                 for (j = bitind - 1; j >= 0; j--) {
2055                         bit = (e->value[i] >> j) & 1;
2056                         if ((bitcount == 0) && (bit == 0)) {
2057                                 conv_i32_to_d32_and_d16(d32r, d16r,
2058                                     prod, nlen);
2059                                 mont_mulf_noconv(prod, d32r, d16r,
2060                                     dt, dn, nint, nlen, dn0);
2061                         } else {
2062                                 bitcount++;
2063                                 p = p * 2 + bit;
2064                                 if (bit == 1) {
2065                                         k = k + l + 1;
2066                                         l = 0;
2067                                 } else {
2068                                         l++;
2069                                 }
2070                                 if (bitcount == groupbits) {
2071                                         for (m = 0; m < k; m++) {
2072                                                 conv_i32_to_d32_and_d16(d32r,
2073                                                     d16r, prod, nlen);
2074                                                 mont_mulf_noconv(prod, d32r,
2075                                                     d16r, dt, dn, nint,
2076                                                     nlen, dn0);
2077                                         }
2078                                         conv_i32_to_d32(d32r, prod, nlen);
2079                                         mont_mulf_noconv(prod, d32r,
2080                                             apowers[p >> (l+1)],
2081                                             dt, dn, nint, nlen, dn0);
2082                                         for (m = 0; m < l; m++) {
2083                                                 conv_i32_to_d32_and_d16(d32r,
2084                                                     d16r, prod, nlen);
2085                                                 mont_mulf_noconv(prod, d32r,
2086                                                     d16r, dt, dn, nint,
2087                                                     nlen, dn0);
2088                                         }
2089                                         k = 0;
2090                                         l = 0;
2091                                         p = 0;
2092                                         bitcount = 0;
2093                                 }
2094                         }
2095                 }
2096                 bitind = BIG_CHUNK_SIZE;
2097         }
2098 
2099         for (m = 0; m < k; m++) {
2100                 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2101                 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2102         }
2103         if (p != 0) {
2104                 conv_i32_to_d32(d32r, prod, nlen);
2105                 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
2106                     dt, dn, nint, nlen, dn0);
2107         }
2108         for (m = 0; m < l; m++) {
2109                 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2110                 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
2111         }
2112 
2113 #if (BIG_CHUNK_SIZE == 32)
2114         for (i = 0; i < nlen; i++) {
2115                 result->value[i] = prod[i];
2116         }
2117         for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
2118                 ;
2119 #else
2120         for (i = 0; i < nlen / 2; i++) {
2121                 result->value[i] = (uint64_t)(prod[2 * i]) +
2122                     (((uint64_t)(prod[2 * i + 1])) << 32);
2123         }
2124         for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
2125                 ;
2126 #endif
2127         result->len = i + 1;
2128 
2129 ret:
2130         for (i = apowerssize - 1; i >= 0; i--) {
2131                 if (apowers[i] != NULL)
2132                         big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
2133         }
2134         if (d32r != NULL) {
2135                 big_free(d32r, nlen * sizeof (double));
2136         }
2137         if (d16r != NULL) {
2138                 big_free(d16r, (2 * nlen + 1) * sizeof (double));
2139         }
2140         if (prod != NULL) {
2141                 big_free(prod, (nlen + 1) * sizeof (uint32_t));
2142         }
2143         if (nint != NULL) {
2144                 big_free(nint, nlen * sizeof (uint32_t));
2145         }
2146         if (dt != NULL) {
2147                 big_free(dt, (4 * nlen + 2) * sizeof (double));
2148         }
2149         if (dn != NULL) {
2150                 big_free(dn, nlen * sizeof (double));
2151         }
2152 
2153 #ifdef _KERNEL
2154         big_restorefp(fpu);
2155 #endif
2156 
2157         return (err);
2158 }
2159 
2160 #endif /* USE_FLOATING_POINT */
2161 
2162 
2163 BIG_ERR_CODE
2164 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr,
2165     big_modexp_ncp_info_t *info)
2166 {
2167         BIGNUM          ma, tmp, rr;
2168         BIG_CHUNK_TYPE  mavalue[BIGTMPSIZE];
2169         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2170         BIG_CHUNK_TYPE  rrvalue[BIGTMPSIZE];
2171         BIG_ERR_CODE    err;
2172         BIG_CHUNK_TYPE  n0;
2173 
2174         if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue)))  !=
2175             BIG_OK) {
2176                 return (err);
2177         }
2178         ma.len = 1;
2179         ma.value[0] = 0;
2180 
2181         if ((err = big_init1(&tmp, 2 * n->len + 1,
2182             tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2183                 goto ret1;
2184         }
2185 
2186         /* set the malloced bit to help cleanup */
2187         rr.malloced = 0;
2188         if (n_rr == NULL) {
2189                 if ((err = big_init1(&rr, 2 * n->len + 1,
2190                     rrvalue, arraysize(rrvalue))) != BIG_OK) {
2191                         goto ret2;
2192                 }
2193                 if (big_mont_rr(&rr, n) != BIG_OK) {
2194                         goto ret;
2195                 }
2196                 n_rr = &rr;
2197         }
2198 
2199         n0 = big_n0(n->value[0]);
2200 
2201         if (big_cmp_abs(a, n) > 0) {
2202                 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2203                         goto ret;
2204                 }
2205                 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2206         } else {
2207                 err = big_mont_conv(&ma, a, n, n0, n_rr);
2208         }
2209         if (err != BIG_OK) {
2210                 goto ret;
2211         }
2212 
2213         tmp.len = 1;
2214         tmp.value[0] = 1;
2215         if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) {
2216                 goto ret;
2217         }
2218 
2219         if ((info != NULL) && (info->func != NULL)) {
2220                 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0,
2221                     info->ncp, info->reqp);
2222         } else {
2223                 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0);
2224         }
2225         if (err != BIG_OK) {
2226                 goto ret;
2227         }
2228 
2229         ma.value[0] = 1;
2230         ma.len = 1;
2231         if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) {
2232                 goto ret;
2233         }
2234         err = big_copy(result, &tmp);
2235 
2236 ret:
2237         if (rr.malloced) {
2238                 big_finish(&rr);
2239         }
2240 ret2:
2241         big_finish(&tmp);
2242 ret1:
2243         big_finish(&ma);
2244 
2245         return (err);
2246 }
2247 
2248 BIG_ERR_CODE
2249 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
2250 {
2251         return (big_modexp_ext(result, a, e, n, n_rr, NULL));
2252 }
2253 
2254 
2255 BIG_ERR_CODE
2256 big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2257     BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2258     BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
2259 {
2260         BIGNUM          ap, aq, tmp;
2261         int             alen, biglen, sign;
2262         BIG_ERR_CODE    err;
2263 
2264         if (p->len > q->len) {
2265                 biglen = p->len;
2266         } else {
2267                 biglen = q->len;
2268         }
2269 
2270         if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
2271                 return (err);
2272         }
2273         if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
2274                 goto ret1;
2275         }
2276         if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
2277                 goto ret2;
2278         }
2279 
2280         /*
2281          * check whether a is too short - to avoid timing attacks
2282          */
2283         alen = a->len;
2284         while ((alen > p->len) && (a->value[alen - 1] == 0)) {
2285                 alen--;
2286         }
2287         if (alen < p->len + q->len) {
2288                 /*
2289                  * a is too short, add p*q to it before
2290                  * taking it modulo p and q
2291                  * this will also affect timing, but this difference
2292                  * does not depend on p or q, only on a
2293                  * (in "normal" operation, this path will never be
2294                  * taken, so it is not a performance penalty
2295                  */
2296                 if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
2297                         goto ret;
2298                 }
2299                 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
2300                         goto ret;
2301                 }
2302                 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
2303                         goto ret;
2304                 }
2305                 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2306                         goto ret;
2307                 }
2308         } else {
2309                 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
2310                         goto ret;
2311                 }
2312                 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
2313                         goto ret;
2314                 }
2315         }
2316 
2317         if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
2318             BIG_OK) {
2319                 goto ret;
2320         }
2321         if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
2322             BIG_OK) {
2323                 goto ret;
2324         }
2325         if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
2326                 goto ret;
2327         }
2328         if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
2329                 goto ret;
2330         }
2331         sign = tmp.sign;
2332         tmp.sign = 1;
2333         if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
2334                 goto ret;
2335         }
2336         if ((sign == -1) && (!big_is_zero(&aq))) {
2337                 (void) big_sub_pos(&aq, q, &aq);
2338         }
2339         if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
2340                 goto ret;
2341         }
2342         err = big_add_abs(result, &ap, &tmp);
2343 
2344 ret:
2345         big_finish(&tmp);
2346 ret2:
2347         big_finish(&aq);
2348 ret1:
2349         big_finish(&ap);
2350 
2351         return (err);
2352 }
2353 
2354 
2355 BIG_ERR_CODE
2356 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
2357     BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
2358     BIGNUM *p_rr, BIGNUM *q_rr)
2359 {
2360         return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
2361             p, q, pinvmodq, p_rr, q_rr, NULL));
2362 }
2363 
2364 
2365 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2366 BIGNUM big_One = {1, 1, 1, 0, onearr};
2367 
2368 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2369 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2370 
2371 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2372 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2373 
2374 
2375 BIG_ERR_CODE
2376 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2377 {
2378         BIGNUM          *high, *low, *mid, *t;
2379         BIGNUM          t1, t2, t3, prod;
2380         BIG_CHUNK_TYPE  t1value[BIGTMPSIZE];
2381         BIG_CHUNK_TYPE  t2value[BIGTMPSIZE];
2382         BIG_CHUNK_TYPE  t3value[BIGTMPSIZE];
2383         BIG_CHUNK_TYPE  prodvalue[BIGTMPSIZE];
2384         int             i, nbits, diff, nrootbits, highbits;
2385         BIG_ERR_CODE    err;
2386 
2387         nbits = big_numbits(n);
2388 
2389         if ((err = big_init1(&t1, n->len + 1,
2390             t1value, arraysize(t1value))) != BIG_OK)
2391                 return (err);
2392         if ((err = big_init1(&t2, n->len + 1,
2393             t2value, arraysize(t2value))) != BIG_OK)
2394                 goto ret1;
2395         if ((err = big_init1(&t3, n->len + 1,
2396             t3value, arraysize(t3value))) != BIG_OK)
2397                 goto ret2;
2398         if ((err = big_init1(&prod, n->len + 1,
2399             prodvalue, arraysize(prodvalue))) != BIG_OK)
2400                 goto ret3;
2401 
2402         nrootbits = (nbits + 1) / 2;
2403         t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2404         for (i = 0; i < t1.len; i++) {
2405                 t1.value[i] = 0;
2406                 t2.value[i] = BIG_CHUNK_ALLBITS;
2407         }
2408         highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
2409         if (highbits == BIG_CHUNK_SIZE) {
2410                 t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
2411                 t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
2412         } else {
2413                 t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
2414                 t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
2415         }
2416 
2417         high = &t2;
2418         low = &t1;
2419         mid = &t3;
2420 
2421         if ((err = big_mul(&prod, high, high)) != BIG_OK) {
2422                 goto ret;
2423         }
2424         diff = big_cmp_abs(&prod, n);
2425         if (diff <= 0) {
2426                 err = big_copy(result, high);
2427                 goto ret;
2428         }
2429 
2430         (void) big_sub_pos(mid, high, low);
2431         while (big_cmp_abs(&big_One, mid) != 0) {
2432                 (void) big_add_abs(mid, high, low);
2433                 (void) big_half_pos(mid, mid);
2434                 if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
2435                         goto ret;
2436                 diff = big_cmp_abs(&prod, n);
2437                 if (diff > 0) {
2438                         t = high;
2439                         high = mid;
2440                         mid = t;
2441                 } else if (diff < 0) {
2442                         t = low;
2443                         low = mid;
2444                         mid = t;
2445                 } else {
2446                         err = big_copy(result, low);
2447                         goto ret;
2448                 }
2449                 (void) big_sub_pos(mid, high, low);
2450         }
2451 
2452         err = big_copy(result, low);
2453 ret:
2454         if (prod.malloced) big_finish(&prod);
2455 ret3:
2456         if (t3.malloced) big_finish(&t3);
2457 ret2:
2458         if (t2.malloced) big_finish(&t2);
2459 ret1:
2460         if (t1.malloced) big_finish(&t1);
2461 
2462         return (err);
2463 }
2464 
2465 
2466 BIG_ERR_CODE
2467 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
2468 {
2469         BIGNUM          *t, *tmp2, *m, *n;
2470         BIGNUM          t1, t2, t3;
2471         BIG_CHUNK_TYPE  t1value[BIGTMPSIZE];
2472         BIG_CHUNK_TYPE  t2value[BIGTMPSIZE];
2473         BIG_CHUNK_TYPE  t3value[BIGTMPSIZE];
2474         int             len, err;
2475 
2476         if (big_is_zero(nn) ||
2477             (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
2478                 *jac = 0;
2479                 return (BIG_OK);
2480         }
2481 
2482         if (nn->len > mm->len) {
2483                 len = nn->len;
2484         } else {
2485                 len = mm->len;
2486         }
2487 
2488         if ((err = big_init1(&t1, len,
2489             t1value, arraysize(t1value))) != BIG_OK) {
2490                 return (err);
2491         }
2492         if ((err = big_init1(&t2, len,
2493             t2value, arraysize(t2value))) != BIG_OK) {
2494                 goto ret1;
2495         }
2496         if ((err = big_init1(&t3, len,
2497             t3value, arraysize(t3value))) != BIG_OK) {
2498                 goto ret2;
2499         }
2500 
2501         n = &t1;
2502         m = &t2;
2503         tmp2 = &t3;
2504 
2505         (void) big_copy(n, nn);
2506         (void) big_copy(m, mm);
2507 
2508         *jac = 1;
2509         while (big_cmp_abs(&big_One, m) != 0) {
2510                 if (big_is_zero(n)) {
2511                         *jac = 0;
2512                         goto ret;
2513                 }
2514                 if ((m->value[0] & 1) == 0) {
2515                         if (((n->value[0] & 7) == 3) ||
2516                             ((n->value[0] & 7) == 5))
2517                                 *jac = -*jac;
2518                         (void) big_half_pos(m, m);
2519                 } else if ((n->value[0] & 1) == 0) {
2520                         if (((m->value[0] & 7) == 3) ||
2521                             ((m->value[0] & 7) == 5))
2522                                 *jac = -*jac;
2523                         (void) big_half_pos(n, n);
2524                 } else {
2525                         if (((m->value[0] & 3) == 3) &&
2526                             ((n->value[0] & 3) == 3)) {
2527                                 *jac = -*jac;
2528                         }
2529                         if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
2530                                 goto ret;
2531                         }
2532                         t = tmp2;
2533                         tmp2 = m;
2534                         m = n;
2535                         n = t;
2536                 }
2537         }
2538         err = BIG_OK;
2539 
2540 ret:
2541         if (t3.malloced) big_finish(&t3);
2542 ret2:
2543         if (t2.malloced) big_finish(&t2);
2544 ret1:
2545         if (t1.malloced) big_finish(&t1);
2546 
2547         return (err);
2548 }
2549 
2550 
2551 BIG_ERR_CODE
2552 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2553 {
2554         int             m, w, i;
2555         BIG_CHUNK_TYPE  bit;
2556         BIGNUM          ki, tmp, tmp2;
2557         BIG_CHUNK_TYPE  kivalue[BIGTMPSIZE];
2558         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2559         BIG_CHUNK_TYPE  tmp2value[BIGTMPSIZE];
2560         BIG_ERR_CODE    err;
2561 
2562         if (big_cmp_abs(k, &big_One) == 0) {
2563                 (void) big_copy(Lk, p);
2564                 (void) big_copy(Lkminus1, &big_Two);
2565                 return (BIG_OK);
2566         }
2567 
2568         if ((err = big_init1(&ki, k->len + 1,
2569             kivalue, arraysize(kivalue))) != BIG_OK)
2570                 return (err);
2571 
2572         if ((err = big_init1(&tmp, 2 * n->len +1,
2573             tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2574                 goto ret1;
2575 
2576         if ((err = big_init1(&tmp2, n->len,
2577             tmp2value, arraysize(tmp2value))) != BIG_OK)
2578                 goto ret2;
2579 
2580         m = big_numbits(k);
2581         ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2582         w = (m - 1) / BIG_CHUNK_SIZE;
2583         bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2584         for (i = 0; i < ki.len; i++) {
2585                 ki.value[i] = 0;
2586         }
2587         ki.value[ki.len - 1] = bit;
2588         if (big_cmp_abs(k, &ki) != 0) {
2589                 (void) big_double(&ki, &ki);
2590         }
2591         (void) big_sub_pos(&ki, &ki, k);
2592 
2593         (void) big_copy(Lk, p);
2594         (void) big_copy(Lkminus1, &big_Two);
2595 
2596         for (i = 0; i < m; i++) {
2597                 if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
2598                         goto ret;
2599                 }
2600                 (void) big_add_abs(&tmp, &tmp, n);
2601                 (void) big_sub_pos(&tmp, &tmp, p);
2602                 if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
2603                         goto ret;
2604                 }
2605                 if ((ki.value[w] & bit) != 0) {
2606                         if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
2607                             BIG_OK) {
2608                                 goto ret;
2609                         }
2610                         (void) big_add_abs(&tmp, &tmp, n);
2611                         (void) big_sub_pos(&tmp, &tmp, &big_Two);
2612                         if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
2613                             BIG_OK) {
2614                                 goto ret;
2615                         }
2616                         (void) big_copy(Lk, &tmp2);
2617                 } else {
2618                         if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
2619                                 goto ret;
2620                         }
2621                         (void) big_add_abs(&tmp, &tmp, n);
2622                         (void) big_sub_pos(&tmp, &tmp, &big_Two);
2623                         if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
2624                                 goto ret;
2625                         }
2626                         (void) big_copy(Lkminus1, &tmp2);
2627                 }
2628                 bit = bit >> 1;
2629                 if (bit == 0) {
2630                         bit = BIG_CHUNK_HIGHBIT;
2631                         w--;
2632                 }
2633         }
2634 
2635         err = BIG_OK;
2636 
2637 ret:
2638         if (tmp2.malloced) big_finish(&tmp2);
2639 ret2:
2640         if (tmp.malloced) big_finish(&tmp);
2641 ret1:
2642         if (ki.malloced) big_finish(&ki);
2643 
2644         return (err);
2645 }
2646 
2647 
2648 BIG_ERR_CODE
2649 big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
2650 {
2651         BIGNUM          o, nminus1, tmp, Lkminus1, Lk;
2652         BIG_CHUNK_TYPE  ovalue[BIGTMPSIZE];
2653         BIG_CHUNK_TYPE  nminus1value[BIGTMPSIZE];
2654         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2655         BIG_CHUNK_TYPE  Lkminus1value[BIGTMPSIZE];
2656         BIG_CHUNK_TYPE  Lkvalue[BIGTMPSIZE];
2657         BIG_ERR_CODE    err;
2658         int             e, i, jac;
2659 
2660         if (big_cmp_abs(n, &big_One) == 0) {
2661                 return (BIG_FALSE);
2662         }
2663         if (big_cmp_abs(n, &big_Two) == 0) {
2664                 return (BIG_TRUE);
2665         }
2666         if ((n->value[0] & 1) == 0) {
2667                 return (BIG_FALSE);
2668         }
2669 
2670         if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
2671             BIG_OK) {
2672                 return (err);
2673         }
2674 
2675         if ((err = big_init1(&nminus1, n->len,
2676             nminus1value, arraysize(nminus1value))) != BIG_OK) {
2677                 goto ret1;
2678         }
2679 
2680         if ((err = big_init1(&tmp, 2 * n->len,
2681             tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2682                 goto ret2;
2683         }
2684 
2685         if ((err = big_init1(&Lkminus1, n->len,
2686             Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
2687                 goto ret3;
2688         }
2689 
2690         if ((err = big_init1(&Lk, n->len,
2691             Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
2692                 goto ret4;
2693         }
2694 
2695         (void) big_sub_pos(&o, n, &big_One);    /* cannot fail */
2696         (void) big_copy(&nminus1, &o);          /* cannot fail */
2697         e = 0;
2698         while ((o.value[0] & 1) == 0) {
2699                 e++;
2700                 (void) big_half_pos(&o, &o);  /* cannot fail */
2701         }
2702         if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
2703             BIG_OK) {
2704                 goto ret;
2705         }
2706 
2707         i = 0;
2708         while ((i < e) &&
2709             (big_cmp_abs(&tmp, &big_One) != 0) &&
2710             (big_cmp_abs(&tmp, &nminus1) != 0)) {
2711                 if ((err =
2712                     big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
2713                     BIG_OK)
2714                         goto ret;
2715                 i++;
2716         }
2717 
2718         if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
2719             ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
2720                 err = BIG_FALSE;
2721                 goto ret;
2722         }
2723 
2724         if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
2725                 goto ret;
2726         }
2727 
2728         if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
2729                 goto ret;
2730         }
2731         if (big_cmp_abs(&tmp, n) == 0) {
2732                 err = BIG_FALSE;
2733                 goto ret;
2734         }
2735 
2736         (void) big_copy(&o, &big_Two);
2737         do {
2738                 (void) big_add_abs(&o, &o, &big_One);
2739                 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
2740                         goto ret;
2741                 }
2742                 (void) big_sub_pos(&tmp, &tmp, &big_Four);
2743                 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
2744                         goto ret;
2745                 }
2746         } while (jac != -1);
2747 
2748         (void) big_add_abs(&tmp, n, &big_One);
2749         if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
2750                 goto ret;
2751         }
2752 
2753         if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
2754             (big_cmp_abs(&Lk, &big_Two) == 0)) {
2755                 err = BIG_TRUE;
2756         } else {
2757                 err = BIG_FALSE;
2758         }
2759 
2760 ret:
2761         if (Lk.malloced) big_finish(&Lk);
2762 ret4:
2763         if (Lkminus1.malloced) big_finish(&Lkminus1);
2764 ret3:
2765         if (tmp.malloced) big_finish(&tmp);
2766 ret2:
2767         if (nminus1.malloced) big_finish(&nminus1);
2768 ret1:
2769         if (o.malloced) big_finish(&o);
2770 
2771         return (err);
2772 }
2773 
2774 
2775 BIG_ERR_CODE
2776 big_isprime_pos(BIGNUM *n)
2777 {
2778         return (big_isprime_pos_ext(n, NULL));
2779 }
2780 
2781 
2782 #define SIEVESIZE 1000
2783 
2784 uint32_t smallprimes[] =
2785 {
2786 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2787 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97
2788 };
2789 
2790 
2791 BIG_ERR_CODE
2792 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2793 {
2794         BIG_ERR_CODE    err;
2795         int             sieve[SIEVESIZE];
2796         int             i;
2797         uint32_t        off, p;
2798 
2799         if ((err = big_copy(result, n)) != BIG_OK) {
2800                 return (err);
2801         }
2802         result->value[0] |= 1;
2803         /* CONSTCOND */
2804         while (1) {
2805                 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2806                 for (i = 0;
2807                     i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2808                         p = smallprimes[i];
2809                         off = big_modhalf_pos(result, p);
2810                         off = p - off;
2811                         if ((off % 2) == 1) {
2812                                 off = off + p;
2813                         }
2814                         off = off / 2;
2815                         while (off < SIEVESIZE) {
2816                                 sieve[off] = 1;
2817                                 off = off + p;
2818                         }
2819                 }
2820 
2821                 for (i = 0; i < SIEVESIZE; i++) {
2822                         if (sieve[i] == 0) {
2823                                 err = big_isprime_pos_ext(result, info);
2824                                 if (err != BIG_FALSE) {
2825                                         if (err != BIG_TRUE) {
2826                                                 return (err);
2827                                         } else {
2828                                                 goto out;
2829                                         }
2830                                 }
2831 
2832                         }
2833                         if ((err = big_add_abs(result, result, &big_Two)) !=
2834                             BIG_OK) {
2835                                 return (err);
2836                         }
2837                 }
2838         }
2839 
2840 out:
2841         return (BIG_OK);
2842 }
2843 
2844 
2845 BIG_ERR_CODE
2846 big_nextprime_pos(BIGNUM *result, BIGNUM *n)
2847 {
2848         return (big_nextprime_pos_ext(result, n, NULL));
2849 }
2850 
2851 
2852 BIG_ERR_CODE
2853 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
2854 {
2855         BIG_ERR_CODE err;
2856 
2857 
2858         if ((err = big_copy(result, n)) != BIG_OK)
2859                 return (err);
2860         result->value[0] |= 1;
2861         while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
2862                 if (err != BIG_FALSE)
2863                         return (err);
2864                 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
2865                         return (err);
2866         }
2867         return (BIG_OK);
2868 }
2869 
2870 
2871 /*
2872  * given m and e, computes the rest in the equation
2873  * gcd(m, e) = cm * m + ce * e
2874  */
2875 BIG_ERR_CODE
2876 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
2877 {
2878         BIGNUM          *xi, *ri, *riminus1, *riminus2, *t;
2879         BIGNUM          *vmi, *vei, *vmiminus1, *veiminus1;
2880         BIGNUM          t1, t2, t3, t4, t5, t6, t7, t8, tmp;
2881         BIG_CHUNK_TYPE  t1value[BIGTMPSIZE];
2882         BIG_CHUNK_TYPE  t2value[BIGTMPSIZE];
2883         BIG_CHUNK_TYPE  t3value[BIGTMPSIZE];
2884         BIG_CHUNK_TYPE  t4value[BIGTMPSIZE];
2885         BIG_CHUNK_TYPE  t5value[BIGTMPSIZE];
2886         BIG_CHUNK_TYPE  t6value[BIGTMPSIZE];
2887         BIG_CHUNK_TYPE  t7value[BIGTMPSIZE];
2888         BIG_CHUNK_TYPE  t8value[BIGTMPSIZE];
2889         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2890         BIG_ERR_CODE    err;
2891         int             len;
2892 
2893         if (big_cmp_abs(m, e) >= 0) {
2894                 len = m->len;
2895         } else {
2896                 len = e->len;
2897         }
2898 
2899         if ((err = big_init1(&t1, len,
2900             t1value, arraysize(t1value))) != BIG_OK) {
2901                 return (err);
2902         }
2903         if ((err = big_init1(&t2, len,
2904             t2value, arraysize(t2value))) != BIG_OK) {
2905                 goto ret1;
2906         }
2907         if ((err = big_init1(&t3, len,
2908             t3value, arraysize(t3value))) != BIG_OK) {
2909                 goto ret2;
2910         }
2911         if ((err = big_init1(&t4, len,
2912             t4value, arraysize(t3value))) != BIG_OK) {
2913                 goto ret3;
2914         }
2915         if ((err = big_init1(&t5, len,
2916             t5value, arraysize(t5value))) != BIG_OK) {
2917                 goto ret4;
2918         }
2919         if ((err = big_init1(&t6, len,
2920             t6value, arraysize(t6value))) != BIG_OK) {
2921                 goto ret5;
2922         }
2923         if ((err = big_init1(&t7, len,
2924             t7value, arraysize(t7value))) != BIG_OK) {
2925                 goto ret6;
2926         }
2927         if ((err = big_init1(&t8, len,
2928             t8value, arraysize(t8value))) != BIG_OK) {
2929                 goto ret7;
2930         }
2931 
2932         if ((err = big_init1(&tmp, 2 * len,
2933             tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2934                 goto ret8;
2935         }
2936 
2937         ri = &t1;
2938         ri->value[0] = 1;
2939         ri->len = 1;
2940         xi = &t2;
2941         riminus1 = &t3;
2942         riminus2 = &t4;
2943         vmi = &t5;
2944         vei = &t6;
2945         vmiminus1 = &t7;
2946         veiminus1 = &t8;
2947 
2948         (void) big_copy(vmiminus1, &big_One);
2949         (void) big_copy(vmi, &big_One);
2950         (void) big_copy(veiminus1, &big_One);
2951         (void) big_copy(xi, &big_One);
2952         vei->len = 1;
2953         vei->value[0] = 0;
2954 
2955         (void) big_copy(riminus1, m);
2956         (void) big_copy(ri, e);
2957 
2958         while (!big_is_zero(ri)) {
2959                 t = riminus2;
2960                 riminus2 = riminus1;
2961                 riminus1 = ri;
2962                 ri = t;
2963                 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
2964                         goto ret;
2965                 }
2966                 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
2967                         goto ret;
2968                 }
2969                 t = vmiminus1;
2970                 vmiminus1 = vmi;
2971                 vmi = t;
2972                 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
2973                         goto ret;
2974                 }
2975                 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
2976                         goto ret;
2977                 }
2978                 t = veiminus1;
2979                 veiminus1 = vei;
2980                 vei = t;
2981                 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
2982                     BIG_OK) {
2983                         goto ret;
2984                 }
2985         }
2986         if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
2987                 goto ret;
2988         }
2989         if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
2990                 goto ret;
2991         }
2992         if (ce != NULL) {
2993                 err = big_copy(ce, vei);
2994         }
2995 ret:
2996         if (tmp.malloced) big_finish(&tmp);
2997 ret8:
2998         if (t8.malloced) big_finish(&t8);
2999 ret7:
3000         if (t7.malloced) big_finish(&t7);
3001 ret6:
3002         if (t6.malloced) big_finish(&t6);
3003 ret5:
3004         if (t5.malloced) big_finish(&t5);
3005 ret4:
3006         if (t4.malloced) big_finish(&t4);
3007 ret3:
3008         if (t3.malloced) big_finish(&t3);
3009 ret2:
3010         if (t2.malloced) big_finish(&t2);
3011 ret1:
3012         if (t1.malloced) big_finish(&t1);
3013 
3014         return (err);
3015 }