Print this page
6799218 RSA using Solaris Kernel Crypto framework lagging behind OpenSSL
5016936 bignumimpl:big_mul: potential memory leak
6810280 panic from bignum module: vmem_xalloc(): size == 0


   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 #define big_div_pos_fast big_div_pos
  29 
  30 #include "bignum.h"
  31 
  32 /*
  33  * Configuration guide
  34  * -------------------
  35  *
  36  * There are 4 preprocessor symbols used to configure the bignum
  37  * implementation.  This file contains no logic to configure based on
  38  * processor; we leave that to the Makefiles to specify.
  39  *
  40  * USE_FLOATING_POINT
  41  *   Meaning: There is support for a fast floating-point implementation of
  42  *   Montgomery multiply.
  43  *
  44  * PSR_MUL
  45  *   Meaning: There are processor-specific versions of the low level
  46  *   functions to implement big_mul.  Those functions are: big_mul_set_vec,
  47  *   big_mul_add_vec, big_mul_vec, and big_sqr_vec.  PSR_MUL implies support
  48  *   for all 4 functions.  You cannot pick and choose which subset of these
  49  *   functions to support; that would lead to a rat's nest of #ifdefs.
  50  *
  51  * HWCAP
  52  *   Meaning: Call multiply support functions through a function pointer.
  53  *   On x86, there are multiple implementations for differnt hardware
  54  *   capabilities, such as MMX, SSE2, etc.  Tests are made at run-time, when
  55  *   a function is first used.  So, the support functions are called through
  56  *   a function pointer.  There is no need for that on Sparc, because there
  57  *   is only one implementation; support functions are called directly.
  58  *   Later, if there were some new VIS instruction, or something, and a
  59  *   run-time test were needed, rather than variant kernel modules and
  60  *   libraries, then HWCAP would be defined for Sparc, as well.
  61  *
  62  * UMUL64
  63  *   Meaning: It is safe to use generic C code that assumes the existence
  64  *   of a 32 x 32 --> 64 bit unsigned multiply.  If this is not defined,
  65  *   then the generic code for big_mul_add_vec() must necessarily be very slow,
  66  *   because it must fall back to using 16 x 16 --> 32 bit multiplication.
  67  *
  68  */
  69 
  70 



  71 #ifdef  _KERNEL
  72 #include <sys/ddi.h>
  73 #include <sys/mdesc.h>
  74 #include <sys/crypto/common.h>
  75 
  76 #include <sys/types.h>
  77 #include <sys/kmem.h>
  78 #include <sys/param.h>
  79 #include <sys/sunddi.h>
  80 















  81 #define big_malloc(size)        kmem_alloc(size, KM_NOSLEEP)
  82 #define big_free(ptr, size)     kmem_free(ptr, size)
  83 
  84 void *
  85 big_realloc(void *from, size_t oldsize, size_t newsize)
  86 {
  87         void *rv;
  88 
  89         rv = kmem_alloc(newsize, KM_NOSLEEP);
  90         if (rv != NULL)
  91                 bcopy(from, rv, oldsize);
  92         kmem_free(from, oldsize);
  93         return (rv);
  94 }
  95 
  96 #else   /* _KERNEL */
  97 
  98 #include <stdlib.h>
  99 #include <stdio.h>
 100 #include <assert.h>
 101 #define ASSERT  assert
 102 
 103 #ifndef MALLOC_DEBUG
 104 
 105 #define big_malloc(size)        malloc(size)
 106 #define big_free(ptr, size)     free(ptr)
 107 
 108 #else
 109 
 110 void
 111 big_free(void *ptr, size_t size)
 112 {
 113         printf("freed %d bytes at %p\n", size, ptr);
 114         free(ptr);
 115 }
 116 
 117 void *
 118 big_malloc(size_t size)
 119 {
 120         void *rv;
 121         rv = malloc(size);
 122         printf("malloced %d bytes, addr:%p\n", size, rv);
 123         return (rv);
 124 }
 125 #endif /* MALLOC_DEBUG */
 126 
 127 #define big_realloc(x, y, z) realloc((x), (z))
 128 





 129 void
 130 printbignum(char *aname, BIGNUM *a)
 131 {
 132         int i;
 133 
 134         (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
 135         for (i = a->len - 1; i >= 0; i--) {
 136 #ifdef BIGNUM_CHUNK_32
 137                 (void) printf("%08x ", a->value[i]);
 138                 if ((i % 8 == 0) && (i != 0)) {
 139                         (void) printf("\n");
 140                 }
 141 #else
 142                 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
 143                     (uint32_t)((a->value[i]) & 0xffffffff));
 144                 if ((i % 4 == 0) && (i != 0)) {
 145                         (void) printf("\n");
 146                 }
 147 #endif
 148         }
 149         (void) printf("\n");
 150 }
 151 
 152 #endif  /* _KERNEL */
 153 
 154 
 155 /* size in BIG_CHUNK_SIZE-bit words */
















 156 BIG_ERR_CODE
 157 big_init(BIGNUM *number, int size)
 158 {
 159         number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
 160         if (number->value == NULL) {
 161                 return (BIG_NO_MEM);
 162         }
 163         number->size = size;
 164         number->len = 0;
 165         number->sign = 1;
 166         number->malloced = 1;
 167         return (BIG_OK);
 168 }
 169 
 170 /* size in BIG_CHUNK_SIZE-bit words */




















 171 BIG_ERR_CODE
 172 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
 173 {
 174         if ((buf == NULL) || (size > bufsize)) {
 175                 number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
 176                 if (number->value == NULL) {
 177                         return (BIG_NO_MEM);
 178                 }
 179                 number->size = size;
 180                 number->malloced = 1;
 181         } else {
 182                 number->value = buf;
 183                 number->size = bufsize;
 184                 number->malloced = 0;
 185         }
 186                 number->len = 0;
 187                 number->sign = 1;
 188 
 189         return (BIG_OK);
 190 }
 191 





 192 void
 193 big_finish(BIGNUM *number)
 194 {
 195         if (number->malloced == 1) {
 196                 big_free(number->value,
 197                     sizeof (BIG_CHUNK_TYPE) * number->size);
 198                 number->malloced = 0;
 199         }
 200 }
 201 
 202 
 203 /*
 204  *  bn->size should be at least
 205  * (len + sizeof (BIG_CHUNK_TYPE) - 1) / sizeof (BIG_CHUNK_TYPE) bytes
 206  * converts from byte-big-endian format to bignum format (words in
 207  * little endian order, but bytes within the words big endian)
 208  */
 209 void
 210 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
 211 {
 212         int             i, j, offs;


 213         BIG_CHUNK_TYPE  word;
 214         uchar_t         *knwordp;
 215 
 216 #ifdef  _LP64
 217         offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
 218         bn->len = (uint32_t)len / sizeof (BIG_CHUNK_TYPE);


 219 
 220         for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
 221 #else   /* !_LP64 */
 222         offs = len % sizeof (BIG_CHUNK_TYPE);
 223         bn->len = len / sizeof (BIG_CHUNK_TYPE);
 224         for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
 225 #endif  /* _LP64 */
 226                 knwordp = &(kn[len - sizeof (BIG_CHUNK_TYPE) * (i + 1)]);
 227                 word = knwordp[0];
 228                 for (j = 1; j < sizeof (BIG_CHUNK_TYPE); j++) {
 229                         word = (word << 8)+ knwordp[j];
 230                 }
 231                 bn->value[i] = word;
 232         }
 233         if (offs > 0) {
 234                 word = kn[0];
 235                 for (i = 1; i < offs; i++) word = (word << 8) + kn[i];
 236                 bn->value[bn->len++] = word;
 237         }
 238         while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) {
 239                 bn->len --;
 240         }
 241 }
 242 

 243 /*
 244  * copies the least significant len bytes if
 245  * len < bn->len * sizeof (BIG_CHUNK_TYPE)
 246  * converts from bignum format to byte-big-endian format.
 247  * bignum format is words of type  BIG_CHUNK_TYPE in little endian order.
 248  */
 249 void
 250 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
 251 {
 252         int             i, j, offs;


 253         BIG_CHUNK_TYPE  word;
 254 
 255         if (len < sizeof (BIG_CHUNK_TYPE) * bn->len) {
 256 #ifdef  _LP64
 257                 for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
 258 #else   /* !_LP64 */
 259                 for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
 260 #endif  /* _LP64 */
 261                         word = bn->value[i];
 262                         for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
 263                                 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
 264                                     word & 0xff;
 265                                 word = word >> 8;
 266                         }
 267                 }
 268 #ifdef  _LP64
 269                 offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
 270 #else   /* !_LP64 */
 271                 offs = len % sizeof (BIG_CHUNK_TYPE);
 272 #endif  /* _LP64 */
 273                 if (offs > 0) {
 274                         word = bn->value[len / sizeof (BIG_CHUNK_TYPE)];
 275 #ifdef  _LP64
 276                         for (i =  (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
 277                             i > 0; i --) {
 278 #else   /* !_LP64 */
 279                         for (i = len % sizeof (BIG_CHUNK_TYPE);
 280                             i > 0; i --) {
 281 #endif  /* _LP64 */
 282                                 kn[i - 1] = word & 0xff;
 283                                 word = word >> 8;
 284                         }
 285                 }
 286         } else {
 287                 for (i = 0; i < bn->len; i++) {
 288                         word = bn->value[i];
 289                         for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
 290                                 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
 291                                     word & 0xff;
 292                                 word = word >> 8;
 293                         }
 294                 }
 295 #ifdef  _LP64
 296                 for (i = 0;
 297                     i < (uint32_t)len - sizeof (BIG_CHUNK_TYPE) * bn->len;
 298                     i++) {
 299 #else   /* !_LP64 */
 300                 for (i = 0; i < len - sizeof (BIG_CHUNK_TYPE) * bn->len; i++) {
 301 #endif  /* _LP64 */
 302                         kn[i] = 0;
 303                 }
 304         }
 305 }
 306 
 307 
 308 int
 309 big_bitlength(BIGNUM *a)
 310 {
 311         int             l = 0, b = 0;
 312         BIG_CHUNK_TYPE  c;
 313 
 314         l = a->len - 1;
 315         while ((l > 0) && (a->value[l] == 0)) {
 316                 l--;
 317         }
 318         b = sizeof (BIG_CHUNK_TYPE) * BITSINBYTE;
 319         c = a->value[l];
 320         while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
 321                 c = c << 1;
 322                 b--;
 323         }
 324 
 325         return (l * sizeof (BIG_CHUNK_TYPE) * BITSINBYTE + b);
 326 }
 327 
 328 
 329 BIG_ERR_CODE
 330 big_copy(BIGNUM *dest, BIGNUM *src)
 331 {
 332         BIG_CHUNK_TYPE  *newptr;
 333         int             i, len;
 334 
 335         len = src->len;
 336         while ((len > 1) && (src->value[len - 1] == 0)) {
 337                 len--;
 338         }
 339         src->len = len;
 340         if (dest->size < len) {
 341                 if (dest->malloced == 1) {
 342                         newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
 343                             sizeof (BIG_CHUNK_TYPE) * dest->size,
 344                             sizeof (BIG_CHUNK_TYPE) * len);
 345                 } else {
 346                         newptr = (BIG_CHUNK_TYPE *)
 347                             big_malloc(sizeof (BIG_CHUNK_TYPE) * len);
 348                         if (newptr != NULL) {
 349                                 dest->malloced = 1;
 350                         }
 351                 }
 352                 if (newptr == NULL) {
 353                         return (BIG_NO_MEM);
 354                 }
 355                 dest->value = newptr;
 356                 dest->size = len;
 357         }
 358         dest->len = len;
 359         dest->sign = src->sign;
 360         for (i = 0; i < len; i++) {
 361                 dest->value[i] = src->value[i];
 362         }
 363 
 364         return (BIG_OK);
 365 }
 366 
 367 
 368 BIG_ERR_CODE
 369 big_extend(BIGNUM *number, int size)
 370 {
 371         BIG_CHUNK_TYPE  *newptr;
 372         int             i;
 373 
 374         if (number->size >= size)
 375                 return (BIG_OK);
 376         if (number->malloced) {
 377                 number->value = big_realloc(number->value,
 378                     sizeof (BIG_CHUNK_TYPE) * number->size,
 379                     sizeof (BIG_CHUNK_TYPE) * size);
 380         } else {
 381                 newptr = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
 382                 if (newptr != NULL) {
 383                         for (i = 0; i < number->size; i++) {
 384                                 newptr[i] = number->value[i];
 385                         }
 386                 }
 387                 number->value = newptr;
 388         }
 389 
 390         if (number->value == NULL) {
 391                 return (BIG_NO_MEM);
 392         }
 393 
 394         number->size = size;
 395         number->malloced = 1;
 396         return (BIG_OK);
 397 }
 398 
 399 
 400 /* returns 1 if n == 0 */
 401 int


 544 
 545 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
 546 int
 547 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
 548 {
 549         int     i;
 550 
 551         if (aa->len > bb->len) {
 552                 for (i = aa->len - 1; i > bb->len - 1; i--) {
 553                         if (aa->value[i] > 0) {
 554                                 return (1);
 555                         }
 556                 }
 557         } else if (aa->len < bb->len) {
 558                 for (i = bb->len - 1; i > aa->len - 1; i--) {
 559                         if (bb->value[i] > 0) {
 560                                 return (-1);
 561                         }
 562                 }
 563         } else {
 564                 i = aa->len-1;
 565         }
 566         for (; i >= 0; i--) {
 567                 if (aa->value[i] > bb->value[i]) {
 568                         return (1);
 569                 } else if (aa->value[i] < bb->value[i]) {
 570                         return (-1);
 571                 }
 572         }
 573 
 574         return (0);
 575 }
 576 
 577 
 578 BIG_ERR_CODE
 579 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 580 {
 581         BIG_ERR_CODE    err;
 582 
 583         if ((bb->sign == -1) && (aa->sign == 1)) {
 584                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {


 895         result->sign = aa->sign;
 896 }
 897 
 898 
 899 /* it is assumed that result->size is big enough */
 900 void
 901 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
 902 {
 903         int              i;
 904         BIG_CHUNK_TYPE  cy, ai;
 905 
 906         if (offs == 0) {
 907                 if (result != aa) {
 908                         (void) big_copy(result, aa);
 909                 }
 910                 return;
 911         }
 912         cy = aa->value[0] >> offs;
 913         for (i = 1; i < aa->len; i++) {
 914                 ai = aa->value[i];
 915                 result->value[i-1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
 916                 cy = ai >> offs;
 917         }
 918         result->len = aa->len;
 919         result->value[result->len - 1] = cy;
 920         result->sign = aa->sign;
 921 }
 922 
 923 
 924 /*
 925  * result = aa/bb   remainder = aa mod bb
 926  * it is assumed that aa and bb are positive
 927  */
 928 BIG_ERR_CODE
 929 big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
 930 {
 931         BIG_ERR_CODE    err = BIG_OK;
 932         int             i, alen, blen, tlen, rlen, offs;
 933         BIG_CHUNK_TYPE  higha, highb, coeff;
 934         BIG_CHUNK_TYPE  *a, *b;
 935         BIGNUM          bbhigh, bblow, tresult, tmp1, tmp2;
 936         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
 937         BIG_CHUNK_TYPE  tmp2value[BIGTMPSIZE];
 938         BIG_CHUNK_TYPE  tresultvalue[BIGTMPSIZE];
 939         BIG_CHUNK_TYPE  bblowvalue[BIGTMPSIZE];
 940         BIG_CHUNK_TYPE  bbhighvalue[BIGTMPSIZE];
 941 
 942         a = aa->value;
 943         b = bb->value;
 944         alen = aa->len;
 945         blen = bb->len;
 946         while ((alen > 1) && (a[alen - 1] == 0)) {
 947                 alen = alen - 1;
 948         }
 949         aa->len = alen;


1060         if ((remainder != NULL) &&
1061             ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1062                 goto ret;
1063 
1064         if (result != NULL)
1065                 err = big_copy(result, &tresult);
1066 
1067 ret:
1068         big_finish(&tresult);
1069 ret4:
1070         big_finish(&tmp1);
1071 ret3:
1072         big_finish(&tmp2);
1073 ret2:
1074         big_finish(&bbhigh);
1075 ret1:
1076         big_finish(&bblow);
1077         return (err);
1078 }
1079 

1080 /*
1081  * If there is no processor-specific integer implementation of
1082  * the lower level multiply functions, then this code is provided
1083  * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1084  * big_sqr_vec().
1085  *
1086  * There are two generic implementations.  One that assumes that
1087  * there is hardware and C compiler support for a 32 x 32 --> 64
1088  * bit unsigned multiply, but otherwise is not specific to any
1089  * processor, platform, or ISA.
1090  *
1091  * The other makes very few assumptions about hardware capabilities.
1092  * It does not even assume that there is any implementation of a
1093  * 32 x 32 --> 64 bit multiply that is accessible to C code and
1094  * appropriate to use.  It falls constructs 32 x 32 --> 64 bit
1095  * multiplies from 16 x 16 --> 32 bit multiplies.
1096  *
1097  */
1098 
1099 #if !defined(PSR_MUL)
1100 
1101 #ifdef UMUL64
1102 
1103 #if (BIG_CHUNK_SIZE == 32)
1104 
1105 #define UNROLL8
1106 
1107 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1108         p = pf * d; \
1109         pf = (uint64_t)a[R+1]; \
1110         t = p + cy; \
1111         r[R] = (uint32_t)t; \
1112         cy = t >> 32
1113 
1114 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1115         p = pf * d; \
1116         t = p + cy; \
1117         r[R] = (uint32_t)t; \
1118         cy = t >> 32
1119 
1120 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1121         t = (uint64_t)r[R]; \
1122         p = pf * d; \
1123         pf = (uint64_t)a[R+1]; \
1124         t = p + t + cy; \
1125         r[R] = (uint32_t)t; \
1126         cy = t >> 32
1127 
1128 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1129         t = (uint64_t)r[R]; \
1130         p = pf * d; \
1131         t = p + t + cy; \
1132         r[R] = (uint32_t)t; \
1133         cy = t >> 32
1134 
1135 #ifdef UNROLL8
1136 
1137 #define UNROLL 8
1138 
1139 /*
1140  * r = a * b
1141  * where r and a are vectors; b is a single 32-bit digit
1142  */
1143 


1259         s = (uint64_t)a[0];
1260         s = s * s;
1261         r[0] = (uint32_t)s;
1262         cy = s >> 32;
1263         p = ((uint64_t)r[1] << 1) + cy;
1264         r[1] = (uint32_t)p;
1265         cy = p >> 32;
1266         row = 1;
1267         col = 2;
1268         while (row < len) {
1269                 s = (uint64_t)a[row];
1270                 s = s * s;
1271                 p = (uint64_t)r[col] << 1;
1272                 t = p + s;
1273                 d = (uint32_t)t;
1274                 t2 = (uint64_t)d + cy;
1275                 r[col] = (uint32_t)t2;
1276                 cy = (t >> 32) + (t2 >> 32);
1277                 if (row == len - 1)
1278                         break;
1279                 p = ((uint64_t)r[col+1] << 1) + cy;
1280                 r[col+1] = (uint32_t)p;
1281                 cy = p >> 32;
1282                 ++row;
1283                 col += 2;
1284         }
1285         r[col+1] = (uint32_t)cy;
1286 }
1287 
1288 #else /* BIG_CHUNK_SIZE == 64 */
1289 
1290 /*
1291  * r = r + a * digit, r and a are vectors of length len
1292  * returns the carry digit
1293  */
1294 BIG_CHUNK_TYPE
1295 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1296     BIG_CHUNK_TYPE digit)
1297 {
1298         BIG_CHUNK_TYPE  cy, cy1, retcy, dlow, dhigh;
1299         int             i;
1300 
1301         cy1 = 0;
1302         dlow = digit & BIG_CHUNK_LOWHALFBITS;
1303         dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1304         for (i = 0; i < len; i++) {
1305                 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +


1344 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1345     BIG_CHUNK_TYPE digit)
1346 {
1347         int     i;
1348 
1349         ASSERT(r != a);
1350         for (i = 0; i < len; i++) {
1351                 r[i] = 0;
1352         }
1353         return (big_mul_add_vec(r, a, len, digit));
1354 }
1355 
1356 void
1357 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1358 {
1359         int i;
1360 
1361         ASSERT(r != a);
1362         r[len] = big_mul_set_vec(r, a, len, a[0]);
1363         for (i = 1; i < len; ++i)
1364                 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1365 }
1366 
1367 #endif /* BIG_CHUNK_SIZE == 32/64 */
1368 

1369 #else /* ! UMUL64 */
1370 
1371 #if (BIG_CHUNK_SIZE != 32)
1372 #error Don't use 64-bit chunks without defining UMUL64
1373 #endif
1374 
1375 
1376 /*
1377  * r = r + a * digit, r and a are vectors of length len
1378  * returns the carry digit
1379  */
1380 uint32_t
1381 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1382 {
1383         uint32_t cy, cy1, retcy, dlow, dhigh;
1384         int i;
1385 
1386         cy1 = 0;
1387         dlow = digit & 0xffff;
1388         dhigh = digit >> 16;


1415 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1416 {
1417         int     i;
1418 
1419         ASSERT(r != a);
1420         for (i = 0; i < len; i++) {
1421                 r[i] = 0;
1422         }
1423 
1424         return (big_mul_add_vec(r, a, len, digit));
1425 }
1426 
1427 void
1428 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1429 {
1430         int i;
1431 
1432         ASSERT(r != a);
1433         r[len] = big_mul_set_vec(r, a, len, a[0]);
1434         for (i = 1; i < len; ++i)
1435                 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1436 }
1437 
1438 #endif /* UMUL64 */
1439 
1440 void
1441 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1442     BIG_CHUNK_TYPE *b, int blen)
1443 {
1444         int i;
1445 
1446         r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1447         for (i = 1; i < blen; ++i)
1448                 r[alen + i] = big_mul_add_vec(r+i, a, alen, b[i]);
1449 }
1450 
1451 
1452 #endif /* ! PSR_MUL */
1453 
1454 
1455 /*
1456  * result = aa * bb  result->value should be big enough to hold the result
1457  *
1458  * Implementation: Standard grammar school algorithm
1459  *
1460  */
1461 BIG_ERR_CODE
1462 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1463 {
1464         BIGNUM          tmp1;
1465         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
1466         BIG_CHUNK_TYPE  *r, *t, *a, *b;
1467         BIG_ERR_CODE    err;
1468         int             i, alen, blen, rsize, sign, diff;


1475                         BIGNUM *tt;
1476                         tt = aa;
1477                         aa = bb;
1478                         bb = tt;
1479                 }
1480         }
1481         a = aa->value;
1482         b = bb->value;
1483         alen = aa->len;
1484         blen = bb->len;
1485         while ((alen > 1) && (a[alen - 1] == 0)) {
1486                 alen--;
1487         }
1488         aa->len = alen;
1489         while ((blen > 1) && (b[blen - 1] == 0)) {
1490                 blen--;
1491         }
1492         bb->len = blen;
1493 
1494         rsize = alen + blen;

1495         if (result->size < rsize) {
1496                 err = big_extend(result, rsize);
1497                 if (err != BIG_OK) {
1498                         return (err);
1499                 }
1500                 /* aa or bb might be an alias to result */
1501                 a = aa->value;
1502                 b = bb->value;
1503         }
1504         r = result->value;
1505 
1506         if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1507                 result->len = 1;
1508                 result->sign = 1;
1509                 r[0] = 0;
1510                 return (BIG_OK);
1511         }
1512         sign = aa->sign * bb->sign;
1513         if ((alen == 1) && (a[0] == 1)) {
1514                 for (i = 0; i < blen; i++) {


1532                 return (err);
1533         }
1534         (void) big_copy(&tmp1, aa);
1535         t = tmp1.value;
1536 
1537         for (i = 0; i < rsize; i++) {
1538                 t[i] = 0;
1539         }
1540 
1541         if (diff == 0 && alen > 2) {
1542                 BIG_SQR_VEC(t, a, alen);
1543         } else if (blen > 0) {
1544                 BIG_MUL_VEC(t, a, alen, b, blen);
1545         }
1546 
1547         if (t[rsize - 1] == 0) {
1548                 tmp1.len = rsize - 1;
1549         } else {
1550                 tmp1.len = rsize;
1551         }
1552         if ((err = big_copy(result, &tmp1)) != BIG_OK) {
1553                 return (err);
1554         }
1555         result->sign = sign;
1556 
1557         big_finish(&tmp1);
1558 
1559         return (BIG_OK);
1560 }
1561 
1562 
1563 /*
1564  * caller must ensure that  a < n,  b < n  and  ret->size >=  2 * n->len + 1
1565  * and that ret is not n
1566  */
1567 BIG_ERR_CODE
1568 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1569 {
1570         int     i, j, nlen, needsubtract;
1571         BIG_CHUNK_TYPE  *nn, *rr;
1572         BIG_CHUNK_TYPE  digit, c;
1573         BIG_ERR_CODE    err;
1574 
1575         nlen = n->len;
1576         nn = n->value;
1577 
1578         rr = ret->value;
1579 


1602         needsubtract = 0;
1603         if ((rr[2 * nlen]  != 0))
1604                 needsubtract = 1;
1605         else {
1606                 for (i = 2 * nlen - 1; i >= nlen; i--) {
1607                         if (rr[i] > nn[i - nlen]) {
1608                                 needsubtract = 1;
1609                                 break;
1610                         } else if (rr[i] < nn[i - nlen]) {
1611                                 break;
1612                         }
1613                 }
1614         }
1615         if (needsubtract)
1616                 big_sub_vec(rr, rr + nlen, nn, nlen);
1617         else {
1618                 for (i = 0; i < nlen; i++) {
1619                         rr[i] = rr[i + nlen];
1620                 }
1621         }
1622         for (i = nlen - 1; (i >= 0) && (rr[i] == 0); i--)


1623                 ;
1624         ret->len = i+1;
1625 
1626         return (BIG_OK);
1627 }
1628 
1629 
1630 BIG_CHUNK_TYPE
1631 big_n0(BIG_CHUNK_TYPE n)
1632 {
1633         int             i;
1634         BIG_CHUNK_TYPE  result, tmp;
1635 
1636         result = 0;
1637         tmp = BIG_CHUNK_ALLBITS;
1638         for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1639                 if ((tmp & 1) == 1) {
1640                         result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1641                         tmp = tmp - n;
1642                 } else {
1643                         result = (result >> 1);
1644                 }


1747 
1748 #ifdef  USE_FLOATING_POINT
1749 #define big_modexp_ncp_float    big_modexp_ncp_sw
1750 #else
1751 #define big_modexp_ncp_int      big_modexp_ncp_sw
1752 #endif
1753 
1754 #define MAX_EXP_BIT_GROUP_SIZE 6
1755 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1756 
1757 /* ARGSUSED */
1758 static BIG_ERR_CODE
1759 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1760     BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1761 
1762 {
1763         BIGNUM          apowers[APOWERS_MAX_SIZE];
1764         BIGNUM          tmp1;
1765         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
1766         int             i, j, k, l, m, p;
1767         int             bit, bitind, bitcount, groupbits, apowerssize;
1768         int             nbits;
1769         BIG_ERR_CODE    err;
1770 
1771         nbits = big_numbits(e);
1772         if (nbits < 50) {
1773                 groupbits = 1;
1774                 apowerssize = 1;
1775         } else {
1776                 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1777                 apowerssize = 1 << (groupbits - 1);
1778         }
1779 
1780 
1781         if ((err = big_init1(&tmp1, 2 * n->len + 1,
1782             tmp1value, arraysize(tmp1value))) != BIG_OK) {
1783                 return (err);
1784         }
1785 
1786         /* set the malloced bit to help cleanup */
1787         for (i = 0; i < apowerssize; i++) {
1788                 apowers[i].malloced = 0;
1789         }

1790         for (i = 0; i < apowerssize; i++) {
1791                 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1792                     BIG_OK) {
1793                         goto ret;
1794                 }
1795         }
1796 
1797         (void) big_copy(&(apowers[0]), ma);
1798 
1799         if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1800                 goto ret;
1801         }
1802         (void) big_copy(ma, &tmp1);
1803 
1804         for (i = 1; i < apowerssize; i++) {
1805                 if ((err = big_mont_mul(&tmp1, ma,
1806                     &(apowers[i-1]), n, n0)) != BIG_OK) {
1807                         goto ret;
1808                 }
1809                 (void) big_copy(&apowers[i], &tmp1);
1810         }
1811 
1812         bitind = nbits % BIG_CHUNK_SIZE;
1813         k = 0;
1814         l = 0;
1815         p = 0;
1816         bitcount = 0;
1817         for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1818                 for (j = bitind - 1; j >= 0; j--) {
1819                         bit = (e->value[i] >> j) & 1;
1820                         if ((bitcount == 0) && (bit == 0)) {
1821                                 if ((err = big_mont_mul(tmp,
1822                                     tmp, tmp, n, n0)) != BIG_OK) {
1823                                         goto ret;
1824                                 }
1825                         } else {
1826                                 bitcount++;


1895 #include <sys/sysmacros.h>
1896 #include <sys/regset.h>
1897 #include <sys/fpu/fpusystm.h>
1898 
1899 /* the alignment for block stores to save fp registers */
1900 #define FPR_ALIGN       (64)
1901 
1902 extern void big_savefp(kfpu_t *);
1903 extern void big_restorefp(kfpu_t *);
1904 
1905 #endif /* _KERNEL */
1906 
1907 /*
1908  * This version makes use of floating point for performance
1909  */
1910 static BIG_ERR_CODE
1911 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1912     BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1913 {
1914 
1915         int             i, j, k, l, m, p, bit, bitind, bitcount, nlen;

1916         double          dn0;
1917         double          *dn, *dt, *d16r, *d32r;
1918         uint32_t        *nint, *prod;
1919         double          *apowers[APOWERS_MAX_SIZE];
1920         int             nbits, groupbits, apowerssize;
1921         BIG_ERR_CODE    err = BIG_OK;
1922 
1923 #ifdef _KERNEL
1924         uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1925         kfpu_t *fpu;
1926 
1927 #ifdef DEBUG
1928         if (!fpu_exists)
1929                 return (BIG_GENERAL_ERR);
1930 #endif
1931 
1932         fpu =  (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1933         big_savefp(fpu);
1934 
1935 #endif /* _KERNEL */
1936 
1937         nbits = big_numbits(e);
1938         if (nbits < 50) {
1939                 groupbits = 1;
1940                 apowerssize = 1;


2060                                     dt, dn, nint, nlen, dn0);
2061                         } else {
2062                                 bitcount++;
2063                                 p = p * 2 + bit;
2064                                 if (bit == 1) {
2065                                         k = k + l + 1;
2066                                         l = 0;
2067                                 } else {
2068                                         l++;
2069                                 }
2070                                 if (bitcount == groupbits) {
2071                                         for (m = 0; m < k; m++) {
2072                                                 conv_i32_to_d32_and_d16(d32r,
2073                                                     d16r, prod, nlen);
2074                                                 mont_mulf_noconv(prod, d32r,
2075                                                     d16r, dt, dn, nint,
2076                                                     nlen, dn0);
2077                                         }
2078                                         conv_i32_to_d32(d32r, prod, nlen);
2079                                         mont_mulf_noconv(prod, d32r,
2080                                             apowers[p >> (l+1)],
2081                                             dt, dn, nint, nlen, dn0);
2082                                         for (m = 0; m < l; m++) {
2083                                                 conv_i32_to_d32_and_d16(d32r,
2084                                                     d16r, prod, nlen);
2085                                                 mont_mulf_noconv(prod, d32r,
2086                                                     d16r, dt, dn, nint,
2087                                                     nlen, dn0);
2088                                         }
2089                                         k = 0;
2090                                         l = 0;
2091                                         p = 0;
2092                                         bitcount = 0;
2093                                 }
2094                         }
2095                 }
2096                 bitind = BIG_CHUNK_SIZE;
2097         }
2098 
2099         for (m = 0; m < k; m++) {
2100                 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);


2166 {
2167         BIGNUM          ma, tmp, rr;
2168         BIG_CHUNK_TYPE  mavalue[BIGTMPSIZE];
2169         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2170         BIG_CHUNK_TYPE  rrvalue[BIGTMPSIZE];
2171         BIG_ERR_CODE    err;
2172         BIG_CHUNK_TYPE  n0;
2173 
2174         if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue)))  !=
2175             BIG_OK) {
2176                 return (err);
2177         }
2178         ma.len = 1;
2179         ma.value[0] = 0;
2180 
2181         if ((err = big_init1(&tmp, 2 * n->len + 1,
2182             tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2183                 goto ret1;
2184         }
2185 
2186         /* set the malloced bit to help cleanup */
2187         rr.malloced = 0;

2188         if (n_rr == NULL) {
2189                 if ((err = big_init1(&rr, 2 * n->len + 1,
2190                     rrvalue, arraysize(rrvalue))) != BIG_OK) {
2191                         goto ret2;
2192                 }
2193                 if (big_mont_rr(&rr, n) != BIG_OK) {
2194                         goto ret;
2195                 }
2196                 n_rr = &rr;
2197         }
2198 
2199         n0 = big_n0(n->value[0]);
2200 
2201         if (big_cmp_abs(a, n) > 0) {
2202                 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2203                         goto ret;
2204                 }
2205                 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2206         } else {
2207                 err = big_mont_conv(&ma, a, n, n0, n_rr);


2364 
2365 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2366 BIGNUM big_One = {1, 1, 1, 0, onearr};
2367 
2368 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2369 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2370 
2371 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2372 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2373 
2374 
2375 BIG_ERR_CODE
2376 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2377 {
2378         BIGNUM          *high, *low, *mid, *t;
2379         BIGNUM          t1, t2, t3, prod;
2380         BIG_CHUNK_TYPE  t1value[BIGTMPSIZE];
2381         BIG_CHUNK_TYPE  t2value[BIGTMPSIZE];
2382         BIG_CHUNK_TYPE  t3value[BIGTMPSIZE];
2383         BIG_CHUNK_TYPE  prodvalue[BIGTMPSIZE];
2384         int             i, nbits, diff, nrootbits, highbits;

2385         BIG_ERR_CODE    err;
2386 
2387         nbits = big_numbits(n);
2388 
2389         if ((err = big_init1(&t1, n->len + 1,
2390             t1value, arraysize(t1value))) != BIG_OK)
2391                 return (err);
2392         if ((err = big_init1(&t2, n->len + 1,
2393             t2value, arraysize(t2value))) != BIG_OK)
2394                 goto ret1;
2395         if ((err = big_init1(&t3, n->len + 1,
2396             t3value, arraysize(t3value))) != BIG_OK)
2397                 goto ret2;
2398         if ((err = big_init1(&prod, n->len + 1,
2399             prodvalue, arraysize(prodvalue))) != BIG_OK)
2400                 goto ret3;
2401 
2402         nrootbits = (nbits + 1) / 2;
2403         t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2404         for (i = 0; i < t1.len; i++) {


2534                         m = n;
2535                         n = t;
2536                 }
2537         }
2538         err = BIG_OK;
2539 
2540 ret:
2541         if (t3.malloced) big_finish(&t3);
2542 ret2:
2543         if (t2.malloced) big_finish(&t2);
2544 ret1:
2545         if (t1.malloced) big_finish(&t1);
2546 
2547         return (err);
2548 }
2549 
2550 
2551 BIG_ERR_CODE
2552 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2553 {
2554         int             m, w, i;

2555         BIG_CHUNK_TYPE  bit;
2556         BIGNUM          ki, tmp, tmp2;
2557         BIG_CHUNK_TYPE  kivalue[BIGTMPSIZE];
2558         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2559         BIG_CHUNK_TYPE  tmp2value[BIGTMPSIZE];
2560         BIG_ERR_CODE    err;
2561 
2562         if (big_cmp_abs(k, &big_One) == 0) {
2563                 (void) big_copy(Lk, p);
2564                 (void) big_copy(Lkminus1, &big_Two);
2565                 return (BIG_OK);
2566         }
2567 
2568         if ((err = big_init1(&ki, k->len + 1,
2569             kivalue, arraysize(kivalue))) != BIG_OK)
2570                 return (err);
2571 
2572         if ((err = big_init1(&tmp, 2 * n->len +1,
2573             tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2574                 goto ret1;
2575 
2576         if ((err = big_init1(&tmp2, n->len,
2577             tmp2value, arraysize(tmp2value))) != BIG_OK)
2578                 goto ret2;
2579 
2580         m = big_numbits(k);
2581         ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2582         w = (m - 1) / BIG_CHUNK_SIZE;
2583         bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2584         for (i = 0; i < ki.len; i++) {
2585                 ki.value[i] = 0;
2586         }
2587         ki.value[ki.len - 1] = bit;
2588         if (big_cmp_abs(k, &ki) != 0) {
2589                 (void) big_double(&ki, &ki);
2590         }
2591         (void) big_sub_pos(&ki, &ki, k);
2592 


2764 ret3:
2765         if (tmp.malloced) big_finish(&tmp);
2766 ret2:
2767         if (nminus1.malloced) big_finish(&nminus1);
2768 ret1:
2769         if (o.malloced) big_finish(&o);
2770 
2771         return (err);
2772 }
2773 
2774 
2775 BIG_ERR_CODE
2776 big_isprime_pos(BIGNUM *n)
2777 {
2778         return (big_isprime_pos_ext(n, NULL));
2779 }
2780 
2781 
2782 #define SIEVESIZE 1000
2783 
2784 uint32_t smallprimes[] =
2785 {
2786 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2787 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97
2788 };
2789 
2790 
2791 BIG_ERR_CODE
2792 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2793 {



2794         BIG_ERR_CODE    err;
2795         int             sieve[SIEVESIZE];
2796         int             i;
2797         uint32_t        off, p;
2798 
2799         if ((err = big_copy(result, n)) != BIG_OK) {
2800                 return (err);
2801         }
2802         result->value[0] |= 1;
2803         /* CONSTCOND */
2804         while (1) {
2805                 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2806                 for (i = 0;
2807                     i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2808                         p = smallprimes[i];
2809                         off = big_modhalf_pos(result, p);
2810                         off = p - off;
2811                         if ((off % 2) == 1) {
2812                                 off = off + p;
2813                         }




   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 






  26 /*
  27  * Configuration guide
  28  * -------------------
  29  *
  30  * There are 4 preprocessor symbols used to configure the bignum
  31  * implementation.  This file contains no logic to configure based on
  32  * processor; we leave that to the Makefiles to specify.
  33  *
  34  * USE_FLOATING_POINT
  35  *   Meaning: There is support for a fast floating-point implementation of
  36  *   Montgomery multiply.
  37  *
  38  * PSR_MUL
  39  *   Meaning: There are processor-specific versions of the low level
  40  *   functions to implement big_mul.  Those functions are: big_mul_set_vec,
  41  *   big_mul_add_vec, big_mul_vec, and big_sqr_vec.  PSR_MUL implies support
  42  *   for all 4 functions.  You cannot pick and choose which subset of these
  43  *   functions to support; that would lead to a rat's nest of #ifdefs.
  44  *
  45  * HWCAP
  46  *   Meaning: Call multiply support functions through a function pointer.
  47  *   On x86, there are multiple implementations for different hardware
  48  *   capabilities, such as MMX, SSE2, etc.  Tests are made at run-time, when
  49  *   a function is first used.  So, the support functions are called through
  50  *   a function pointer.  There is no need for that on Sparc, because there
  51  *   is only one implementation; support functions are called directly.
  52  *   Later, if there were some new VIS instruction, or something, and a
  53  *   run-time test were needed, rather than variant kernel modules and
  54  *   libraries, then HWCAP would be defined for Sparc, as well.
  55  *
  56  * UMUL64
  57  *   Meaning: It is safe to use generic C code that assumes the existence
  58  *   of a 32 x 32 --> 64 bit unsigned multiply.  If this is not defined,
  59  *   then the generic code for big_mul_add_vec() must necessarily be very slow,
  60  *   because it must fall back to using 16 x 16 --> 32 bit multiplication.
  61  *
  62  */
  63 
  64 
  65 #include <sys/types.h>
  66 #include "bignum.h"
  67 
  68 #ifdef  _KERNEL
  69 #include <sys/ddi.h>
  70 #include <sys/mdesc.h>
  71 #include <sys/crypto/common.h>
  72 

  73 #include <sys/kmem.h>
  74 #include <sys/param.h>
  75 #include <sys/sunddi.h>
  76 
  77 #else
  78 #include <stdlib.h>
  79 #include <stdio.h>
  80 #include <assert.h>
  81 #define ASSERT  assert
  82 #endif  /* _KERNEL */
  83 
  84 #ifdef  _LP64 /* truncate 64-bit size_t to 32-bits */
  85 #define UI32(ui)        ((uint32_t)ui)
  86 #else /* size_t already 32-bits */
  87 #define UI32(ui)        (ui)
  88 #endif
  89 
  90 
  91 #ifdef  _KERNEL
  92 #define big_malloc(size)        kmem_alloc(size, KM_NOSLEEP)
  93 #define big_free(ptr, size)     kmem_free(ptr, size)
  94 
  95 void *
  96 big_realloc(void *from, size_t oldsize, size_t newsize)
  97 {
  98         void *rv;
  99 
 100         rv = kmem_alloc(newsize, KM_NOSLEEP);
 101         if (rv != NULL)
 102                 bcopy(from, rv, oldsize);
 103         kmem_free(from, oldsize);
 104         return (rv);
 105 }
 106 
 107 #else   /* _KERNEL */
 108 





 109 #ifndef MALLOC_DEBUG
 110 
 111 #define big_malloc(size)        malloc(size)
 112 #define big_free(ptr, size)     free(ptr)
 113 
 114 #else
 115 
 116 void
 117 big_free(void *ptr, size_t size)
 118 {
 119         printf("freed %d bytes at %p\n", size, ptr);
 120         free(ptr);
 121 }
 122 
 123 void *
 124 big_malloc(size_t size)
 125 {
 126         void *rv;
 127         rv = malloc(size);
 128         printf("malloced %d bytes, addr:%p\n", size, rv);
 129         return (rv);
 130 }
 131 #endif /* MALLOC_DEBUG */
 132 
 133 #define big_realloc(x, y, z) realloc((x), (z))
 134 
 135 
 136 /*
 137  * printbignum()
 138  * Print a BIGNUM type to stdout.
 139  */
 140 void
 141 printbignum(char *aname, BIGNUM *a)
 142 {
 143         int i;
 144 
 145         (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
 146         for (i = a->len - 1; i >= 0; i--) {
 147 #ifdef BIGNUM_CHUNK_32
 148                 (void) printf("%08x ", a->value[i]);
 149                 if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
 150                         (void) printf("\n");
 151                 }
 152 #else
 153                 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
 154                     (uint32_t)((a->value[i]) & 0xffffffff));
 155                 if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
 156                         (void) printf("\n");
 157                 }
 158 #endif
 159         }
 160         (void) printf("\n");
 161 }
 162 
 163 #endif  /* _KERNEL */
 164 
 165 
 166 /*
 167  * big_init()
 168  * Initialize and allocate memory for a BIGNUM type.
 169  *
 170  * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
 171  *
 172  * Note: call big_finish() to free memory allocated by big_init().
 173  *
 174  * Input:
 175  * number       Uninitialized memory for BIGNUM
 176  * size         Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 177  *
 178  * Output:
 179  * number       Initialized BIGNUM
 180  *
 181  * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 182  */
 183 BIG_ERR_CODE
 184 big_init(BIGNUM *number, int size)
 185 {
 186         number->value = big_malloc(BIGNUM_WORDSIZE * size);
 187         if (number->value == NULL) {
 188                 return (BIG_NO_MEM);
 189         }
 190         number->size = size;
 191         number->len = 0;
 192         number->sign = 1;
 193         number->malloced = 1;
 194         return (BIG_OK);
 195 }
 196 
 197 
 198 /*
 199  * big_init1()
 200  * Initialize and, if needed, allocate memory for a BIGNUM type.
 201  * Use the buffer passed, buf, if any, instad of allocating memory
 202  * if it's at least "size" bytes.
 203  *
 204  * Note: call big_finish() to free memory allocated by big_init().
 205  *
 206  * Input:
 207  * number       Uninitialized memory for BIGNUM
 208  * size         Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 209  * buf          Buffer for storing a BIGNUM.
 210  *              If NULL, big_init1() will allocate a buffer
 211  * bufsize      Size, in BIG_CHUNK_SIZE_bit words, of buf
 212  *
 213  * Output:
 214  * number       Initialized BIGNUM
 215  *
 216  * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 217  */
 218 BIG_ERR_CODE
 219 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
 220 {
 221         if ((buf == NULL) || (size > bufsize)) {
 222                 number->value = big_malloc(BIGNUM_WORDSIZE * size);
 223                 if (number->value == NULL) {
 224                         return (BIG_NO_MEM);
 225                 }
 226                 number->size = size;
 227                 number->malloced = 1;
 228         } else {
 229                 number->value = buf;
 230                 number->size = bufsize;
 231                 number->malloced = 0;
 232         }
 233         number->len = 0;
 234         number->sign = 1;
 235 
 236         return (BIG_OK);
 237 }
 238 
 239 
 240 /*
 241  * big_finish()
 242  * Free memory, if any, allocated by big_init() or big_init1().
 243  */
 244 void
 245 big_finish(BIGNUM *number)
 246 {
 247         if (number->malloced == 1) {
 248                 big_free(number->value, BIGNUM_WORDSIZE * number->size);

 249                 number->malloced = 0;
 250         }
 251 }
 252 
 253 
 254 /*
 255  * bn->size should be at least
 256  * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes
 257  * converts from byte-big-endian format to bignum format (words in
 258  * little endian order, but bytes within the words big endian)
 259  */
 260 void
 261 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
 262 {
 263         int             i, j;
 264         uint32_t        offs;
 265         const uint32_t  slen = UI32(len);
 266         BIG_CHUNK_TYPE  word;
 267         uchar_t         *knwordp;
 268 
 269         if (slen == 0) {
 270                 bn->len = 1;
 271                 bn->value[0] = 0;
 272                 return;
 273         }
 274 
 275         offs = slen % BIGNUM_WORDSIZE;
 276         bn->len = slen / BIGNUM_WORDSIZE;
 277 
 278         for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
 279                 knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);


 280                 word = knwordp[0];
 281                 for (j = 1; j < BIGNUM_WORDSIZE; j++) {
 282                         word = (word << BITSINBYTE) + knwordp[j];
 283                 }
 284                 bn->value[i] = word;
 285         }
 286         if (offs > 0) {
 287                 word = kn[0];
 288                 for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
 289                 bn->value[bn->len++] = word;
 290         }
 291         while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
 292                 bn->len --;
 293         }
 294 }
 295 
 296 
 297 /*
 298  * copies the least significant len bytes if
 299  * len < bn->len * BIGNUM_WORDSIZE
 300  * converts from bignum format to byte-big-endian format.
 301  * bignum format is words of type  BIG_CHUNK_TYPE in little endian order.
 302  */
 303 void
 304 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
 305 {
 306         int             i, j;
 307         uint32_t        offs;
 308         const uint32_t  slen = UI32(len);
 309         BIG_CHUNK_TYPE  word;
 310 
 311         if (len < BIGNUM_WORDSIZE * bn->len) {
 312                 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {




 313                         word = bn->value[i];
 314                         for (j = 0; j < BIGNUM_WORDSIZE; j++) {
 315                                 kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
 316                                     word & 0xff;
 317                                 word = word >> BITSINBYTE;
 318                         }
 319                 }
 320                 offs = slen % BIGNUM_WORDSIZE;




 321                 if (offs > 0) {
 322                         word = bn->value[slen / BIGNUM_WORDSIZE];
 323                         for (i =  slen % BIGNUM_WORDSIZE; i > 0; i --) {






 324                                 kn[i - 1] = word & 0xff;
 325                                 word = word >> BITSINBYTE;
 326                         }
 327                 }
 328         } else {
 329                 for (i = 0; i < bn->len; i++) {
 330                         word = bn->value[i];
 331                         for (j = 0; j < BIGNUM_WORDSIZE; j++) {
 332                                 kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
 333                                     word & 0xff;
 334                                 word = word >> BITSINBYTE;
 335                         }
 336                 }
 337                 for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {






 338                         kn[i] = 0;
 339                 }
 340         }
 341 }
 342 
 343 
 344 int
 345 big_bitlength(BIGNUM *a)
 346 {
 347         int             l = 0, b = 0;
 348         BIG_CHUNK_TYPE  c;
 349 
 350         l = a->len - 1;
 351         while ((l > 0) && (a->value[l] == 0)) {
 352                 l--;
 353         }
 354         b = BIG_CHUNK_SIZE;
 355         c = a->value[l];
 356         while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
 357                 c = c << 1;
 358                 b--;
 359         }
 360 
 361         return (l * BIG_CHUNK_SIZE + b);
 362 }
 363 
 364 
 365 BIG_ERR_CODE
 366 big_copy(BIGNUM *dest, BIGNUM *src)
 367 {
 368         BIG_CHUNK_TYPE  *newptr;
 369         int             i, len;
 370 
 371         len = src->len;
 372         while ((len > 1) && (src->value[len - 1] == 0)) {
 373                 len--;
 374         }
 375         src->len = len;
 376         if (dest->size < len) {
 377                 if (dest->malloced == 1) {
 378                         newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
 379                             BIGNUM_WORDSIZE * dest->size,
 380                             BIGNUM_WORDSIZE * len);
 381                 } else {
 382                         newptr = (BIG_CHUNK_TYPE *)
 383                             big_malloc(BIGNUM_WORDSIZE * len);
 384                         if (newptr != NULL) {
 385                                 dest->malloced = 1;
 386                         }
 387                 }
 388                 if (newptr == NULL) {
 389                         return (BIG_NO_MEM);
 390                 }
 391                 dest->value = newptr;
 392                 dest->size = len;
 393         }
 394         dest->len = len;
 395         dest->sign = src->sign;
 396         for (i = 0; i < len; i++) {
 397                 dest->value[i] = src->value[i];
 398         }
 399 
 400         return (BIG_OK);
 401 }
 402 
 403 
 404 BIG_ERR_CODE
 405 big_extend(BIGNUM *number, int size)
 406 {
 407         BIG_CHUNK_TYPE  *newptr;
 408         int             i;
 409 
 410         if (number->size >= size)
 411                 return (BIG_OK);
 412         if (number->malloced) {
 413                 number->value = big_realloc(number->value,
 414                     BIGNUM_WORDSIZE * number->size,
 415                     BIGNUM_WORDSIZE * size);
 416         } else {
 417                 newptr = big_malloc(BIGNUM_WORDSIZE * size);
 418                 if (newptr != NULL) {
 419                         for (i = 0; i < number->size; i++) {
 420                                 newptr[i] = number->value[i];
 421                         }
 422                 }
 423                 number->value = newptr;
 424         }
 425 
 426         if (number->value == NULL) {
 427                 return (BIG_NO_MEM);
 428         }
 429 
 430         number->size = size;
 431         number->malloced = 1;
 432         return (BIG_OK);
 433 }
 434 
 435 
 436 /* returns 1 if n == 0 */
 437 int


 580 
 581 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
 582 int
 583 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
 584 {
 585         int     i;
 586 
 587         if (aa->len > bb->len) {
 588                 for (i = aa->len - 1; i > bb->len - 1; i--) {
 589                         if (aa->value[i] > 0) {
 590                                 return (1);
 591                         }
 592                 }
 593         } else if (aa->len < bb->len) {
 594                 for (i = bb->len - 1; i > aa->len - 1; i--) {
 595                         if (bb->value[i] > 0) {
 596                                 return (-1);
 597                         }
 598                 }
 599         } else {
 600                 i = aa->len - 1;
 601         }
 602         for (; i >= 0; i--) {
 603                 if (aa->value[i] > bb->value[i]) {
 604                         return (1);
 605                 } else if (aa->value[i] < bb->value[i]) {
 606                         return (-1);
 607                 }
 608         }
 609 
 610         return (0);
 611 }
 612 
 613 
 614 BIG_ERR_CODE
 615 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
 616 {
 617         BIG_ERR_CODE    err;
 618 
 619         if ((bb->sign == -1) && (aa->sign == 1)) {
 620                 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {


 931         result->sign = aa->sign;
 932 }
 933 
 934 
 935 /* it is assumed that result->size is big enough */
 936 void
 937 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
 938 {
 939         int              i;
 940         BIG_CHUNK_TYPE  cy, ai;
 941 
 942         if (offs == 0) {
 943                 if (result != aa) {
 944                         (void) big_copy(result, aa);
 945                 }
 946                 return;
 947         }
 948         cy = aa->value[0] >> offs;
 949         for (i = 1; i < aa->len; i++) {
 950                 ai = aa->value[i];
 951                 result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
 952                 cy = ai >> offs;
 953         }
 954         result->len = aa->len;
 955         result->value[result->len - 1] = cy;
 956         result->sign = aa->sign;
 957 }
 958 
 959 
 960 /*
 961  * result = aa/bb   remainder = aa mod bb
 962  * it is assumed that aa and bb are positive
 963  */
 964 BIG_ERR_CODE
 965 big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
 966 {
 967         BIG_ERR_CODE    err = BIG_OK;
 968         int             i, alen, blen, tlen, rlen, offs;
 969         BIG_CHUNK_TYPE  higha, highb, coeff;
 970         BIG_CHUNK_TYPE  *a, *b;
 971         BIGNUM          bbhigh, bblow, tresult, tmp1, tmp2;
 972         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
 973         BIG_CHUNK_TYPE  tmp2value[BIGTMPSIZE];
 974         BIG_CHUNK_TYPE  tresultvalue[BIGTMPSIZE];
 975         BIG_CHUNK_TYPE  bblowvalue[BIGTMPSIZE];
 976         BIG_CHUNK_TYPE  bbhighvalue[BIGTMPSIZE];
 977 
 978         a = aa->value;
 979         b = bb->value;
 980         alen = aa->len;
 981         blen = bb->len;
 982         while ((alen > 1) && (a[alen - 1] == 0)) {
 983                 alen = alen - 1;
 984         }
 985         aa->len = alen;


1096         if ((remainder != NULL) &&
1097             ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1098                 goto ret;
1099 
1100         if (result != NULL)
1101                 err = big_copy(result, &tresult);
1102 
1103 ret:
1104         big_finish(&tresult);
1105 ret4:
1106         big_finish(&tmp1);
1107 ret3:
1108         big_finish(&tmp2);
1109 ret2:
1110         big_finish(&bbhigh);
1111 ret1:
1112         big_finish(&bblow);
1113         return (err);
1114 }
1115 
1116 
1117 /*
1118  * If there is no processor-specific integer implementation of
1119  * the lower level multiply functions, then this code is provided
1120  * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1121  * big_sqr_vec().
1122  *
1123  * There are two generic implementations.  One that assumes that
1124  * there is hardware and C compiler support for a 32 x 32 --> 64
1125  * bit unsigned multiply, but otherwise is not specific to any
1126  * processor, platform, or ISA.
1127  *
1128  * The other makes very few assumptions about hardware capabilities.
1129  * It does not even assume that there is any implementation of a
1130  * 32 x 32 --> 64 bit multiply that is accessible to C code and
1131  * appropriate to use.  It falls constructs 32 x 32 --> 64 bit
1132  * multiplies from 16 x 16 --> 32 bit multiplies.
1133  *
1134  */
1135 
1136 #if !defined(PSR_MUL)
1137 
1138 #ifdef UMUL64
1139 
1140 #if (BIG_CHUNK_SIZE == 32)
1141 
1142 #define UNROLL8
1143 
1144 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1145         p = pf * d; \
1146         pf = (uint64_t)a[R + 1]; \
1147         t = p + cy; \
1148         r[R] = (uint32_t)t; \
1149         cy = t >> 32
1150 
1151 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1152         p = pf * d; \
1153         t = p + cy; \
1154         r[R] = (uint32_t)t; \
1155         cy = t >> 32
1156 
1157 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1158         t = (uint64_t)r[R]; \
1159         p = pf * d; \
1160         pf = (uint64_t)a[R + 1]; \
1161         t = p + t + cy; \
1162         r[R] = (uint32_t)t; \
1163         cy = t >> 32
1164 
1165 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1166         t = (uint64_t)r[R]; \
1167         p = pf * d; \
1168         t = p + t + cy; \
1169         r[R] = (uint32_t)t; \
1170         cy = t >> 32
1171 
1172 #ifdef UNROLL8
1173 
1174 #define UNROLL 8
1175 
1176 /*
1177  * r = a * b
1178  * where r and a are vectors; b is a single 32-bit digit
1179  */
1180 


1296         s = (uint64_t)a[0];
1297         s = s * s;
1298         r[0] = (uint32_t)s;
1299         cy = s >> 32;
1300         p = ((uint64_t)r[1] << 1) + cy;
1301         r[1] = (uint32_t)p;
1302         cy = p >> 32;
1303         row = 1;
1304         col = 2;
1305         while (row < len) {
1306                 s = (uint64_t)a[row];
1307                 s = s * s;
1308                 p = (uint64_t)r[col] << 1;
1309                 t = p + s;
1310                 d = (uint32_t)t;
1311                 t2 = (uint64_t)d + cy;
1312                 r[col] = (uint32_t)t2;
1313                 cy = (t >> 32) + (t2 >> 32);
1314                 if (row == len - 1)
1315                         break;
1316                 p = ((uint64_t)r[col + 1] << 1) + cy;
1317                 r[col + 1] = (uint32_t)p;
1318                 cy = p >> 32;
1319                 ++row;
1320                 col += 2;
1321         }
1322         r[col + 1] = (uint32_t)cy;
1323 }
1324 
1325 #else /* BIG_CHUNK_SIZE == 64 */
1326 
1327 /*
1328  * r = r + a * digit, r and a are vectors of length len
1329  * returns the carry digit
1330  */
1331 BIG_CHUNK_TYPE
1332 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1333     BIG_CHUNK_TYPE digit)
1334 {
1335         BIG_CHUNK_TYPE  cy, cy1, retcy, dlow, dhigh;
1336         int             i;
1337 
1338         cy1 = 0;
1339         dlow = digit & BIG_CHUNK_LOWHALFBITS;
1340         dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1341         for (i = 0; i < len; i++) {
1342                 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +


1381 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1382     BIG_CHUNK_TYPE digit)
1383 {
1384         int     i;
1385 
1386         ASSERT(r != a);
1387         for (i = 0; i < len; i++) {
1388                 r[i] = 0;
1389         }
1390         return (big_mul_add_vec(r, a, len, digit));
1391 }
1392 
1393 void
1394 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1395 {
1396         int i;
1397 
1398         ASSERT(r != a);
1399         r[len] = big_mul_set_vec(r, a, len, a[0]);
1400         for (i = 1; i < len; ++i)
1401                 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1402 }
1403 
1404 #endif /* BIG_CHUNK_SIZE == 32/64 */
1405 
1406 
1407 #else /* ! UMUL64 */
1408 
1409 #if (BIG_CHUNK_SIZE != 32)
1410 #error Don't use 64-bit chunks without defining UMUL64
1411 #endif
1412 
1413 
1414 /*
1415  * r = r + a * digit, r and a are vectors of length len
1416  * returns the carry digit
1417  */
1418 uint32_t
1419 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1420 {
1421         uint32_t cy, cy1, retcy, dlow, dhigh;
1422         int i;
1423 
1424         cy1 = 0;
1425         dlow = digit & 0xffff;
1426         dhigh = digit >> 16;


1453 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1454 {
1455         int     i;
1456 
1457         ASSERT(r != a);
1458         for (i = 0; i < len; i++) {
1459                 r[i] = 0;
1460         }
1461 
1462         return (big_mul_add_vec(r, a, len, digit));
1463 }
1464 
1465 void
1466 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1467 {
1468         int i;
1469 
1470         ASSERT(r != a);
1471         r[len] = big_mul_set_vec(r, a, len, a[0]);
1472         for (i = 1; i < len; ++i)
1473                 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1474 }
1475 
1476 #endif /* UMUL64 */
1477 
1478 void
1479 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1480     BIG_CHUNK_TYPE *b, int blen)
1481 {
1482         int i;
1483 
1484         r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1485         for (i = 1; i < blen; ++i)
1486                 r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
1487 }
1488 
1489 
1490 #endif /* ! PSR_MUL */
1491 
1492 
1493 /*
1494  * result = aa * bb  result->value should be big enough to hold the result
1495  *
1496  * Implementation: Standard grammar school algorithm
1497  *
1498  */
1499 BIG_ERR_CODE
1500 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1501 {
1502         BIGNUM          tmp1;
1503         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
1504         BIG_CHUNK_TYPE  *r, *t, *a, *b;
1505         BIG_ERR_CODE    err;
1506         int             i, alen, blen, rsize, sign, diff;


1513                         BIGNUM *tt;
1514                         tt = aa;
1515                         aa = bb;
1516                         bb = tt;
1517                 }
1518         }
1519         a = aa->value;
1520         b = bb->value;
1521         alen = aa->len;
1522         blen = bb->len;
1523         while ((alen > 1) && (a[alen - 1] == 0)) {
1524                 alen--;
1525         }
1526         aa->len = alen;
1527         while ((blen > 1) && (b[blen - 1] == 0)) {
1528                 blen--;
1529         }
1530         bb->len = blen;
1531 
1532         rsize = alen + blen;
1533         ASSERT(rsize > 0);
1534         if (result->size < rsize) {
1535                 err = big_extend(result, rsize);
1536                 if (err != BIG_OK) {
1537                         return (err);
1538                 }
1539                 /* aa or bb might be an alias to result */
1540                 a = aa->value;
1541                 b = bb->value;
1542         }
1543         r = result->value;
1544 
1545         if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1546                 result->len = 1;
1547                 result->sign = 1;
1548                 r[0] = 0;
1549                 return (BIG_OK);
1550         }
1551         sign = aa->sign * bb->sign;
1552         if ((alen == 1) && (a[0] == 1)) {
1553                 for (i = 0; i < blen; i++) {


1571                 return (err);
1572         }
1573         (void) big_copy(&tmp1, aa);
1574         t = tmp1.value;
1575 
1576         for (i = 0; i < rsize; i++) {
1577                 t[i] = 0;
1578         }
1579 
1580         if (diff == 0 && alen > 2) {
1581                 BIG_SQR_VEC(t, a, alen);
1582         } else if (blen > 0) {
1583                 BIG_MUL_VEC(t, a, alen, b, blen);
1584         }
1585 
1586         if (t[rsize - 1] == 0) {
1587                 tmp1.len = rsize - 1;
1588         } else {
1589                 tmp1.len = rsize;
1590         }
1591 
1592         err = big_copy(result, &tmp1);
1593 
1594         result->sign = sign;
1595 
1596         big_finish(&tmp1);
1597 
1598         return (err);
1599 }
1600 
1601 
1602 /*
1603  * caller must ensure that  a < n,  b < n  and  ret->size >=  2 * n->len + 1
1604  * and that ret is not n
1605  */
1606 BIG_ERR_CODE
1607 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1608 {
1609         int     i, j, nlen, needsubtract;
1610         BIG_CHUNK_TYPE  *nn, *rr;
1611         BIG_CHUNK_TYPE  digit, c;
1612         BIG_ERR_CODE    err;
1613 
1614         nlen = n->len;
1615         nn = n->value;
1616 
1617         rr = ret->value;
1618 


1641         needsubtract = 0;
1642         if ((rr[2 * nlen]  != 0))
1643                 needsubtract = 1;
1644         else {
1645                 for (i = 2 * nlen - 1; i >= nlen; i--) {
1646                         if (rr[i] > nn[i - nlen]) {
1647                                 needsubtract = 1;
1648                                 break;
1649                         } else if (rr[i] < nn[i - nlen]) {
1650                                 break;
1651                         }
1652                 }
1653         }
1654         if (needsubtract)
1655                 big_sub_vec(rr, rr + nlen, nn, nlen);
1656         else {
1657                 for (i = 0; i < nlen; i++) {
1658                         rr[i] = rr[i + nlen];
1659                 }
1660         }
1661 
1662         /* Remove leading zeros, but keep at least 1 digit: */
1663         for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
1664                 ;
1665         ret->len = i + 1;
1666 
1667         return (BIG_OK);
1668 }
1669 
1670 
1671 BIG_CHUNK_TYPE
1672 big_n0(BIG_CHUNK_TYPE n)
1673 {
1674         int             i;
1675         BIG_CHUNK_TYPE  result, tmp;
1676 
1677         result = 0;
1678         tmp = BIG_CHUNK_ALLBITS;
1679         for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1680                 if ((tmp & 1) == 1) {
1681                         result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1682                         tmp = tmp - n;
1683                 } else {
1684                         result = (result >> 1);
1685                 }


1788 
1789 #ifdef  USE_FLOATING_POINT
1790 #define big_modexp_ncp_float    big_modexp_ncp_sw
1791 #else
1792 #define big_modexp_ncp_int      big_modexp_ncp_sw
1793 #endif
1794 
1795 #define MAX_EXP_BIT_GROUP_SIZE 6
1796 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1797 
1798 /* ARGSUSED */
1799 static BIG_ERR_CODE
1800 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1801     BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1802 
1803 {
1804         BIGNUM          apowers[APOWERS_MAX_SIZE];
1805         BIGNUM          tmp1;
1806         BIG_CHUNK_TYPE  tmp1value[BIGTMPSIZE];
1807         int             i, j, k, l, m, p;
1808         uint32_t        bit, bitind, bitcount, groupbits, apowerssize;
1809         uint32_t        nbits;
1810         BIG_ERR_CODE    err;
1811 
1812         nbits = big_numbits(e);
1813         if (nbits < 50) {
1814                 groupbits = 1;
1815                 apowerssize = 1;
1816         } else {
1817                 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1818                 apowerssize = 1 << (groupbits - 1);
1819         }
1820 
1821 
1822         if ((err = big_init1(&tmp1, 2 * n->len + 1,
1823             tmp1value, arraysize(tmp1value))) != BIG_OK) {
1824                 return (err);
1825         }
1826 
1827         /* clear the malloced bit to help cleanup */
1828         for (i = 0; i < apowerssize; i++) {
1829                 apowers[i].malloced = 0;
1830         }
1831 
1832         for (i = 0; i < apowerssize; i++) {
1833                 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1834                     BIG_OK) {
1835                         goto ret;
1836                 }
1837         }
1838 
1839         (void) big_copy(&(apowers[0]), ma);
1840 
1841         if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1842                 goto ret;
1843         }
1844         (void) big_copy(ma, &tmp1);
1845 
1846         for (i = 1; i < apowerssize; i++) {
1847                 if ((err = big_mont_mul(&tmp1, ma,
1848                     &(apowers[i - 1]), n, n0)) != BIG_OK) {
1849                         goto ret;
1850                 }
1851                 (void) big_copy(&apowers[i], &tmp1);
1852         }
1853 
1854         bitind = nbits % BIG_CHUNK_SIZE;
1855         k = 0;
1856         l = 0;
1857         p = 0;
1858         bitcount = 0;
1859         for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1860                 for (j = bitind - 1; j >= 0; j--) {
1861                         bit = (e->value[i] >> j) & 1;
1862                         if ((bitcount == 0) && (bit == 0)) {
1863                                 if ((err = big_mont_mul(tmp,
1864                                     tmp, tmp, n, n0)) != BIG_OK) {
1865                                         goto ret;
1866                                 }
1867                         } else {
1868                                 bitcount++;


1937 #include <sys/sysmacros.h>
1938 #include <sys/regset.h>
1939 #include <sys/fpu/fpusystm.h>
1940 
1941 /* the alignment for block stores to save fp registers */
1942 #define FPR_ALIGN       (64)
1943 
1944 extern void big_savefp(kfpu_t *);
1945 extern void big_restorefp(kfpu_t *);
1946 
1947 #endif /* _KERNEL */
1948 
1949 /*
1950  * This version makes use of floating point for performance
1951  */
1952 static BIG_ERR_CODE
1953 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1954     BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1955 {
1956 
1957         int             i, j, k, l, m, p;
1958         uint32_t        bit, bitind, bitcount, nlen;
1959         double          dn0;
1960         double          *dn, *dt, *d16r, *d32r;
1961         uint32_t        *nint, *prod;
1962         double          *apowers[APOWERS_MAX_SIZE];
1963         uint32_t        nbits, groupbits, apowerssize;
1964         BIG_ERR_CODE    err = BIG_OK;
1965 
1966 #ifdef _KERNEL
1967         uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1968         kfpu_t *fpu;
1969 
1970 #ifdef DEBUG
1971         if (!fpu_exists)
1972                 return (BIG_GENERAL_ERR);
1973 #endif
1974 
1975         fpu =  (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1976         big_savefp(fpu);
1977 
1978 #endif /* _KERNEL */
1979 
1980         nbits = big_numbits(e);
1981         if (nbits < 50) {
1982                 groupbits = 1;
1983                 apowerssize = 1;


2103                                     dt, dn, nint, nlen, dn0);
2104                         } else {
2105                                 bitcount++;
2106                                 p = p * 2 + bit;
2107                                 if (bit == 1) {
2108                                         k = k + l + 1;
2109                                         l = 0;
2110                                 } else {
2111                                         l++;
2112                                 }
2113                                 if (bitcount == groupbits) {
2114                                         for (m = 0; m < k; m++) {
2115                                                 conv_i32_to_d32_and_d16(d32r,
2116                                                     d16r, prod, nlen);
2117                                                 mont_mulf_noconv(prod, d32r,
2118                                                     d16r, dt, dn, nint,
2119                                                     nlen, dn0);
2120                                         }
2121                                         conv_i32_to_d32(d32r, prod, nlen);
2122                                         mont_mulf_noconv(prod, d32r,
2123                                             apowers[p >> (l + 1)],
2124                                             dt, dn, nint, nlen, dn0);
2125                                         for (m = 0; m < l; m++) {
2126                                                 conv_i32_to_d32_and_d16(d32r,
2127                                                     d16r, prod, nlen);
2128                                                 mont_mulf_noconv(prod, d32r,
2129                                                     d16r, dt, dn, nint,
2130                                                     nlen, dn0);
2131                                         }
2132                                         k = 0;
2133                                         l = 0;
2134                                         p = 0;
2135                                         bitcount = 0;
2136                                 }
2137                         }
2138                 }
2139                 bitind = BIG_CHUNK_SIZE;
2140         }
2141 
2142         for (m = 0; m < k; m++) {
2143                 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);


2209 {
2210         BIGNUM          ma, tmp, rr;
2211         BIG_CHUNK_TYPE  mavalue[BIGTMPSIZE];
2212         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2213         BIG_CHUNK_TYPE  rrvalue[BIGTMPSIZE];
2214         BIG_ERR_CODE    err;
2215         BIG_CHUNK_TYPE  n0;
2216 
2217         if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue)))  !=
2218             BIG_OK) {
2219                 return (err);
2220         }
2221         ma.len = 1;
2222         ma.value[0] = 0;
2223 
2224         if ((err = big_init1(&tmp, 2 * n->len + 1,
2225             tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2226                 goto ret1;
2227         }
2228 
2229         /* clear the malloced bit to help cleanup */
2230         rr.malloced = 0;
2231 
2232         if (n_rr == NULL) {
2233                 if ((err = big_init1(&rr, 2 * n->len + 1,
2234                     rrvalue, arraysize(rrvalue))) != BIG_OK) {
2235                         goto ret2;
2236                 }
2237                 if (big_mont_rr(&rr, n) != BIG_OK) {
2238                         goto ret;
2239                 }
2240                 n_rr = &rr;
2241         }
2242 
2243         n0 = big_n0(n->value[0]);
2244 
2245         if (big_cmp_abs(a, n) > 0) {
2246                 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2247                         goto ret;
2248                 }
2249                 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2250         } else {
2251                 err = big_mont_conv(&ma, a, n, n0, n_rr);


2408 
2409 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2410 BIGNUM big_One = {1, 1, 1, 0, onearr};
2411 
2412 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2413 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2414 
2415 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2416 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2417 
2418 
2419 BIG_ERR_CODE
2420 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2421 {
2422         BIGNUM          *high, *low, *mid, *t;
2423         BIGNUM          t1, t2, t3, prod;
2424         BIG_CHUNK_TYPE  t1value[BIGTMPSIZE];
2425         BIG_CHUNK_TYPE  t2value[BIGTMPSIZE];
2426         BIG_CHUNK_TYPE  t3value[BIGTMPSIZE];
2427         BIG_CHUNK_TYPE  prodvalue[BIGTMPSIZE];
2428         int             i, diff;
2429         uint32_t        nbits, nrootbits, highbits;
2430         BIG_ERR_CODE    err;
2431 
2432         nbits = big_numbits(n);
2433 
2434         if ((err = big_init1(&t1, n->len + 1,
2435             t1value, arraysize(t1value))) != BIG_OK)
2436                 return (err);
2437         if ((err = big_init1(&t2, n->len + 1,
2438             t2value, arraysize(t2value))) != BIG_OK)
2439                 goto ret1;
2440         if ((err = big_init1(&t3, n->len + 1,
2441             t3value, arraysize(t3value))) != BIG_OK)
2442                 goto ret2;
2443         if ((err = big_init1(&prod, n->len + 1,
2444             prodvalue, arraysize(prodvalue))) != BIG_OK)
2445                 goto ret3;
2446 
2447         nrootbits = (nbits + 1) / 2;
2448         t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2449         for (i = 0; i < t1.len; i++) {


2579                         m = n;
2580                         n = t;
2581                 }
2582         }
2583         err = BIG_OK;
2584 
2585 ret:
2586         if (t3.malloced) big_finish(&t3);
2587 ret2:
2588         if (t2.malloced) big_finish(&t2);
2589 ret1:
2590         if (t1.malloced) big_finish(&t1);
2591 
2592         return (err);
2593 }
2594 
2595 
2596 BIG_ERR_CODE
2597 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2598 {
2599         int             i;
2600         uint32_t        m, w;
2601         BIG_CHUNK_TYPE  bit;
2602         BIGNUM          ki, tmp, tmp2;
2603         BIG_CHUNK_TYPE  kivalue[BIGTMPSIZE];
2604         BIG_CHUNK_TYPE  tmpvalue[BIGTMPSIZE];
2605         BIG_CHUNK_TYPE  tmp2value[BIGTMPSIZE];
2606         BIG_ERR_CODE    err;
2607 
2608         if (big_cmp_abs(k, &big_One) == 0) {
2609                 (void) big_copy(Lk, p);
2610                 (void) big_copy(Lkminus1, &big_Two);
2611                 return (BIG_OK);
2612         }
2613 
2614         if ((err = big_init1(&ki, k->len + 1,
2615             kivalue, arraysize(kivalue))) != BIG_OK)
2616                 return (err);
2617 
2618         if ((err = big_init1(&tmp, 2 * n->len + 1,
2619             tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2620                 goto ret1;
2621 
2622         if ((err = big_init1(&tmp2, n->len,
2623             tmp2value, arraysize(tmp2value))) != BIG_OK)
2624                 goto ret2;
2625 
2626         m = big_numbits(k);
2627         ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2628         w = (m - 1) / BIG_CHUNK_SIZE;
2629         bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2630         for (i = 0; i < ki.len; i++) {
2631                 ki.value[i] = 0;
2632         }
2633         ki.value[ki.len - 1] = bit;
2634         if (big_cmp_abs(k, &ki) != 0) {
2635                 (void) big_double(&ki, &ki);
2636         }
2637         (void) big_sub_pos(&ki, &ki, k);
2638 


2810 ret3:
2811         if (tmp.malloced) big_finish(&tmp);
2812 ret2:
2813         if (nminus1.malloced) big_finish(&nminus1);
2814 ret1:
2815         if (o.malloced) big_finish(&o);
2816 
2817         return (err);
2818 }
2819 
2820 
2821 BIG_ERR_CODE
2822 big_isprime_pos(BIGNUM *n)
2823 {
2824         return (big_isprime_pos_ext(n, NULL));
2825 }
2826 
2827 
2828 #define SIEVESIZE 1000
2829 





2830 

2831 BIG_ERR_CODE
2832 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2833 {
2834         static const uint32_t smallprimes[] = {
2835             3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2836             51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 };
2837         BIG_ERR_CODE    err;
2838         int             sieve[SIEVESIZE];
2839         int             i;
2840         uint32_t        off, p;
2841 
2842         if ((err = big_copy(result, n)) != BIG_OK) {
2843                 return (err);
2844         }
2845         result->value[0] |= 1;
2846         /* CONSTCOND */
2847         while (1) {
2848                 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2849                 for (i = 0;
2850                     i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2851                         p = smallprimes[i];
2852                         off = big_modhalf_pos(result, p);
2853                         off = p - off;
2854                         if ((off % 2) == 1) {
2855                                 off = off + p;
2856                         }