2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #define big_div_pos_fast big_div_pos
29
30 #include "bignum.h"
31
32 /*
33 * Configuration guide
34 * -------------------
35 *
36 * There are 4 preprocessor symbols used to configure the bignum
37 * implementation. This file contains no logic to configure based on
38 * processor; we leave that to the Makefiles to specify.
39 *
40 * USE_FLOATING_POINT
41 * Meaning: There is support for a fast floating-point implementation of
42 * Montgomery multiply.
43 *
44 * PSR_MUL
45 * Meaning: There are processor-specific versions of the low level
46 * functions to implement big_mul. Those functions are: big_mul_set_vec,
47 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support
48 * for all 4 functions. You cannot pick and choose which subset of these
49 * functions to support; that would lead to a rat's nest of #ifdefs.
50 *
51 * HWCAP
52 * Meaning: Call multiply support functions through a function pointer.
53 * On x86, there are multiple implementations for differnt hardware
54 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when
55 * a function is first used. So, the support functions are called through
56 * a function pointer. There is no need for that on Sparc, because there
57 * is only one implementation; support functions are called directly.
58 * Later, if there were some new VIS instruction, or something, and a
59 * run-time test were needed, rather than variant kernel modules and
60 * libraries, then HWCAP would be defined for Sparc, as well.
61 *
62 * UMUL64
63 * Meaning: It is safe to use generic C code that assumes the existence
64 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined,
65 * then the generic code for big_mul_add_vec() must necessarily be very slow,
66 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
67 *
68 */
69
70
71 #ifdef _KERNEL
72 #include <sys/ddi.h>
73 #include <sys/mdesc.h>
74 #include <sys/crypto/common.h>
75
76 #include <sys/types.h>
77 #include <sys/kmem.h>
78 #include <sys/param.h>
79 #include <sys/sunddi.h>
80
81 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP)
82 #define big_free(ptr, size) kmem_free(ptr, size)
83
84 void *
85 big_realloc(void *from, size_t oldsize, size_t newsize)
86 {
87 void *rv;
88
89 rv = kmem_alloc(newsize, KM_NOSLEEP);
90 if (rv != NULL)
91 bcopy(from, rv, oldsize);
92 kmem_free(from, oldsize);
93 return (rv);
94 }
95
96 #else /* _KERNEL */
97
98 #include <stdlib.h>
99 #include <stdio.h>
100 #include <assert.h>
101 #define ASSERT assert
102
103 #ifndef MALLOC_DEBUG
104
105 #define big_malloc(size) malloc(size)
106 #define big_free(ptr, size) free(ptr)
107
108 #else
109
110 void
111 big_free(void *ptr, size_t size)
112 {
113 printf("freed %d bytes at %p\n", size, ptr);
114 free(ptr);
115 }
116
117 void *
118 big_malloc(size_t size)
119 {
120 void *rv;
121 rv = malloc(size);
122 printf("malloced %d bytes, addr:%p\n", size, rv);
123 return (rv);
124 }
125 #endif /* MALLOC_DEBUG */
126
127 #define big_realloc(x, y, z) realloc((x), (z))
128
129 void
130 printbignum(char *aname, BIGNUM *a)
131 {
132 int i;
133
134 (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
135 for (i = a->len - 1; i >= 0; i--) {
136 #ifdef BIGNUM_CHUNK_32
137 (void) printf("%08x ", a->value[i]);
138 if ((i % 8 == 0) && (i != 0)) {
139 (void) printf("\n");
140 }
141 #else
142 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
143 (uint32_t)((a->value[i]) & 0xffffffff));
144 if ((i % 4 == 0) && (i != 0)) {
145 (void) printf("\n");
146 }
147 #endif
148 }
149 (void) printf("\n");
150 }
151
152 #endif /* _KERNEL */
153
154
155 /* size in BIG_CHUNK_SIZE-bit words */
156 BIG_ERR_CODE
157 big_init(BIGNUM *number, int size)
158 {
159 number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
160 if (number->value == NULL) {
161 return (BIG_NO_MEM);
162 }
163 number->size = size;
164 number->len = 0;
165 number->sign = 1;
166 number->malloced = 1;
167 return (BIG_OK);
168 }
169
170 /* size in BIG_CHUNK_SIZE-bit words */
171 BIG_ERR_CODE
172 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
173 {
174 if ((buf == NULL) || (size > bufsize)) {
175 number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
176 if (number->value == NULL) {
177 return (BIG_NO_MEM);
178 }
179 number->size = size;
180 number->malloced = 1;
181 } else {
182 number->value = buf;
183 number->size = bufsize;
184 number->malloced = 0;
185 }
186 number->len = 0;
187 number->sign = 1;
188
189 return (BIG_OK);
190 }
191
192 void
193 big_finish(BIGNUM *number)
194 {
195 if (number->malloced == 1) {
196 big_free(number->value,
197 sizeof (BIG_CHUNK_TYPE) * number->size);
198 number->malloced = 0;
199 }
200 }
201
202
203 /*
204 * bn->size should be at least
205 * (len + sizeof (BIG_CHUNK_TYPE) - 1) / sizeof (BIG_CHUNK_TYPE) bytes
206 * converts from byte-big-endian format to bignum format (words in
207 * little endian order, but bytes within the words big endian)
208 */
209 void
210 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
211 {
212 int i, j, offs;
213 BIG_CHUNK_TYPE word;
214 uchar_t *knwordp;
215
216 #ifdef _LP64
217 offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
218 bn->len = (uint32_t)len / sizeof (BIG_CHUNK_TYPE);
219
220 for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
221 #else /* !_LP64 */
222 offs = len % sizeof (BIG_CHUNK_TYPE);
223 bn->len = len / sizeof (BIG_CHUNK_TYPE);
224 for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
225 #endif /* _LP64 */
226 knwordp = &(kn[len - sizeof (BIG_CHUNK_TYPE) * (i + 1)]);
227 word = knwordp[0];
228 for (j = 1; j < sizeof (BIG_CHUNK_TYPE); j++) {
229 word = (word << 8)+ knwordp[j];
230 }
231 bn->value[i] = word;
232 }
233 if (offs > 0) {
234 word = kn[0];
235 for (i = 1; i < offs; i++) word = (word << 8) + kn[i];
236 bn->value[bn->len++] = word;
237 }
238 while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) {
239 bn->len --;
240 }
241 }
242
243 /*
244 * copies the least significant len bytes if
245 * len < bn->len * sizeof (BIG_CHUNK_TYPE)
246 * converts from bignum format to byte-big-endian format.
247 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
248 */
249 void
250 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
251 {
252 int i, j, offs;
253 BIG_CHUNK_TYPE word;
254
255 if (len < sizeof (BIG_CHUNK_TYPE) * bn->len) {
256 #ifdef _LP64
257 for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
258 #else /* !_LP64 */
259 for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
260 #endif /* _LP64 */
261 word = bn->value[i];
262 for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
263 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
264 word & 0xff;
265 word = word >> 8;
266 }
267 }
268 #ifdef _LP64
269 offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
270 #else /* !_LP64 */
271 offs = len % sizeof (BIG_CHUNK_TYPE);
272 #endif /* _LP64 */
273 if (offs > 0) {
274 word = bn->value[len / sizeof (BIG_CHUNK_TYPE)];
275 #ifdef _LP64
276 for (i = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
277 i > 0; i --) {
278 #else /* !_LP64 */
279 for (i = len % sizeof (BIG_CHUNK_TYPE);
280 i > 0; i --) {
281 #endif /* _LP64 */
282 kn[i - 1] = word & 0xff;
283 word = word >> 8;
284 }
285 }
286 } else {
287 for (i = 0; i < bn->len; i++) {
288 word = bn->value[i];
289 for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
290 kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
291 word & 0xff;
292 word = word >> 8;
293 }
294 }
295 #ifdef _LP64
296 for (i = 0;
297 i < (uint32_t)len - sizeof (BIG_CHUNK_TYPE) * bn->len;
298 i++) {
299 #else /* !_LP64 */
300 for (i = 0; i < len - sizeof (BIG_CHUNK_TYPE) * bn->len; i++) {
301 #endif /* _LP64 */
302 kn[i] = 0;
303 }
304 }
305 }
306
307
308 int
309 big_bitlength(BIGNUM *a)
310 {
311 int l = 0, b = 0;
312 BIG_CHUNK_TYPE c;
313
314 l = a->len - 1;
315 while ((l > 0) && (a->value[l] == 0)) {
316 l--;
317 }
318 b = sizeof (BIG_CHUNK_TYPE) * BITSINBYTE;
319 c = a->value[l];
320 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
321 c = c << 1;
322 b--;
323 }
324
325 return (l * sizeof (BIG_CHUNK_TYPE) * BITSINBYTE + b);
326 }
327
328
329 BIG_ERR_CODE
330 big_copy(BIGNUM *dest, BIGNUM *src)
331 {
332 BIG_CHUNK_TYPE *newptr;
333 int i, len;
334
335 len = src->len;
336 while ((len > 1) && (src->value[len - 1] == 0)) {
337 len--;
338 }
339 src->len = len;
340 if (dest->size < len) {
341 if (dest->malloced == 1) {
342 newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
343 sizeof (BIG_CHUNK_TYPE) * dest->size,
344 sizeof (BIG_CHUNK_TYPE) * len);
345 } else {
346 newptr = (BIG_CHUNK_TYPE *)
347 big_malloc(sizeof (BIG_CHUNK_TYPE) * len);
348 if (newptr != NULL) {
349 dest->malloced = 1;
350 }
351 }
352 if (newptr == NULL) {
353 return (BIG_NO_MEM);
354 }
355 dest->value = newptr;
356 dest->size = len;
357 }
358 dest->len = len;
359 dest->sign = src->sign;
360 for (i = 0; i < len; i++) {
361 dest->value[i] = src->value[i];
362 }
363
364 return (BIG_OK);
365 }
366
367
368 BIG_ERR_CODE
369 big_extend(BIGNUM *number, int size)
370 {
371 BIG_CHUNK_TYPE *newptr;
372 int i;
373
374 if (number->size >= size)
375 return (BIG_OK);
376 if (number->malloced) {
377 number->value = big_realloc(number->value,
378 sizeof (BIG_CHUNK_TYPE) * number->size,
379 sizeof (BIG_CHUNK_TYPE) * size);
380 } else {
381 newptr = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
382 if (newptr != NULL) {
383 for (i = 0; i < number->size; i++) {
384 newptr[i] = number->value[i];
385 }
386 }
387 number->value = newptr;
388 }
389
390 if (number->value == NULL) {
391 return (BIG_NO_MEM);
392 }
393
394 number->size = size;
395 number->malloced = 1;
396 return (BIG_OK);
397 }
398
399
400 /* returns 1 if n == 0 */
401 int
544
545 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
546 int
547 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
548 {
549 int i;
550
551 if (aa->len > bb->len) {
552 for (i = aa->len - 1; i > bb->len - 1; i--) {
553 if (aa->value[i] > 0) {
554 return (1);
555 }
556 }
557 } else if (aa->len < bb->len) {
558 for (i = bb->len - 1; i > aa->len - 1; i--) {
559 if (bb->value[i] > 0) {
560 return (-1);
561 }
562 }
563 } else {
564 i = aa->len-1;
565 }
566 for (; i >= 0; i--) {
567 if (aa->value[i] > bb->value[i]) {
568 return (1);
569 } else if (aa->value[i] < bb->value[i]) {
570 return (-1);
571 }
572 }
573
574 return (0);
575 }
576
577
578 BIG_ERR_CODE
579 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
580 {
581 BIG_ERR_CODE err;
582
583 if ((bb->sign == -1) && (aa->sign == 1)) {
584 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
895 result->sign = aa->sign;
896 }
897
898
899 /* it is assumed that result->size is big enough */
900 void
901 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
902 {
903 int i;
904 BIG_CHUNK_TYPE cy, ai;
905
906 if (offs == 0) {
907 if (result != aa) {
908 (void) big_copy(result, aa);
909 }
910 return;
911 }
912 cy = aa->value[0] >> offs;
913 for (i = 1; i < aa->len; i++) {
914 ai = aa->value[i];
915 result->value[i-1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
916 cy = ai >> offs;
917 }
918 result->len = aa->len;
919 result->value[result->len - 1] = cy;
920 result->sign = aa->sign;
921 }
922
923
924 /*
925 * result = aa/bb remainder = aa mod bb
926 * it is assumed that aa and bb are positive
927 */
928 BIG_ERR_CODE
929 big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
930 {
931 BIG_ERR_CODE err = BIG_OK;
932 int i, alen, blen, tlen, rlen, offs;
933 BIG_CHUNK_TYPE higha, highb, coeff;
934 BIG_CHUNK_TYPE *a, *b;
935 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
936 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
937 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
938 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE];
939 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE];
940 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE];
941
942 a = aa->value;
943 b = bb->value;
944 alen = aa->len;
945 blen = bb->len;
946 while ((alen > 1) && (a[alen - 1] == 0)) {
947 alen = alen - 1;
948 }
949 aa->len = alen;
1060 if ((remainder != NULL) &&
1061 ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1062 goto ret;
1063
1064 if (result != NULL)
1065 err = big_copy(result, &tresult);
1066
1067 ret:
1068 big_finish(&tresult);
1069 ret4:
1070 big_finish(&tmp1);
1071 ret3:
1072 big_finish(&tmp2);
1073 ret2:
1074 big_finish(&bbhigh);
1075 ret1:
1076 big_finish(&bblow);
1077 return (err);
1078 }
1079
1080 /*
1081 * If there is no processor-specific integer implementation of
1082 * the lower level multiply functions, then this code is provided
1083 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1084 * big_sqr_vec().
1085 *
1086 * There are two generic implementations. One that assumes that
1087 * there is hardware and C compiler support for a 32 x 32 --> 64
1088 * bit unsigned multiply, but otherwise is not specific to any
1089 * processor, platform, or ISA.
1090 *
1091 * The other makes very few assumptions about hardware capabilities.
1092 * It does not even assume that there is any implementation of a
1093 * 32 x 32 --> 64 bit multiply that is accessible to C code and
1094 * appropriate to use. It falls constructs 32 x 32 --> 64 bit
1095 * multiplies from 16 x 16 --> 32 bit multiplies.
1096 *
1097 */
1098
1099 #if !defined(PSR_MUL)
1100
1101 #ifdef UMUL64
1102
1103 #if (BIG_CHUNK_SIZE == 32)
1104
1105 #define UNROLL8
1106
1107 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1108 p = pf * d; \
1109 pf = (uint64_t)a[R+1]; \
1110 t = p + cy; \
1111 r[R] = (uint32_t)t; \
1112 cy = t >> 32
1113
1114 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1115 p = pf * d; \
1116 t = p + cy; \
1117 r[R] = (uint32_t)t; \
1118 cy = t >> 32
1119
1120 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1121 t = (uint64_t)r[R]; \
1122 p = pf * d; \
1123 pf = (uint64_t)a[R+1]; \
1124 t = p + t + cy; \
1125 r[R] = (uint32_t)t; \
1126 cy = t >> 32
1127
1128 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1129 t = (uint64_t)r[R]; \
1130 p = pf * d; \
1131 t = p + t + cy; \
1132 r[R] = (uint32_t)t; \
1133 cy = t >> 32
1134
1135 #ifdef UNROLL8
1136
1137 #define UNROLL 8
1138
1139 /*
1140 * r = a * b
1141 * where r and a are vectors; b is a single 32-bit digit
1142 */
1143
1259 s = (uint64_t)a[0];
1260 s = s * s;
1261 r[0] = (uint32_t)s;
1262 cy = s >> 32;
1263 p = ((uint64_t)r[1] << 1) + cy;
1264 r[1] = (uint32_t)p;
1265 cy = p >> 32;
1266 row = 1;
1267 col = 2;
1268 while (row < len) {
1269 s = (uint64_t)a[row];
1270 s = s * s;
1271 p = (uint64_t)r[col] << 1;
1272 t = p + s;
1273 d = (uint32_t)t;
1274 t2 = (uint64_t)d + cy;
1275 r[col] = (uint32_t)t2;
1276 cy = (t >> 32) + (t2 >> 32);
1277 if (row == len - 1)
1278 break;
1279 p = ((uint64_t)r[col+1] << 1) + cy;
1280 r[col+1] = (uint32_t)p;
1281 cy = p >> 32;
1282 ++row;
1283 col += 2;
1284 }
1285 r[col+1] = (uint32_t)cy;
1286 }
1287
1288 #else /* BIG_CHUNK_SIZE == 64 */
1289
1290 /*
1291 * r = r + a * digit, r and a are vectors of length len
1292 * returns the carry digit
1293 */
1294 BIG_CHUNK_TYPE
1295 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1296 BIG_CHUNK_TYPE digit)
1297 {
1298 BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
1299 int i;
1300
1301 cy1 = 0;
1302 dlow = digit & BIG_CHUNK_LOWHALFBITS;
1303 dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1304 for (i = 0; i < len; i++) {
1305 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1344 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1345 BIG_CHUNK_TYPE digit)
1346 {
1347 int i;
1348
1349 ASSERT(r != a);
1350 for (i = 0; i < len; i++) {
1351 r[i] = 0;
1352 }
1353 return (big_mul_add_vec(r, a, len, digit));
1354 }
1355
1356 void
1357 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1358 {
1359 int i;
1360
1361 ASSERT(r != a);
1362 r[len] = big_mul_set_vec(r, a, len, a[0]);
1363 for (i = 1; i < len; ++i)
1364 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1365 }
1366
1367 #endif /* BIG_CHUNK_SIZE == 32/64 */
1368
1369 #else /* ! UMUL64 */
1370
1371 #if (BIG_CHUNK_SIZE != 32)
1372 #error Don't use 64-bit chunks without defining UMUL64
1373 #endif
1374
1375
1376 /*
1377 * r = r + a * digit, r and a are vectors of length len
1378 * returns the carry digit
1379 */
1380 uint32_t
1381 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1382 {
1383 uint32_t cy, cy1, retcy, dlow, dhigh;
1384 int i;
1385
1386 cy1 = 0;
1387 dlow = digit & 0xffff;
1388 dhigh = digit >> 16;
1415 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1416 {
1417 int i;
1418
1419 ASSERT(r != a);
1420 for (i = 0; i < len; i++) {
1421 r[i] = 0;
1422 }
1423
1424 return (big_mul_add_vec(r, a, len, digit));
1425 }
1426
1427 void
1428 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1429 {
1430 int i;
1431
1432 ASSERT(r != a);
1433 r[len] = big_mul_set_vec(r, a, len, a[0]);
1434 for (i = 1; i < len; ++i)
1435 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1436 }
1437
1438 #endif /* UMUL64 */
1439
1440 void
1441 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1442 BIG_CHUNK_TYPE *b, int blen)
1443 {
1444 int i;
1445
1446 r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1447 for (i = 1; i < blen; ++i)
1448 r[alen + i] = big_mul_add_vec(r+i, a, alen, b[i]);
1449 }
1450
1451
1452 #endif /* ! PSR_MUL */
1453
1454
1455 /*
1456 * result = aa * bb result->value should be big enough to hold the result
1457 *
1458 * Implementation: Standard grammar school algorithm
1459 *
1460 */
1461 BIG_ERR_CODE
1462 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1463 {
1464 BIGNUM tmp1;
1465 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1466 BIG_CHUNK_TYPE *r, *t, *a, *b;
1467 BIG_ERR_CODE err;
1468 int i, alen, blen, rsize, sign, diff;
1475 BIGNUM *tt;
1476 tt = aa;
1477 aa = bb;
1478 bb = tt;
1479 }
1480 }
1481 a = aa->value;
1482 b = bb->value;
1483 alen = aa->len;
1484 blen = bb->len;
1485 while ((alen > 1) && (a[alen - 1] == 0)) {
1486 alen--;
1487 }
1488 aa->len = alen;
1489 while ((blen > 1) && (b[blen - 1] == 0)) {
1490 blen--;
1491 }
1492 bb->len = blen;
1493
1494 rsize = alen + blen;
1495 if (result->size < rsize) {
1496 err = big_extend(result, rsize);
1497 if (err != BIG_OK) {
1498 return (err);
1499 }
1500 /* aa or bb might be an alias to result */
1501 a = aa->value;
1502 b = bb->value;
1503 }
1504 r = result->value;
1505
1506 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1507 result->len = 1;
1508 result->sign = 1;
1509 r[0] = 0;
1510 return (BIG_OK);
1511 }
1512 sign = aa->sign * bb->sign;
1513 if ((alen == 1) && (a[0] == 1)) {
1514 for (i = 0; i < blen; i++) {
1532 return (err);
1533 }
1534 (void) big_copy(&tmp1, aa);
1535 t = tmp1.value;
1536
1537 for (i = 0; i < rsize; i++) {
1538 t[i] = 0;
1539 }
1540
1541 if (diff == 0 && alen > 2) {
1542 BIG_SQR_VEC(t, a, alen);
1543 } else if (blen > 0) {
1544 BIG_MUL_VEC(t, a, alen, b, blen);
1545 }
1546
1547 if (t[rsize - 1] == 0) {
1548 tmp1.len = rsize - 1;
1549 } else {
1550 tmp1.len = rsize;
1551 }
1552 if ((err = big_copy(result, &tmp1)) != BIG_OK) {
1553 return (err);
1554 }
1555 result->sign = sign;
1556
1557 big_finish(&tmp1);
1558
1559 return (BIG_OK);
1560 }
1561
1562
1563 /*
1564 * caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1
1565 * and that ret is not n
1566 */
1567 BIG_ERR_CODE
1568 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1569 {
1570 int i, j, nlen, needsubtract;
1571 BIG_CHUNK_TYPE *nn, *rr;
1572 BIG_CHUNK_TYPE digit, c;
1573 BIG_ERR_CODE err;
1574
1575 nlen = n->len;
1576 nn = n->value;
1577
1578 rr = ret->value;
1579
1602 needsubtract = 0;
1603 if ((rr[2 * nlen] != 0))
1604 needsubtract = 1;
1605 else {
1606 for (i = 2 * nlen - 1; i >= nlen; i--) {
1607 if (rr[i] > nn[i - nlen]) {
1608 needsubtract = 1;
1609 break;
1610 } else if (rr[i] < nn[i - nlen]) {
1611 break;
1612 }
1613 }
1614 }
1615 if (needsubtract)
1616 big_sub_vec(rr, rr + nlen, nn, nlen);
1617 else {
1618 for (i = 0; i < nlen; i++) {
1619 rr[i] = rr[i + nlen];
1620 }
1621 }
1622 for (i = nlen - 1; (i >= 0) && (rr[i] == 0); i--)
1623 ;
1624 ret->len = i+1;
1625
1626 return (BIG_OK);
1627 }
1628
1629
1630 BIG_CHUNK_TYPE
1631 big_n0(BIG_CHUNK_TYPE n)
1632 {
1633 int i;
1634 BIG_CHUNK_TYPE result, tmp;
1635
1636 result = 0;
1637 tmp = BIG_CHUNK_ALLBITS;
1638 for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1639 if ((tmp & 1) == 1) {
1640 result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1641 tmp = tmp - n;
1642 } else {
1643 result = (result >> 1);
1644 }
1747
1748 #ifdef USE_FLOATING_POINT
1749 #define big_modexp_ncp_float big_modexp_ncp_sw
1750 #else
1751 #define big_modexp_ncp_int big_modexp_ncp_sw
1752 #endif
1753
1754 #define MAX_EXP_BIT_GROUP_SIZE 6
1755 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1756
1757 /* ARGSUSED */
1758 static BIG_ERR_CODE
1759 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1760 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1761
1762 {
1763 BIGNUM apowers[APOWERS_MAX_SIZE];
1764 BIGNUM tmp1;
1765 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1766 int i, j, k, l, m, p;
1767 int bit, bitind, bitcount, groupbits, apowerssize;
1768 int nbits;
1769 BIG_ERR_CODE err;
1770
1771 nbits = big_numbits(e);
1772 if (nbits < 50) {
1773 groupbits = 1;
1774 apowerssize = 1;
1775 } else {
1776 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1777 apowerssize = 1 << (groupbits - 1);
1778 }
1779
1780
1781 if ((err = big_init1(&tmp1, 2 * n->len + 1,
1782 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1783 return (err);
1784 }
1785
1786 /* set the malloced bit to help cleanup */
1787 for (i = 0; i < apowerssize; i++) {
1788 apowers[i].malloced = 0;
1789 }
1790 for (i = 0; i < apowerssize; i++) {
1791 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1792 BIG_OK) {
1793 goto ret;
1794 }
1795 }
1796
1797 (void) big_copy(&(apowers[0]), ma);
1798
1799 if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1800 goto ret;
1801 }
1802 (void) big_copy(ma, &tmp1);
1803
1804 for (i = 1; i < apowerssize; i++) {
1805 if ((err = big_mont_mul(&tmp1, ma,
1806 &(apowers[i-1]), n, n0)) != BIG_OK) {
1807 goto ret;
1808 }
1809 (void) big_copy(&apowers[i], &tmp1);
1810 }
1811
1812 bitind = nbits % BIG_CHUNK_SIZE;
1813 k = 0;
1814 l = 0;
1815 p = 0;
1816 bitcount = 0;
1817 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1818 for (j = bitind - 1; j >= 0; j--) {
1819 bit = (e->value[i] >> j) & 1;
1820 if ((bitcount == 0) && (bit == 0)) {
1821 if ((err = big_mont_mul(tmp,
1822 tmp, tmp, n, n0)) != BIG_OK) {
1823 goto ret;
1824 }
1825 } else {
1826 bitcount++;
1895 #include <sys/sysmacros.h>
1896 #include <sys/regset.h>
1897 #include <sys/fpu/fpusystm.h>
1898
1899 /* the alignment for block stores to save fp registers */
1900 #define FPR_ALIGN (64)
1901
1902 extern void big_savefp(kfpu_t *);
1903 extern void big_restorefp(kfpu_t *);
1904
1905 #endif /* _KERNEL */
1906
1907 /*
1908 * This version makes use of floating point for performance
1909 */
1910 static BIG_ERR_CODE
1911 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1912 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1913 {
1914
1915 int i, j, k, l, m, p, bit, bitind, bitcount, nlen;
1916 double dn0;
1917 double *dn, *dt, *d16r, *d32r;
1918 uint32_t *nint, *prod;
1919 double *apowers[APOWERS_MAX_SIZE];
1920 int nbits, groupbits, apowerssize;
1921 BIG_ERR_CODE err = BIG_OK;
1922
1923 #ifdef _KERNEL
1924 uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1925 kfpu_t *fpu;
1926
1927 #ifdef DEBUG
1928 if (!fpu_exists)
1929 return (BIG_GENERAL_ERR);
1930 #endif
1931
1932 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1933 big_savefp(fpu);
1934
1935 #endif /* _KERNEL */
1936
1937 nbits = big_numbits(e);
1938 if (nbits < 50) {
1939 groupbits = 1;
1940 apowerssize = 1;
2060 dt, dn, nint, nlen, dn0);
2061 } else {
2062 bitcount++;
2063 p = p * 2 + bit;
2064 if (bit == 1) {
2065 k = k + l + 1;
2066 l = 0;
2067 } else {
2068 l++;
2069 }
2070 if (bitcount == groupbits) {
2071 for (m = 0; m < k; m++) {
2072 conv_i32_to_d32_and_d16(d32r,
2073 d16r, prod, nlen);
2074 mont_mulf_noconv(prod, d32r,
2075 d16r, dt, dn, nint,
2076 nlen, dn0);
2077 }
2078 conv_i32_to_d32(d32r, prod, nlen);
2079 mont_mulf_noconv(prod, d32r,
2080 apowers[p >> (l+1)],
2081 dt, dn, nint, nlen, dn0);
2082 for (m = 0; m < l; m++) {
2083 conv_i32_to_d32_and_d16(d32r,
2084 d16r, prod, nlen);
2085 mont_mulf_noconv(prod, d32r,
2086 d16r, dt, dn, nint,
2087 nlen, dn0);
2088 }
2089 k = 0;
2090 l = 0;
2091 p = 0;
2092 bitcount = 0;
2093 }
2094 }
2095 }
2096 bitind = BIG_CHUNK_SIZE;
2097 }
2098
2099 for (m = 0; m < k; m++) {
2100 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2166 {
2167 BIGNUM ma, tmp, rr;
2168 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE];
2169 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2170 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
2171 BIG_ERR_CODE err;
2172 BIG_CHUNK_TYPE n0;
2173
2174 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) !=
2175 BIG_OK) {
2176 return (err);
2177 }
2178 ma.len = 1;
2179 ma.value[0] = 0;
2180
2181 if ((err = big_init1(&tmp, 2 * n->len + 1,
2182 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2183 goto ret1;
2184 }
2185
2186 /* set the malloced bit to help cleanup */
2187 rr.malloced = 0;
2188 if (n_rr == NULL) {
2189 if ((err = big_init1(&rr, 2 * n->len + 1,
2190 rrvalue, arraysize(rrvalue))) != BIG_OK) {
2191 goto ret2;
2192 }
2193 if (big_mont_rr(&rr, n) != BIG_OK) {
2194 goto ret;
2195 }
2196 n_rr = &rr;
2197 }
2198
2199 n0 = big_n0(n->value[0]);
2200
2201 if (big_cmp_abs(a, n) > 0) {
2202 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2203 goto ret;
2204 }
2205 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2206 } else {
2207 err = big_mont_conv(&ma, a, n, n0, n_rr);
2364
2365 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2366 BIGNUM big_One = {1, 1, 1, 0, onearr};
2367
2368 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2369 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2370
2371 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2372 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2373
2374
2375 BIG_ERR_CODE
2376 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2377 {
2378 BIGNUM *high, *low, *mid, *t;
2379 BIGNUM t1, t2, t3, prod;
2380 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2381 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2382 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2383 BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE];
2384 int i, nbits, diff, nrootbits, highbits;
2385 BIG_ERR_CODE err;
2386
2387 nbits = big_numbits(n);
2388
2389 if ((err = big_init1(&t1, n->len + 1,
2390 t1value, arraysize(t1value))) != BIG_OK)
2391 return (err);
2392 if ((err = big_init1(&t2, n->len + 1,
2393 t2value, arraysize(t2value))) != BIG_OK)
2394 goto ret1;
2395 if ((err = big_init1(&t3, n->len + 1,
2396 t3value, arraysize(t3value))) != BIG_OK)
2397 goto ret2;
2398 if ((err = big_init1(&prod, n->len + 1,
2399 prodvalue, arraysize(prodvalue))) != BIG_OK)
2400 goto ret3;
2401
2402 nrootbits = (nbits + 1) / 2;
2403 t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2404 for (i = 0; i < t1.len; i++) {
2534 m = n;
2535 n = t;
2536 }
2537 }
2538 err = BIG_OK;
2539
2540 ret:
2541 if (t3.malloced) big_finish(&t3);
2542 ret2:
2543 if (t2.malloced) big_finish(&t2);
2544 ret1:
2545 if (t1.malloced) big_finish(&t1);
2546
2547 return (err);
2548 }
2549
2550
2551 BIG_ERR_CODE
2552 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2553 {
2554 int m, w, i;
2555 BIG_CHUNK_TYPE bit;
2556 BIGNUM ki, tmp, tmp2;
2557 BIG_CHUNK_TYPE kivalue[BIGTMPSIZE];
2558 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2559 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
2560 BIG_ERR_CODE err;
2561
2562 if (big_cmp_abs(k, &big_One) == 0) {
2563 (void) big_copy(Lk, p);
2564 (void) big_copy(Lkminus1, &big_Two);
2565 return (BIG_OK);
2566 }
2567
2568 if ((err = big_init1(&ki, k->len + 1,
2569 kivalue, arraysize(kivalue))) != BIG_OK)
2570 return (err);
2571
2572 if ((err = big_init1(&tmp, 2 * n->len +1,
2573 tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2574 goto ret1;
2575
2576 if ((err = big_init1(&tmp2, n->len,
2577 tmp2value, arraysize(tmp2value))) != BIG_OK)
2578 goto ret2;
2579
2580 m = big_numbits(k);
2581 ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2582 w = (m - 1) / BIG_CHUNK_SIZE;
2583 bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2584 for (i = 0; i < ki.len; i++) {
2585 ki.value[i] = 0;
2586 }
2587 ki.value[ki.len - 1] = bit;
2588 if (big_cmp_abs(k, &ki) != 0) {
2589 (void) big_double(&ki, &ki);
2590 }
2591 (void) big_sub_pos(&ki, &ki, k);
2592
2764 ret3:
2765 if (tmp.malloced) big_finish(&tmp);
2766 ret2:
2767 if (nminus1.malloced) big_finish(&nminus1);
2768 ret1:
2769 if (o.malloced) big_finish(&o);
2770
2771 return (err);
2772 }
2773
2774
2775 BIG_ERR_CODE
2776 big_isprime_pos(BIGNUM *n)
2777 {
2778 return (big_isprime_pos_ext(n, NULL));
2779 }
2780
2781
2782 #define SIEVESIZE 1000
2783
2784 uint32_t smallprimes[] =
2785 {
2786 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2787 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97
2788 };
2789
2790
2791 BIG_ERR_CODE
2792 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2793 {
2794 BIG_ERR_CODE err;
2795 int sieve[SIEVESIZE];
2796 int i;
2797 uint32_t off, p;
2798
2799 if ((err = big_copy(result, n)) != BIG_OK) {
2800 return (err);
2801 }
2802 result->value[0] |= 1;
2803 /* CONSTCOND */
2804 while (1) {
2805 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2806 for (i = 0;
2807 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2808 p = smallprimes[i];
2809 off = big_modhalf_pos(result, p);
2810 off = p - off;
2811 if ((off % 2) == 1) {
2812 off = off + p;
2813 }
|
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Configuration guide
28 * -------------------
29 *
30 * There are 4 preprocessor symbols used to configure the bignum
31 * implementation. This file contains no logic to configure based on
32 * processor; we leave that to the Makefiles to specify.
33 *
34 * USE_FLOATING_POINT
35 * Meaning: There is support for a fast floating-point implementation of
36 * Montgomery multiply.
37 *
38 * PSR_MUL
39 * Meaning: There are processor-specific versions of the low level
40 * functions to implement big_mul. Those functions are: big_mul_set_vec,
41 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support
42 * for all 4 functions. You cannot pick and choose which subset of these
43 * functions to support; that would lead to a rat's nest of #ifdefs.
44 *
45 * HWCAP
46 * Meaning: Call multiply support functions through a function pointer.
47 * On x86, there are multiple implementations for different hardware
48 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when
49 * a function is first used. So, the support functions are called through
50 * a function pointer. There is no need for that on Sparc, because there
51 * is only one implementation; support functions are called directly.
52 * Later, if there were some new VIS instruction, or something, and a
53 * run-time test were needed, rather than variant kernel modules and
54 * libraries, then HWCAP would be defined for Sparc, as well.
55 *
56 * UMUL64
57 * Meaning: It is safe to use generic C code that assumes the existence
58 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined,
59 * then the generic code for big_mul_add_vec() must necessarily be very slow,
60 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
61 *
62 */
63
64
65 #include <sys/types.h>
66 #include "bignum.h"
67
68 #ifdef _KERNEL
69 #include <sys/ddi.h>
70 #include <sys/mdesc.h>
71 #include <sys/crypto/common.h>
72
73 #include <sys/kmem.h>
74 #include <sys/param.h>
75 #include <sys/sunddi.h>
76
77 #else
78 #include <stdlib.h>
79 #include <stdio.h>
80 #include <assert.h>
81 #define ASSERT assert
82 #endif /* _KERNEL */
83
84 #ifdef _LP64 /* truncate 64-bit size_t to 32-bits */
85 #define UI32(ui) ((uint32_t)ui)
86 #else /* size_t already 32-bits */
87 #define UI32(ui) (ui)
88 #endif
89
90
91 #ifdef _KERNEL
92 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP)
93 #define big_free(ptr, size) kmem_free(ptr, size)
94
95 void *
96 big_realloc(void *from, size_t oldsize, size_t newsize)
97 {
98 void *rv;
99
100 rv = kmem_alloc(newsize, KM_NOSLEEP);
101 if (rv != NULL)
102 bcopy(from, rv, oldsize);
103 kmem_free(from, oldsize);
104 return (rv);
105 }
106
107 #else /* _KERNEL */
108
109 #ifndef MALLOC_DEBUG
110
111 #define big_malloc(size) malloc(size)
112 #define big_free(ptr, size) free(ptr)
113
114 #else
115
116 void
117 big_free(void *ptr, size_t size)
118 {
119 printf("freed %d bytes at %p\n", size, ptr);
120 free(ptr);
121 }
122
123 void *
124 big_malloc(size_t size)
125 {
126 void *rv;
127 rv = malloc(size);
128 printf("malloced %d bytes, addr:%p\n", size, rv);
129 return (rv);
130 }
131 #endif /* MALLOC_DEBUG */
132
133 #define big_realloc(x, y, z) realloc((x), (z))
134
135
136 /*
137 * printbignum()
138 * Print a BIGNUM type to stdout.
139 */
140 void
141 printbignum(char *aname, BIGNUM *a)
142 {
143 int i;
144
145 (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
146 for (i = a->len - 1; i >= 0; i--) {
147 #ifdef BIGNUM_CHUNK_32
148 (void) printf("%08x ", a->value[i]);
149 if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
150 (void) printf("\n");
151 }
152 #else
153 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
154 (uint32_t)((a->value[i]) & 0xffffffff));
155 if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
156 (void) printf("\n");
157 }
158 #endif
159 }
160 (void) printf("\n");
161 }
162
163 #endif /* _KERNEL */
164
165
166 /*
167 * big_init()
168 * Initialize and allocate memory for a BIGNUM type.
169 *
170 * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
171 *
172 * Note: call big_finish() to free memory allocated by big_init().
173 *
174 * Input:
175 * number Uninitialized memory for BIGNUM
176 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
177 *
178 * Output:
179 * number Initialized BIGNUM
180 *
181 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
182 */
183 BIG_ERR_CODE
184 big_init(BIGNUM *number, int size)
185 {
186 number->value = big_malloc(BIGNUM_WORDSIZE * size);
187 if (number->value == NULL) {
188 return (BIG_NO_MEM);
189 }
190 number->size = size;
191 number->len = 0;
192 number->sign = 1;
193 number->malloced = 1;
194 return (BIG_OK);
195 }
196
197
198 /*
199 * big_init1()
200 * Initialize and, if needed, allocate memory for a BIGNUM type.
201 * Use the buffer passed, buf, if any, instad of allocating memory
202 * if it's at least "size" bytes.
203 *
204 * Note: call big_finish() to free memory allocated by big_init().
205 *
206 * Input:
207 * number Uninitialized memory for BIGNUM
208 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
209 * buf Buffer for storing a BIGNUM.
210 * If NULL, big_init1() will allocate a buffer
211 * bufsize Size, in BIG_CHUNK_SIZE_bit words, of buf
212 *
213 * Output:
214 * number Initialized BIGNUM
215 *
216 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
217 */
218 BIG_ERR_CODE
219 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
220 {
221 if ((buf == NULL) || (size > bufsize)) {
222 number->value = big_malloc(BIGNUM_WORDSIZE * size);
223 if (number->value == NULL) {
224 return (BIG_NO_MEM);
225 }
226 number->size = size;
227 number->malloced = 1;
228 } else {
229 number->value = buf;
230 number->size = bufsize;
231 number->malloced = 0;
232 }
233 number->len = 0;
234 number->sign = 1;
235
236 return (BIG_OK);
237 }
238
239
240 /*
241 * big_finish()
242 * Free memory, if any, allocated by big_init() or big_init1().
243 */
244 void
245 big_finish(BIGNUM *number)
246 {
247 if (number->malloced == 1) {
248 big_free(number->value, BIGNUM_WORDSIZE * number->size);
249 number->malloced = 0;
250 }
251 }
252
253
254 /*
255 * bn->size should be at least
256 * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes
257 * converts from byte-big-endian format to bignum format (words in
258 * little endian order, but bytes within the words big endian)
259 */
260 void
261 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
262 {
263 int i, j;
264 uint32_t offs;
265 const uint32_t slen = UI32(len);
266 BIG_CHUNK_TYPE word;
267 uchar_t *knwordp;
268
269 if (slen == 0) {
270 bn->len = 1;
271 bn->value[0] = 0;
272 return;
273 }
274
275 offs = slen % BIGNUM_WORDSIZE;
276 bn->len = slen / BIGNUM_WORDSIZE;
277
278 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
279 knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
280 word = knwordp[0];
281 for (j = 1; j < BIGNUM_WORDSIZE; j++) {
282 word = (word << BITSINBYTE) + knwordp[j];
283 }
284 bn->value[i] = word;
285 }
286 if (offs > 0) {
287 word = kn[0];
288 for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
289 bn->value[bn->len++] = word;
290 }
291 while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
292 bn->len --;
293 }
294 }
295
296
297 /*
298 * copies the least significant len bytes if
299 * len < bn->len * BIGNUM_WORDSIZE
300 * converts from bignum format to byte-big-endian format.
301 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
302 */
303 void
304 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
305 {
306 int i, j;
307 uint32_t offs;
308 const uint32_t slen = UI32(len);
309 BIG_CHUNK_TYPE word;
310
311 if (len < BIGNUM_WORDSIZE * bn->len) {
312 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
313 word = bn->value[i];
314 for (j = 0; j < BIGNUM_WORDSIZE; j++) {
315 kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
316 word & 0xff;
317 word = word >> BITSINBYTE;
318 }
319 }
320 offs = slen % BIGNUM_WORDSIZE;
321 if (offs > 0) {
322 word = bn->value[slen / BIGNUM_WORDSIZE];
323 for (i = slen % BIGNUM_WORDSIZE; i > 0; i --) {
324 kn[i - 1] = word & 0xff;
325 word = word >> BITSINBYTE;
326 }
327 }
328 } else {
329 for (i = 0; i < bn->len; i++) {
330 word = bn->value[i];
331 for (j = 0; j < BIGNUM_WORDSIZE; j++) {
332 kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
333 word & 0xff;
334 word = word >> BITSINBYTE;
335 }
336 }
337 for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {
338 kn[i] = 0;
339 }
340 }
341 }
342
343
344 int
345 big_bitlength(BIGNUM *a)
346 {
347 int l = 0, b = 0;
348 BIG_CHUNK_TYPE c;
349
350 l = a->len - 1;
351 while ((l > 0) && (a->value[l] == 0)) {
352 l--;
353 }
354 b = BIG_CHUNK_SIZE;
355 c = a->value[l];
356 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
357 c = c << 1;
358 b--;
359 }
360
361 return (l * BIG_CHUNK_SIZE + b);
362 }
363
364
365 BIG_ERR_CODE
366 big_copy(BIGNUM *dest, BIGNUM *src)
367 {
368 BIG_CHUNK_TYPE *newptr;
369 int i, len;
370
371 len = src->len;
372 while ((len > 1) && (src->value[len - 1] == 0)) {
373 len--;
374 }
375 src->len = len;
376 if (dest->size < len) {
377 if (dest->malloced == 1) {
378 newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
379 BIGNUM_WORDSIZE * dest->size,
380 BIGNUM_WORDSIZE * len);
381 } else {
382 newptr = (BIG_CHUNK_TYPE *)
383 big_malloc(BIGNUM_WORDSIZE * len);
384 if (newptr != NULL) {
385 dest->malloced = 1;
386 }
387 }
388 if (newptr == NULL) {
389 return (BIG_NO_MEM);
390 }
391 dest->value = newptr;
392 dest->size = len;
393 }
394 dest->len = len;
395 dest->sign = src->sign;
396 for (i = 0; i < len; i++) {
397 dest->value[i] = src->value[i];
398 }
399
400 return (BIG_OK);
401 }
402
403
404 BIG_ERR_CODE
405 big_extend(BIGNUM *number, int size)
406 {
407 BIG_CHUNK_TYPE *newptr;
408 int i;
409
410 if (number->size >= size)
411 return (BIG_OK);
412 if (number->malloced) {
413 number->value = big_realloc(number->value,
414 BIGNUM_WORDSIZE * number->size,
415 BIGNUM_WORDSIZE * size);
416 } else {
417 newptr = big_malloc(BIGNUM_WORDSIZE * size);
418 if (newptr != NULL) {
419 for (i = 0; i < number->size; i++) {
420 newptr[i] = number->value[i];
421 }
422 }
423 number->value = newptr;
424 }
425
426 if (number->value == NULL) {
427 return (BIG_NO_MEM);
428 }
429
430 number->size = size;
431 number->malloced = 1;
432 return (BIG_OK);
433 }
434
435
436 /* returns 1 if n == 0 */
437 int
580
581 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
582 int
583 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
584 {
585 int i;
586
587 if (aa->len > bb->len) {
588 for (i = aa->len - 1; i > bb->len - 1; i--) {
589 if (aa->value[i] > 0) {
590 return (1);
591 }
592 }
593 } else if (aa->len < bb->len) {
594 for (i = bb->len - 1; i > aa->len - 1; i--) {
595 if (bb->value[i] > 0) {
596 return (-1);
597 }
598 }
599 } else {
600 i = aa->len - 1;
601 }
602 for (; i >= 0; i--) {
603 if (aa->value[i] > bb->value[i]) {
604 return (1);
605 } else if (aa->value[i] < bb->value[i]) {
606 return (-1);
607 }
608 }
609
610 return (0);
611 }
612
613
614 BIG_ERR_CODE
615 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
616 {
617 BIG_ERR_CODE err;
618
619 if ((bb->sign == -1) && (aa->sign == 1)) {
620 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
931 result->sign = aa->sign;
932 }
933
934
935 /* it is assumed that result->size is big enough */
936 void
937 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
938 {
939 int i;
940 BIG_CHUNK_TYPE cy, ai;
941
942 if (offs == 0) {
943 if (result != aa) {
944 (void) big_copy(result, aa);
945 }
946 return;
947 }
948 cy = aa->value[0] >> offs;
949 for (i = 1; i < aa->len; i++) {
950 ai = aa->value[i];
951 result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
952 cy = ai >> offs;
953 }
954 result->len = aa->len;
955 result->value[result->len - 1] = cy;
956 result->sign = aa->sign;
957 }
958
959
960 /*
961 * result = aa/bb remainder = aa mod bb
962 * it is assumed that aa and bb are positive
963 */
964 BIG_ERR_CODE
965 big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
966 {
967 BIG_ERR_CODE err = BIG_OK;
968 int i, alen, blen, tlen, rlen, offs;
969 BIG_CHUNK_TYPE higha, highb, coeff;
970 BIG_CHUNK_TYPE *a, *b;
971 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
972 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
973 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
974 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE];
975 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE];
976 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE];
977
978 a = aa->value;
979 b = bb->value;
980 alen = aa->len;
981 blen = bb->len;
982 while ((alen > 1) && (a[alen - 1] == 0)) {
983 alen = alen - 1;
984 }
985 aa->len = alen;
1096 if ((remainder != NULL) &&
1097 ((err = big_copy(remainder, &tmp1)) != BIG_OK))
1098 goto ret;
1099
1100 if (result != NULL)
1101 err = big_copy(result, &tresult);
1102
1103 ret:
1104 big_finish(&tresult);
1105 ret4:
1106 big_finish(&tmp1);
1107 ret3:
1108 big_finish(&tmp2);
1109 ret2:
1110 big_finish(&bbhigh);
1111 ret1:
1112 big_finish(&bblow);
1113 return (err);
1114 }
1115
1116
1117 /*
1118 * If there is no processor-specific integer implementation of
1119 * the lower level multiply functions, then this code is provided
1120 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1121 * big_sqr_vec().
1122 *
1123 * There are two generic implementations. One that assumes that
1124 * there is hardware and C compiler support for a 32 x 32 --> 64
1125 * bit unsigned multiply, but otherwise is not specific to any
1126 * processor, platform, or ISA.
1127 *
1128 * The other makes very few assumptions about hardware capabilities.
1129 * It does not even assume that there is any implementation of a
1130 * 32 x 32 --> 64 bit multiply that is accessible to C code and
1131 * appropriate to use. It falls constructs 32 x 32 --> 64 bit
1132 * multiplies from 16 x 16 --> 32 bit multiplies.
1133 *
1134 */
1135
1136 #if !defined(PSR_MUL)
1137
1138 #ifdef UMUL64
1139
1140 #if (BIG_CHUNK_SIZE == 32)
1141
1142 #define UNROLL8
1143
1144 #define MUL_SET_VEC_ROUND_PREFETCH(R) \
1145 p = pf * d; \
1146 pf = (uint64_t)a[R + 1]; \
1147 t = p + cy; \
1148 r[R] = (uint32_t)t; \
1149 cy = t >> 32
1150
1151 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1152 p = pf * d; \
1153 t = p + cy; \
1154 r[R] = (uint32_t)t; \
1155 cy = t >> 32
1156
1157 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \
1158 t = (uint64_t)r[R]; \
1159 p = pf * d; \
1160 pf = (uint64_t)a[R + 1]; \
1161 t = p + t + cy; \
1162 r[R] = (uint32_t)t; \
1163 cy = t >> 32
1164
1165 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1166 t = (uint64_t)r[R]; \
1167 p = pf * d; \
1168 t = p + t + cy; \
1169 r[R] = (uint32_t)t; \
1170 cy = t >> 32
1171
1172 #ifdef UNROLL8
1173
1174 #define UNROLL 8
1175
1176 /*
1177 * r = a * b
1178 * where r and a are vectors; b is a single 32-bit digit
1179 */
1180
1296 s = (uint64_t)a[0];
1297 s = s * s;
1298 r[0] = (uint32_t)s;
1299 cy = s >> 32;
1300 p = ((uint64_t)r[1] << 1) + cy;
1301 r[1] = (uint32_t)p;
1302 cy = p >> 32;
1303 row = 1;
1304 col = 2;
1305 while (row < len) {
1306 s = (uint64_t)a[row];
1307 s = s * s;
1308 p = (uint64_t)r[col] << 1;
1309 t = p + s;
1310 d = (uint32_t)t;
1311 t2 = (uint64_t)d + cy;
1312 r[col] = (uint32_t)t2;
1313 cy = (t >> 32) + (t2 >> 32);
1314 if (row == len - 1)
1315 break;
1316 p = ((uint64_t)r[col + 1] << 1) + cy;
1317 r[col + 1] = (uint32_t)p;
1318 cy = p >> 32;
1319 ++row;
1320 col += 2;
1321 }
1322 r[col + 1] = (uint32_t)cy;
1323 }
1324
1325 #else /* BIG_CHUNK_SIZE == 64 */
1326
1327 /*
1328 * r = r + a * digit, r and a are vectors of length len
1329 * returns the carry digit
1330 */
1331 BIG_CHUNK_TYPE
1332 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1333 BIG_CHUNK_TYPE digit)
1334 {
1335 BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
1336 int i;
1337
1338 cy1 = 0;
1339 dlow = digit & BIG_CHUNK_LOWHALFBITS;
1340 dhigh = digit >> (BIG_CHUNK_SIZE / 2);
1341 for (i = 0; i < len; i++) {
1342 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
1381 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1382 BIG_CHUNK_TYPE digit)
1383 {
1384 int i;
1385
1386 ASSERT(r != a);
1387 for (i = 0; i < len; i++) {
1388 r[i] = 0;
1389 }
1390 return (big_mul_add_vec(r, a, len, digit));
1391 }
1392
1393 void
1394 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1395 {
1396 int i;
1397
1398 ASSERT(r != a);
1399 r[len] = big_mul_set_vec(r, a, len, a[0]);
1400 for (i = 1; i < len; ++i)
1401 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1402 }
1403
1404 #endif /* BIG_CHUNK_SIZE == 32/64 */
1405
1406
1407 #else /* ! UMUL64 */
1408
1409 #if (BIG_CHUNK_SIZE != 32)
1410 #error Don't use 64-bit chunks without defining UMUL64
1411 #endif
1412
1413
1414 /*
1415 * r = r + a * digit, r and a are vectors of length len
1416 * returns the carry digit
1417 */
1418 uint32_t
1419 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1420 {
1421 uint32_t cy, cy1, retcy, dlow, dhigh;
1422 int i;
1423
1424 cy1 = 0;
1425 dlow = digit & 0xffff;
1426 dhigh = digit >> 16;
1453 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1454 {
1455 int i;
1456
1457 ASSERT(r != a);
1458 for (i = 0; i < len; i++) {
1459 r[i] = 0;
1460 }
1461
1462 return (big_mul_add_vec(r, a, len, digit));
1463 }
1464
1465 void
1466 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1467 {
1468 int i;
1469
1470 ASSERT(r != a);
1471 r[len] = big_mul_set_vec(r, a, len, a[0]);
1472 for (i = 1; i < len; ++i)
1473 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1474 }
1475
1476 #endif /* UMUL64 */
1477
1478 void
1479 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1480 BIG_CHUNK_TYPE *b, int blen)
1481 {
1482 int i;
1483
1484 r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1485 for (i = 1; i < blen; ++i)
1486 r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
1487 }
1488
1489
1490 #endif /* ! PSR_MUL */
1491
1492
1493 /*
1494 * result = aa * bb result->value should be big enough to hold the result
1495 *
1496 * Implementation: Standard grammar school algorithm
1497 *
1498 */
1499 BIG_ERR_CODE
1500 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1501 {
1502 BIGNUM tmp1;
1503 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1504 BIG_CHUNK_TYPE *r, *t, *a, *b;
1505 BIG_ERR_CODE err;
1506 int i, alen, blen, rsize, sign, diff;
1513 BIGNUM *tt;
1514 tt = aa;
1515 aa = bb;
1516 bb = tt;
1517 }
1518 }
1519 a = aa->value;
1520 b = bb->value;
1521 alen = aa->len;
1522 blen = bb->len;
1523 while ((alen > 1) && (a[alen - 1] == 0)) {
1524 alen--;
1525 }
1526 aa->len = alen;
1527 while ((blen > 1) && (b[blen - 1] == 0)) {
1528 blen--;
1529 }
1530 bb->len = blen;
1531
1532 rsize = alen + blen;
1533 ASSERT(rsize > 0);
1534 if (result->size < rsize) {
1535 err = big_extend(result, rsize);
1536 if (err != BIG_OK) {
1537 return (err);
1538 }
1539 /* aa or bb might be an alias to result */
1540 a = aa->value;
1541 b = bb->value;
1542 }
1543 r = result->value;
1544
1545 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1546 result->len = 1;
1547 result->sign = 1;
1548 r[0] = 0;
1549 return (BIG_OK);
1550 }
1551 sign = aa->sign * bb->sign;
1552 if ((alen == 1) && (a[0] == 1)) {
1553 for (i = 0; i < blen; i++) {
1571 return (err);
1572 }
1573 (void) big_copy(&tmp1, aa);
1574 t = tmp1.value;
1575
1576 for (i = 0; i < rsize; i++) {
1577 t[i] = 0;
1578 }
1579
1580 if (diff == 0 && alen > 2) {
1581 BIG_SQR_VEC(t, a, alen);
1582 } else if (blen > 0) {
1583 BIG_MUL_VEC(t, a, alen, b, blen);
1584 }
1585
1586 if (t[rsize - 1] == 0) {
1587 tmp1.len = rsize - 1;
1588 } else {
1589 tmp1.len = rsize;
1590 }
1591
1592 err = big_copy(result, &tmp1);
1593
1594 result->sign = sign;
1595
1596 big_finish(&tmp1);
1597
1598 return (err);
1599 }
1600
1601
1602 /*
1603 * caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1
1604 * and that ret is not n
1605 */
1606 BIG_ERR_CODE
1607 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
1608 {
1609 int i, j, nlen, needsubtract;
1610 BIG_CHUNK_TYPE *nn, *rr;
1611 BIG_CHUNK_TYPE digit, c;
1612 BIG_ERR_CODE err;
1613
1614 nlen = n->len;
1615 nn = n->value;
1616
1617 rr = ret->value;
1618
1641 needsubtract = 0;
1642 if ((rr[2 * nlen] != 0))
1643 needsubtract = 1;
1644 else {
1645 for (i = 2 * nlen - 1; i >= nlen; i--) {
1646 if (rr[i] > nn[i - nlen]) {
1647 needsubtract = 1;
1648 break;
1649 } else if (rr[i] < nn[i - nlen]) {
1650 break;
1651 }
1652 }
1653 }
1654 if (needsubtract)
1655 big_sub_vec(rr, rr + nlen, nn, nlen);
1656 else {
1657 for (i = 0; i < nlen; i++) {
1658 rr[i] = rr[i + nlen];
1659 }
1660 }
1661
1662 /* Remove leading zeros, but keep at least 1 digit: */
1663 for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
1664 ;
1665 ret->len = i + 1;
1666
1667 return (BIG_OK);
1668 }
1669
1670
1671 BIG_CHUNK_TYPE
1672 big_n0(BIG_CHUNK_TYPE n)
1673 {
1674 int i;
1675 BIG_CHUNK_TYPE result, tmp;
1676
1677 result = 0;
1678 tmp = BIG_CHUNK_ALLBITS;
1679 for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1680 if ((tmp & 1) == 1) {
1681 result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1682 tmp = tmp - n;
1683 } else {
1684 result = (result >> 1);
1685 }
1788
1789 #ifdef USE_FLOATING_POINT
1790 #define big_modexp_ncp_float big_modexp_ncp_sw
1791 #else
1792 #define big_modexp_ncp_int big_modexp_ncp_sw
1793 #endif
1794
1795 #define MAX_EXP_BIT_GROUP_SIZE 6
1796 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1797
1798 /* ARGSUSED */
1799 static BIG_ERR_CODE
1800 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1801 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1802
1803 {
1804 BIGNUM apowers[APOWERS_MAX_SIZE];
1805 BIGNUM tmp1;
1806 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
1807 int i, j, k, l, m, p;
1808 uint32_t bit, bitind, bitcount, groupbits, apowerssize;
1809 uint32_t nbits;
1810 BIG_ERR_CODE err;
1811
1812 nbits = big_numbits(e);
1813 if (nbits < 50) {
1814 groupbits = 1;
1815 apowerssize = 1;
1816 } else {
1817 groupbits = MAX_EXP_BIT_GROUP_SIZE;
1818 apowerssize = 1 << (groupbits - 1);
1819 }
1820
1821
1822 if ((err = big_init1(&tmp1, 2 * n->len + 1,
1823 tmp1value, arraysize(tmp1value))) != BIG_OK) {
1824 return (err);
1825 }
1826
1827 /* clear the malloced bit to help cleanup */
1828 for (i = 0; i < apowerssize; i++) {
1829 apowers[i].malloced = 0;
1830 }
1831
1832 for (i = 0; i < apowerssize; i++) {
1833 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1834 BIG_OK) {
1835 goto ret;
1836 }
1837 }
1838
1839 (void) big_copy(&(apowers[0]), ma);
1840
1841 if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
1842 goto ret;
1843 }
1844 (void) big_copy(ma, &tmp1);
1845
1846 for (i = 1; i < apowerssize; i++) {
1847 if ((err = big_mont_mul(&tmp1, ma,
1848 &(apowers[i - 1]), n, n0)) != BIG_OK) {
1849 goto ret;
1850 }
1851 (void) big_copy(&apowers[i], &tmp1);
1852 }
1853
1854 bitind = nbits % BIG_CHUNK_SIZE;
1855 k = 0;
1856 l = 0;
1857 p = 0;
1858 bitcount = 0;
1859 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
1860 for (j = bitind - 1; j >= 0; j--) {
1861 bit = (e->value[i] >> j) & 1;
1862 if ((bitcount == 0) && (bit == 0)) {
1863 if ((err = big_mont_mul(tmp,
1864 tmp, tmp, n, n0)) != BIG_OK) {
1865 goto ret;
1866 }
1867 } else {
1868 bitcount++;
1937 #include <sys/sysmacros.h>
1938 #include <sys/regset.h>
1939 #include <sys/fpu/fpusystm.h>
1940
1941 /* the alignment for block stores to save fp registers */
1942 #define FPR_ALIGN (64)
1943
1944 extern void big_savefp(kfpu_t *);
1945 extern void big_restorefp(kfpu_t *);
1946
1947 #endif /* _KERNEL */
1948
1949 /*
1950 * This version makes use of floating point for performance
1951 */
1952 static BIG_ERR_CODE
1953 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
1954 BIGNUM *tmp, BIG_CHUNK_TYPE n0)
1955 {
1956
1957 int i, j, k, l, m, p;
1958 uint32_t bit, bitind, bitcount, nlen;
1959 double dn0;
1960 double *dn, *dt, *d16r, *d32r;
1961 uint32_t *nint, *prod;
1962 double *apowers[APOWERS_MAX_SIZE];
1963 uint32_t nbits, groupbits, apowerssize;
1964 BIG_ERR_CODE err = BIG_OK;
1965
1966 #ifdef _KERNEL
1967 uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1968 kfpu_t *fpu;
1969
1970 #ifdef DEBUG
1971 if (!fpu_exists)
1972 return (BIG_GENERAL_ERR);
1973 #endif
1974
1975 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1976 big_savefp(fpu);
1977
1978 #endif /* _KERNEL */
1979
1980 nbits = big_numbits(e);
1981 if (nbits < 50) {
1982 groupbits = 1;
1983 apowerssize = 1;
2103 dt, dn, nint, nlen, dn0);
2104 } else {
2105 bitcount++;
2106 p = p * 2 + bit;
2107 if (bit == 1) {
2108 k = k + l + 1;
2109 l = 0;
2110 } else {
2111 l++;
2112 }
2113 if (bitcount == groupbits) {
2114 for (m = 0; m < k; m++) {
2115 conv_i32_to_d32_and_d16(d32r,
2116 d16r, prod, nlen);
2117 mont_mulf_noconv(prod, d32r,
2118 d16r, dt, dn, nint,
2119 nlen, dn0);
2120 }
2121 conv_i32_to_d32(d32r, prod, nlen);
2122 mont_mulf_noconv(prod, d32r,
2123 apowers[p >> (l + 1)],
2124 dt, dn, nint, nlen, dn0);
2125 for (m = 0; m < l; m++) {
2126 conv_i32_to_d32_and_d16(d32r,
2127 d16r, prod, nlen);
2128 mont_mulf_noconv(prod, d32r,
2129 d16r, dt, dn, nint,
2130 nlen, dn0);
2131 }
2132 k = 0;
2133 l = 0;
2134 p = 0;
2135 bitcount = 0;
2136 }
2137 }
2138 }
2139 bitind = BIG_CHUNK_SIZE;
2140 }
2141
2142 for (m = 0; m < k; m++) {
2143 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
2209 {
2210 BIGNUM ma, tmp, rr;
2211 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE];
2212 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2213 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
2214 BIG_ERR_CODE err;
2215 BIG_CHUNK_TYPE n0;
2216
2217 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) !=
2218 BIG_OK) {
2219 return (err);
2220 }
2221 ma.len = 1;
2222 ma.value[0] = 0;
2223
2224 if ((err = big_init1(&tmp, 2 * n->len + 1,
2225 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2226 goto ret1;
2227 }
2228
2229 /* clear the malloced bit to help cleanup */
2230 rr.malloced = 0;
2231
2232 if (n_rr == NULL) {
2233 if ((err = big_init1(&rr, 2 * n->len + 1,
2234 rrvalue, arraysize(rrvalue))) != BIG_OK) {
2235 goto ret2;
2236 }
2237 if (big_mont_rr(&rr, n) != BIG_OK) {
2238 goto ret;
2239 }
2240 n_rr = &rr;
2241 }
2242
2243 n0 = big_n0(n->value[0]);
2244
2245 if (big_cmp_abs(a, n) > 0) {
2246 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2247 goto ret;
2248 }
2249 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2250 } else {
2251 err = big_mont_conv(&ma, a, n, n0, n_rr);
2408
2409 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
2410 BIGNUM big_One = {1, 1, 1, 0, onearr};
2411
2412 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
2413 BIGNUM big_Two = {1, 1, 1, 0, twoarr};
2414
2415 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
2416 static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2417
2418
2419 BIG_ERR_CODE
2420 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2421 {
2422 BIGNUM *high, *low, *mid, *t;
2423 BIGNUM t1, t2, t3, prod;
2424 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2425 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2426 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2427 BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE];
2428 int i, diff;
2429 uint32_t nbits, nrootbits, highbits;
2430 BIG_ERR_CODE err;
2431
2432 nbits = big_numbits(n);
2433
2434 if ((err = big_init1(&t1, n->len + 1,
2435 t1value, arraysize(t1value))) != BIG_OK)
2436 return (err);
2437 if ((err = big_init1(&t2, n->len + 1,
2438 t2value, arraysize(t2value))) != BIG_OK)
2439 goto ret1;
2440 if ((err = big_init1(&t3, n->len + 1,
2441 t3value, arraysize(t3value))) != BIG_OK)
2442 goto ret2;
2443 if ((err = big_init1(&prod, n->len + 1,
2444 prodvalue, arraysize(prodvalue))) != BIG_OK)
2445 goto ret3;
2446
2447 nrootbits = (nbits + 1) / 2;
2448 t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2449 for (i = 0; i < t1.len; i++) {
2579 m = n;
2580 n = t;
2581 }
2582 }
2583 err = BIG_OK;
2584
2585 ret:
2586 if (t3.malloced) big_finish(&t3);
2587 ret2:
2588 if (t2.malloced) big_finish(&t2);
2589 ret1:
2590 if (t1.malloced) big_finish(&t1);
2591
2592 return (err);
2593 }
2594
2595
2596 BIG_ERR_CODE
2597 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2598 {
2599 int i;
2600 uint32_t m, w;
2601 BIG_CHUNK_TYPE bit;
2602 BIGNUM ki, tmp, tmp2;
2603 BIG_CHUNK_TYPE kivalue[BIGTMPSIZE];
2604 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2605 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
2606 BIG_ERR_CODE err;
2607
2608 if (big_cmp_abs(k, &big_One) == 0) {
2609 (void) big_copy(Lk, p);
2610 (void) big_copy(Lkminus1, &big_Two);
2611 return (BIG_OK);
2612 }
2613
2614 if ((err = big_init1(&ki, k->len + 1,
2615 kivalue, arraysize(kivalue))) != BIG_OK)
2616 return (err);
2617
2618 if ((err = big_init1(&tmp, 2 * n->len + 1,
2619 tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2620 goto ret1;
2621
2622 if ((err = big_init1(&tmp2, n->len,
2623 tmp2value, arraysize(tmp2value))) != BIG_OK)
2624 goto ret2;
2625
2626 m = big_numbits(k);
2627 ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
2628 w = (m - 1) / BIG_CHUNK_SIZE;
2629 bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
2630 for (i = 0; i < ki.len; i++) {
2631 ki.value[i] = 0;
2632 }
2633 ki.value[ki.len - 1] = bit;
2634 if (big_cmp_abs(k, &ki) != 0) {
2635 (void) big_double(&ki, &ki);
2636 }
2637 (void) big_sub_pos(&ki, &ki, k);
2638
2810 ret3:
2811 if (tmp.malloced) big_finish(&tmp);
2812 ret2:
2813 if (nminus1.malloced) big_finish(&nminus1);
2814 ret1:
2815 if (o.malloced) big_finish(&o);
2816
2817 return (err);
2818 }
2819
2820
2821 BIG_ERR_CODE
2822 big_isprime_pos(BIGNUM *n)
2823 {
2824 return (big_isprime_pos_ext(n, NULL));
2825 }
2826
2827
2828 #define SIEVESIZE 1000
2829
2830
2831 BIG_ERR_CODE
2832 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2833 {
2834 static const uint32_t smallprimes[] = {
2835 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2836 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 };
2837 BIG_ERR_CODE err;
2838 int sieve[SIEVESIZE];
2839 int i;
2840 uint32_t off, p;
2841
2842 if ((err = big_copy(result, n)) != BIG_OK) {
2843 return (err);
2844 }
2845 result->value[0] |= 1;
2846 /* CONSTCOND */
2847 while (1) {
2848 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2849 for (i = 0;
2850 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2851 p = smallprimes[i];
2852 off = big_modhalf_pos(result, p);
2853 off = p - off;
2854 if ((off % 2) == 1) {
2855 off = off + p;
2856 }
|