1 /*
2 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
4 */
5
6 #pragma ident "%Z%%M% %I% %E% SMI"
7
8 /*
9 * The basic framework for this code came from the reference
10 * implementation for MD5. That implementation is Copyright (C)
11 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
12 *
13 * License to copy and use this software is granted provided that it
14 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
15 * Algorithm" in all material mentioning or referencing this software
16 * or this function.
17 *
18 * License is also granted to make and use derivative works provided
19 * that such works are identified as "derived from the RSA Data
20 * Security, Inc. MD5 Message-Digest Algorithm" in all material
21 * mentioning or referencing the derived work.
22 *
23 * RSA Data Security, Inc. makes no representations concerning either
24 * the merchantability of this software or the suitability of this
25 * software for any particular purpose. It is provided "as is"
26 * without express or implied warranty of any kind.
27 *
28 * These notices must be retained in any copies of any part of this
29 * documentation and/or software.
30 *
 * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
32 * standard, available at http://www.itl.nist.gov/div897/pubs/fip180-1.htm
33 * Not as fast as one would like -- further optimizations are encouraged
34 * and appreciated.
35 */
36
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/sysmacros.h>
41 #include <sys/sha1.h>
42 #include <sys/sha1_consts.h>
43
44 #ifndef _KERNEL
45 #include <strings.h>
46 #include <stdlib.h>
47 #include <errno.h>
48 #include <sys/systeminfo.h>
49 #endif /* !_KERNEL */
50
51 static void Encode(uint8_t *, const uint32_t *, size_t);
52
53 #if defined(__sparc)
54
55 #define SHA1_TRANSFORM(ctx, in) \
56 SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
57 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
58
59 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
60 SHA1_CTX *, const uint8_t *);
61
62 #elif defined(__amd64)
63
64 #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
65 #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
66 (in), (num))
67
68 void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
69
70 #else
89 * ROTATE_LEFT rotates x left n bits.
90 */
91
92 #if defined(__GNUC__) && defined(_LP64)
/*
 * ROTATE_LEFT rotates the low 32 bits of `value' left by n bits.
 *
 * The shift counts are masked to the range 0..31 so that a rotation
 * by 0 bits is well defined: in the unmasked form, `t32 >> (32 - n)'
 * with n == 0 shifts by the full word width, which is undefined
 * behavior in C.  For n in 1..31 the result is unchanged.
 */
static __inline__ uint64_t
ROTATE_LEFT(uint64_t value, uint32_t n)
{
	uint32_t t32;

	t32 = (uint32_t)value;
	n &= 31;
	return ((t32 << n) | (t32 >> ((32 - n) & 31)));
}
101
102 #else
103
104 #define ROTATE_LEFT(x, n) \
105 (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
106
107 #endif
108
109 #if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
110
111 #define HAVE_BSWAP
112
/*
 * bswap: reverse the byte order of a 32-bit word using the x86
 * BSWAP instruction.  Used to load big-endian SHA-1 message words
 * on little-endian hardware in a single instruction.
 */
extern __inline__ uint32_t bswap(uint32_t value)
{
	/* "+r": value is both input and output, in a general register */
	__asm__("bswap %0" : "+r" (value));
	return (value);
}
118
119 #endif
120
121 /*
122 * SHA1Init()
123 *
 * purpose: initializes the sha1 context and begins an sha1 digest operation
 * input: SHA1_CTX * : the context to initialize.
126 * output: void
127 */
128
129 void
130 SHA1Init(SHA1_CTX *ctx)
131 {
132 ctx->count[0] = ctx->count[1] = 0;
133
134 /*
135 * load magic initialization constants. Tell lint
136 * that these constants are unsigned by using U.
137 */
138
139 ctx->state[0] = 0x67452301U;
140 ctx->state[1] = 0xefcdab89U;
280 * for alignments other than 4-bytes.
281 */
282 if (usevis) {
283 if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
284 /*
285 * Main processing loop - input misaligned
286 */
287 for (; i + 63 < input_len; i += 64) {
288 bcopy(&input[i], input64, 64);
289 SHA1TransformVIS(X0,
290 (uint32_t *)input64,
291 &ctx->state[0], VIS);
292 }
293 } else {
294 /*
295 * Main processing loop - input 8-byte aligned
296 */
297 for (; i + 63 < input_len; i += 64) {
298 SHA1TransformVIS(X0,
299 /* LINTED E_BAD_PTR_CAST_ALIGN */
300 (uint32_t *)&input[i],
301 &ctx->state[0], VIS);
302 }
303
304 }
305 #ifdef _KERNEL
306 sha1_restorefp(fpu);
307 #endif /* _KERNEL */
308 } else {
309 for (; i + 63 < input_len; i += 64) {
310 SHA1_TRANSFORM(ctx, &input[i]);
311 }
312 }
313
314 /*
315 * general optimization:
316 *
317 * if i and input_len are the same, return now instead
318 * of calling bcopy(), since the bcopy() in this case
319 * will be an expensive nop.
320 */
438
439 /* zeroize sensitive information */
440 bzero(ctx, sizeof (*ctx));
441 }
442
443
444 #if !defined(__amd64)
445
446 typedef uint32_t sha1word;
447
448 /*
449 * sparc optimization:
450 *
451 * on the sparc, we can load big endian 32-bit data easily. note that
452 * special care must be taken to ensure the address is 32-bit aligned.
453 * in the interest of speed, we don't check to make sure, since
454 * careful programming can guarantee this for us.
455 */
456
457 #if defined(_BIG_ENDIAN)
458
459 #define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
460
461 #else /* !defined(_BIG_ENDIAN) */
462
463 #if defined(HAVE_BSWAP)
464
465 #define LOAD_BIG_32(addr) bswap(*((uint32_t *)(addr)))
466
467 #else /* !defined(HAVE_BSWAP) */
468
469 /* little endian -- will work on big endian, but slowly */
470 #define LOAD_BIG_32(addr) \
471 (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
472
473 #endif /* !defined(HAVE_BSWAP) */
474
475 #endif /* !defined(_BIG_ENDIAN) */
476
477 /*
478 * SHA1Transform()
479 */
480 #if defined(W_ARRAY)
481 #define W(n) w[n]
482 #else /* !defined(W_ARRAY) */
483 #define W(n) w_ ## n
484 #endif /* !defined(W_ARRAY) */
485
486
487 #if defined(__sparc)
488
489 /*
490 * sparc register window optimization:
491 *
492 * `a', `b', `c', `d', and `e' are passed into SHA1Transform
493 * explicitly since it increases the number of registers available to
494 * the compiler. under this scheme, these variables can be held in
495 * %i0 - %i4, which leaves more local and out registers available.
496 *
520 * cases: a `sethi' and an `or', but loading a 32-bit value
521 * from memory only takes one `ld' (or `lduw' on v9). while
 * this increases memory usage, the compiler can find enough
 * other things to do while waiting so that the pipeline does
 * not stall. additionally, it is likely that many of these
525 * constants are cached so that later accesses do not even go
526 * out to the bus.
527 *
528 * this array is declared `static' to keep the compiler from
529 * having to bcopy() this array onto the stack frame of
530 * SHA1Transform() each time it is called -- which is
531 * unacceptably expensive.
532 *
533 * the `const' is to ensure that callers are good citizens and
534 * do not try to munge the array. since these routines are
535 * going to be called from inside multithreaded kernelland,
536 * this is a good safety check. -- `sha1_consts' will end up in
537 * .rodata.
538 *
539 * unfortunately, loading from an array in this manner hurts
540 * performance under intel. so, there is a macro,
541 * SHA1_CONST(), used in SHA1Transform(), that either expands to
542 * a reference to this array, or to the actual constant,
543 * depending on what platform this code is compiled for.
544 */
545
546 static const uint32_t sha1_consts[] = {
547 SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3,
548 };
549
550 /*
551 * general optimization:
552 *
553 * use individual integers instead of using an array. this is a
554 * win, although the amount it wins by seems to vary quite a bit.
555 */
556
557 uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
558 uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
559
560 /*
561 * sparc optimization:
562 *
563 * if `block' is already aligned on a 4-byte boundary, use
564 * LOAD_BIG_32() directly. otherwise, bcopy() into a
565 * buffer that *is* aligned on a 4-byte boundary and then do
566 * the LOAD_BIG_32() on that buffer. benchmarks have shown
567 * that using the bcopy() is better than loading the bytes
612 w_8 = LOAD_BIG_32(blk + 32);
613 /*LINTED*/
614 w_7 = LOAD_BIG_32(blk + 28);
615 /*LINTED*/
616 w_6 = LOAD_BIG_32(blk + 24);
617 /*LINTED*/
618 w_5 = LOAD_BIG_32(blk + 20);
619 /*LINTED*/
620 w_4 = LOAD_BIG_32(blk + 16);
621 /*LINTED*/
622 w_3 = LOAD_BIG_32(blk + 12);
623 /*LINTED*/
624 w_2 = LOAD_BIG_32(blk + 8);
625 /*LINTED*/
626 w_1 = LOAD_BIG_32(blk + 4);
627 /*LINTED*/
628 w_0 = LOAD_BIG_32(blk + 0);
629 }
630 #else /* !defined(__sparc) */
631
632 void
633 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
634 {
635 sha1word a = ctx->state[0];
636 sha1word b = ctx->state[1];
637 sha1word c = ctx->state[2];
638 sha1word d = ctx->state[3];
639 sha1word e = ctx->state[4];
640
641 #if defined(W_ARRAY)
642 sha1word w[16];
643 #else /* !defined(W_ARRAY) */
644 sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
645 sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
646 #endif /* !defined(W_ARRAY) */
647
648 W(0) = LOAD_BIG_32(blk + 0);
649 W(1) = LOAD_BIG_32(blk + 4);
650 W(2) = LOAD_BIG_32(blk + 8);
651 W(3) = LOAD_BIG_32(blk + 12);
652 W(4) = LOAD_BIG_32(blk + 16);
653 W(5) = LOAD_BIG_32(blk + 20);
654 W(6) = LOAD_BIG_32(blk + 24);
|
1 /*
2 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
4 */
5
6 /*
7 * The basic framework for this code came from the reference
8 * implementation for MD5. That implementation is Copyright (C)
9 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
10 *
11 * License to copy and use this software is granted provided that it
12 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
13 * Algorithm" in all material mentioning or referencing this software
14 * or this function.
15 *
16 * License is also granted to make and use derivative works provided
17 * that such works are identified as "derived from the RSA Data
18 * Security, Inc. MD5 Message-Digest Algorithm" in all material
19 * mentioning or referencing the derived work.
20 *
21 * RSA Data Security, Inc. makes no representations concerning either
22 * the merchantability of this software or the suitability of this
23 * software for any particular purpose. It is provided "as is"
24 * without express or implied warranty of any kind.
25 *
26 * These notices must be retained in any copies of any part of this
27 * documentation and/or software.
28 *
 * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
30 * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
31 * Not as fast as one would like -- further optimizations are encouraged
32 * and appreciated.
33 */
34
35 #include <sys/types.h>
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/sysmacros.h>
39 #include <sys/sha1.h>
40 #include <sys/sha1_consts.h>
41
42 #ifndef _KERNEL
43 #include <strings.h>
44 #include <stdlib.h>
45 #include <errno.h>
46 #include <sys/systeminfo.h>
47 #endif /* !_KERNEL */
48
49 #ifdef _LITTLE_ENDIAN
50 #include <sys/byteorder.h>
51 #define HAVE_HTONL
52 #endif
53
54 static void Encode(uint8_t *, const uint32_t *, size_t);
55
56 #if defined(__sparc)
57
58 #define SHA1_TRANSFORM(ctx, in) \
59 SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
60 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
61
62 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
63 SHA1_CTX *, const uint8_t *);
64
65 #elif defined(__amd64)
66
67 #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
68 #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
69 (in), (num))
70
71 void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
72
73 #else
92 * ROTATE_LEFT rotates x left n bits.
93 */
94
95 #if defined(__GNUC__) && defined(_LP64)
/*
 * ROTATE_LEFT: rotate the low 32 bits of `value' left by n bits;
 * n must lie in the range 1..31.
 */
static __inline__ uint64_t
ROTATE_LEFT(uint64_t value, uint32_t n)
{
	uint32_t word = (uint32_t)value;

	return ((word >> (32 - n)) | (word << n));
}
104
105 #else
106
107 #define ROTATE_LEFT(x, n) \
108 (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
109
110 #endif
111
112
113 /*
114 * SHA1Init()
115 *
 * purpose: initializes the sha1 context and begins an sha1 digest operation
 * input: SHA1_CTX * : the context to initialize.
118 * output: void
119 */
120
121 void
122 SHA1Init(SHA1_CTX *ctx)
123 {
124 ctx->count[0] = ctx->count[1] = 0;
125
126 /*
127 * load magic initialization constants. Tell lint
128 * that these constants are unsigned by using U.
129 */
130
131 ctx->state[0] = 0x67452301U;
132 ctx->state[1] = 0xefcdab89U;
272 * for alignments other than 4-bytes.
273 */
274 if (usevis) {
275 if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
276 /*
277 * Main processing loop - input misaligned
278 */
279 for (; i + 63 < input_len; i += 64) {
280 bcopy(&input[i], input64, 64);
281 SHA1TransformVIS(X0,
282 (uint32_t *)input64,
283 &ctx->state[0], VIS);
284 }
285 } else {
286 /*
287 * Main processing loop - input 8-byte aligned
288 */
289 for (; i + 63 < input_len; i += 64) {
290 SHA1TransformVIS(X0,
291 /* LINTED E_BAD_PTR_CAST_ALIGN */
292 (uint32_t *)&input[i], /* CSTYLED */
293 &ctx->state[0], VIS);
294 }
295
296 }
297 #ifdef _KERNEL
298 sha1_restorefp(fpu);
299 #endif /* _KERNEL */
300 } else {
301 for (; i + 63 < input_len; i += 64) {
302 SHA1_TRANSFORM(ctx, &input[i]);
303 }
304 }
305
306 /*
307 * general optimization:
308 *
309 * if i and input_len are the same, return now instead
310 * of calling bcopy(), since the bcopy() in this case
311 * will be an expensive nop.
312 */
430
431 /* zeroize sensitive information */
432 bzero(ctx, sizeof (*ctx));
433 }
434
435
436 #if !defined(__amd64)
437
438 typedef uint32_t sha1word;
439
440 /*
441 * sparc optimization:
442 *
443 * on the sparc, we can load big endian 32-bit data easily. note that
444 * special care must be taken to ensure the address is 32-bit aligned.
445 * in the interest of speed, we don't check to make sure, since
446 * careful programming can guarantee this for us.
447 */
448
449 #if defined(_BIG_ENDIAN)
450 #define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
451
452 #elif defined(HAVE_HTONL)
453 #define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
454
455 #else
456 /* little endian -- will work on big endian, but slowly */
457 #define LOAD_BIG_32(addr) \
458 (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
459 #endif /* _BIG_ENDIAN */
460
461 /*
462 * SHA1Transform()
463 */
464 #if defined(W_ARRAY)
465 #define W(n) w[n]
466 #else /* !defined(W_ARRAY) */
467 #define W(n) w_ ## n
468 #endif /* !defined(W_ARRAY) */
469
470
471 #if defined(__sparc)
472
473 /*
474 * sparc register window optimization:
475 *
476 * `a', `b', `c', `d', and `e' are passed into SHA1Transform
477 * explicitly since it increases the number of registers available to
478 * the compiler. under this scheme, these variables can be held in
479 * %i0 - %i4, which leaves more local and out registers available.
480 *
504 * cases: a `sethi' and an `or', but loading a 32-bit value
505 * from memory only takes one `ld' (or `lduw' on v9). while
 * this increases memory usage, the compiler can find enough
 * other things to do while waiting so that the pipeline does
 * not stall. additionally, it is likely that many of these
509 * constants are cached so that later accesses do not even go
510 * out to the bus.
511 *
512 * this array is declared `static' to keep the compiler from
513 * having to bcopy() this array onto the stack frame of
514 * SHA1Transform() each time it is called -- which is
515 * unacceptably expensive.
516 *
517 * the `const' is to ensure that callers are good citizens and
518 * do not try to munge the array. since these routines are
519 * going to be called from inside multithreaded kernelland,
520 * this is a good safety check. -- `sha1_consts' will end up in
521 * .rodata.
522 *
523 * unfortunately, loading from an array in this manner hurts
524 * performance under Intel. So, there is a macro,
525 * SHA1_CONST(), used in SHA1Transform(), that either expands to
526 * a reference to this array, or to the actual constant,
527 * depending on what platform this code is compiled for.
528 */
529
530 static const uint32_t sha1_consts[] = {
531 SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
532 };
533
534 /*
535 * general optimization:
536 *
537 * use individual integers instead of using an array. this is a
538 * win, although the amount it wins by seems to vary quite a bit.
539 */
540
541 uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
542 uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
543
544 /*
545 * sparc optimization:
546 *
547 * if `block' is already aligned on a 4-byte boundary, use
548 * LOAD_BIG_32() directly. otherwise, bcopy() into a
549 * buffer that *is* aligned on a 4-byte boundary and then do
550 * the LOAD_BIG_32() on that buffer. benchmarks have shown
551 * that using the bcopy() is better than loading the bytes
596 w_8 = LOAD_BIG_32(blk + 32);
597 /*LINTED*/
598 w_7 = LOAD_BIG_32(blk + 28);
599 /*LINTED*/
600 w_6 = LOAD_BIG_32(blk + 24);
601 /*LINTED*/
602 w_5 = LOAD_BIG_32(blk + 20);
603 /*LINTED*/
604 w_4 = LOAD_BIG_32(blk + 16);
605 /*LINTED*/
606 w_3 = LOAD_BIG_32(blk + 12);
607 /*LINTED*/
608 w_2 = LOAD_BIG_32(blk + 8);
609 /*LINTED*/
610 w_1 = LOAD_BIG_32(blk + 4);
611 /*LINTED*/
612 w_0 = LOAD_BIG_32(blk + 0);
613 }
614 #else /* !defined(__sparc) */
615
616 void /* CSTYLED */
617 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
618 {
619 /* CSTYLED */
620 sha1word a = ctx->state[0];
621 sha1word b = ctx->state[1];
622 sha1word c = ctx->state[2];
623 sha1word d = ctx->state[3];
624 sha1word e = ctx->state[4];
625
626 #if defined(W_ARRAY)
627 sha1word w[16];
628 #else /* !defined(W_ARRAY) */
629 sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
630 sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
631 #endif /* !defined(W_ARRAY) */
632
633 W(0) = LOAD_BIG_32(blk + 0);
634 W(1) = LOAD_BIG_32(blk + 4);
635 W(2) = LOAD_BIG_32(blk + 8);
636 W(3) = LOAD_BIG_32(blk + 12);
637 W(4) = LOAD_BIG_32(blk + 16);
638 W(5) = LOAD_BIG_32(blk + 20);
639 W(6) = LOAD_BIG_32(blk + 24);
|