Print this page
5007142 Add ntohll and htonll to sys/byteorder.h
6717509 Need to use bswap/bswapq for byte swap of 64-bit integer on x32/x64
PSARC 2008/474
   1 /*
   2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5 
   6 #pragma ident   "%Z%%M% %I%     %E% SMI"
   7 
   8 /*
   9  * The basic framework for this code came from the reference
  10  * implementation for MD5.  That implementation is Copyright (C)
  11  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  12  *
  13  * License to copy and use this software is granted provided that it
  14  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  15  * Algorithm" in all material mentioning or referencing this software
  16  * or this function.
  17  *
  18  * License is also granted to make and use derivative works provided
  19  * that such works are identified as "derived from the RSA Data
  20  * Security, Inc. MD5 Message-Digest Algorithm" in all material
  21  * mentioning or referencing the derived work.
  22  *
  23  * RSA Data Security, Inc. makes no representations concerning either
  24  * the merchantability of this software or the suitability of this
  25  * software for any particular purpose. It is provided "as is"
  26  * without express or implied warranty of any kind.
  27  *
  28  * These notices must be retained in any copies of any part of this
  29  * documentation and/or software.
  30  *
  31  * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
  32  * standard, available at http://www.itl.nist.gov/div897/pubs/fip180-1.htm
  33  * Not as fast as one would like -- further optimizations are encouraged
  34  * and appreciated.
  35  */
  36 
  37 #include <sys/types.h>
  38 #include <sys/param.h>
  39 #include <sys/systm.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/sha1.h>
  42 #include <sys/sha1_consts.h>
  43 
  44 #ifndef _KERNEL
  45 #include <strings.h>
  46 #include <stdlib.h>
  47 #include <errno.h>
  48 #include <sys/systeminfo.h>
  49 #endif  /* !_KERNEL */
  50 





  51 static void Encode(uint8_t *, const uint32_t *, size_t);
  52 
  53 #if     defined(__sparc)
  54 
  55 #define SHA1_TRANSFORM(ctx, in) \
  56         SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
  57                 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
  58 
  59 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
  60     SHA1_CTX *, const uint8_t *);
  61 
  62 #elif   defined(__amd64)
  63 
  64 #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
  65 #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
  66                 (in), (num))
  67 
  68 void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
  69 
  70 #else


  89  * ROTATE_LEFT rotates x left n bits.
  90  */
  91 
  92 #if     defined(__GNUC__) && defined(_LP64)
  93 static __inline__ uint64_t
  94 ROTATE_LEFT(uint64_t value, uint32_t n)
  95 {
             /*
              * Rotate the low 32 bits of `value' left by `n' bits.  The
              * cast below discards any upper bits of `value', and the
              * 32-bit result is widened back to uint64_t on return.
              *
              * `n' must be in the range 1..31: with n == 0 the right-shift
              * count (32 - n) equals the width of t32, which C leaves
              * undefined.
              */
  96         uint32_t t32;
  97 
  98         t32 = (uint32_t)value;
  99         return ((t32 << n) | (t32 >> (32 - n)));
 100 }
 101 
 102 #else
 103 
 104 #define ROTATE_LEFT(x, n)       \
 105         (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
 106 
 107 #endif
 108 
 109 #if     defined(__GNUC__) && (defined(__i386) || defined(__amd64))
 110 
 111 #define HAVE_BSWAP
 112 
 113 extern __inline__ uint32_t bswap(uint32_t value)
 114 {
             /*
              * Reverse the byte order of `value' with the x86 `bswap'
              * instruction.  The "+r" constraint places the operand in a
              * general register and marks it as both read and written, so
              * the swapped result is left in `value' itself.
              *
              * Only built when the enclosing #if sees GCC on i386 or amd64;
              * HAVE_BSWAP advertises its presence to LOAD_BIG_32().
              */
 115         __asm__("bswap %0" : "+r" (value));
 116         return (value);
 117 }
 118 
 119 #endif
 120 
 121 /*
 122  * SHA1Init()
 123  *
 124  * purpose: initializes the sha1 context and begins a sha1 digest operation
 125  *   input: SHA1_CTX *  : the context to initialize.
 126  *  output: void
 127  */
 128 
 129 void
 130 SHA1Init(SHA1_CTX *ctx)
 131 {
 132         ctx->count[0] = ctx->count[1] = 0;
 133 
 134         /*
 135          * load magic initialization constants. Tell lint
 136          * that these constants are unsigned by using U.
 137          */
 138 
 139         ctx->state[0] = 0x67452301U;
 140         ctx->state[1] = 0xefcdab89U;


 280                  * for alignments other than 4-bytes.
 281                  */
 282                 if (usevis) {
 283                         if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
 284                                 /*
 285                                  * Main processing loop - input misaligned
 286                                  */
 287                                 for (; i + 63 < input_len; i += 64) {
 288                                         bcopy(&input[i], input64, 64);
 289                                         SHA1TransformVIS(X0,
 290                                             (uint32_t *)input64,
 291                                             &ctx->state[0], VIS);
 292                                 }
 293                         } else {
 294                                 /*
 295                                  * Main processing loop - input 8-byte aligned
 296                                  */
 297                                 for (; i + 63 < input_len; i += 64) {
 298                                         SHA1TransformVIS(X0,
 299                                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 300                                             (uint32_t *)&input[i],
 301                                             &ctx->state[0], VIS);
 302                                 }
 303 
 304                         }
 305 #ifdef _KERNEL
 306                         sha1_restorefp(fpu);
 307 #endif /* _KERNEL */
 308                 } else {
 309                         for (; i + 63 < input_len; i += 64) {
 310                                 SHA1_TRANSFORM(ctx, &input[i]);
 311                         }
 312                 }
 313 
 314                 /*
 315                  * general optimization:
 316                  *
 317                  * if i and input_len are the same, return now instead
 318                  * of calling bcopy(), since the bcopy() in this case
 319                  * will be an expensive nop.
 320                  */


 438 
 439         /* zeroize sensitive information */
 440         bzero(ctx, sizeof (*ctx));
 441 }
 442 
 443 
 444 #if !defined(__amd64)
 445 
 446 typedef uint32_t sha1word;
 447 
 448 /*
 449  * sparc optimization:
 450  *
 451  * on the sparc, we can load big endian 32-bit data easily.  note that
 452  * special care must be taken to ensure the address is 32-bit aligned.
 453  * in the interest of speed, we don't check to make sure, since
 454  * careful programming can guarantee this for us.
 455  */
 456 
 457 #if     defined(_BIG_ENDIAN)
 458 
     /* big-endian host: a plain 32-bit load already yields the wanted value */
 459 #define LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
 460 
 461 #else   /* !defined(_BIG_ENDIAN) */

 462 
 463 #if     defined(HAVE_BSWAP)
 464 
     /* load the word in host order, then reverse its bytes with bswap() */
 465 #define LOAD_BIG_32(addr) bswap(*((uint32_t *)(addr)))
 466 
 467 #else   /* !defined(HAVE_BSWAP) */
 468 
 469 /* little endian -- will work on big endian, but slowly */
     /*
      * Byte-wise fallback: assembles the value from four separate element
      * reads, most significant byte first, so the result does not depend on
      * the host byte order.
      *
      * NOTE(review): assumes `addr' is a uint8_t pointer, as in the
      * SHA1Transform() calls.  Each byte is then promoted to int before the
      * shift, so `(addr)[0] << 24' shifts into the sign bit when that byte
      * is >= 0x80 (with a 32-bit int) -- consider casting to uint32_t first.
      */
 470 #define LOAD_BIG_32(addr)       \
 471         (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])

 472 
 473 #endif  /* !defined(HAVE_BSWAP) */
 474 
 475 #endif  /* !defined(_BIG_ENDIAN) */
 476 
 477 /*
 478  * SHA1Transform()
 479  */
 480 #if     defined(W_ARRAY)
 481 #define W(n) w[n]
 482 #else   /* !defined(W_ARRAY) */
 483 #define W(n) w_ ## n
 484 #endif  /* !defined(W_ARRAY) */
 485 
 486 
 487 #if     defined(__sparc)
 488 
 489 /*
 490  * sparc register window optimization:
 491  *
 492  * `a', `b', `c', `d', and `e' are passed into SHA1Transform
 493  * explicitly since it increases the number of registers available to
 494  * the compiler.  under this scheme, these variables can be held in
 495  * %i0 - %i4, which leaves more local and out registers available.
 496  *


 520          * cases: a `sethi' and an `or', but loading a 32-bit value
 521          * from memory only takes one `ld' (or `lduw' on v9).  while
 522          * this increases memory usage, the compiler can find enough
 523          * other things to do while waiting, so that the pipeline does
 524          * not stall.  additionally, it is likely that many of these
 525          * constants are cached so that later accesses do not even go
 526          * out to the bus.
 527          *
 528          * this array is declared `static' to keep the compiler from
 529          * having to bcopy() this array onto the stack frame of
 530          * SHA1Transform() each time it is called -- which is
 531          * unacceptably expensive.
 532          *
 533          * the `const' is to ensure that callers are good citizens and
 534          * do not try to munge the array.  since these routines are
 535          * going to be called from inside multithreaded kernelland,
 536          * this is a good safety check. -- `sha1_consts' will end up in
 537          * .rodata.
 538          *
 539          * unfortunately, loading from an array in this manner hurts
 540          * performance under intel.  so, there is a macro,
 541          * SHA1_CONST(), used in SHA1Transform(), that either expands to
 542          * a reference to this array, or to the actual constant,
 543          * depending on what platform this code is compiled for.
 544          */
 545 
 546         static const uint32_t sha1_consts[] = {
 547                 SHA1_CONST_0,   SHA1_CONST_1,   SHA1_CONST_2,   SHA1_CONST_3,
 548         };
 549 
 550         /*
 551          * general optimization:
 552          *
 553          * use individual integers instead of using an array.  this is a
 554          * win, although the amount it wins by seems to vary quite a bit.
 555          */
 556 
 557         uint32_t        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 558         uint32_t        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 559 
 560         /*
 561          * sparc optimization:
 562          *
 563          * if `block' is already aligned on a 4-byte boundary, use
 564          * LOAD_BIG_32() directly.  otherwise, bcopy() into a
 565          * buffer that *is* aligned on a 4-byte boundary and then do
 566          * the LOAD_BIG_32() on that buffer.  benchmarks have shown
 567          * that using the bcopy() is better than loading the bytes


 612                 w_8  = LOAD_BIG_32(blk + 32);
 613                 /*LINTED*/
 614                 w_7  = LOAD_BIG_32(blk + 28);
 615                 /*LINTED*/
 616                 w_6  = LOAD_BIG_32(blk + 24);
 617                 /*LINTED*/
 618                 w_5  = LOAD_BIG_32(blk + 20);
 619                 /*LINTED*/
 620                 w_4  = LOAD_BIG_32(blk + 16);
 621                 /*LINTED*/
 622                 w_3  = LOAD_BIG_32(blk + 12);
 623                 /*LINTED*/
 624                 w_2  = LOAD_BIG_32(blk +  8);
 625                 /*LINTED*/
 626                 w_1  = LOAD_BIG_32(blk +  4);
 627                 /*LINTED*/
 628                 w_0  = LOAD_BIG_32(blk +  0);
 629         }
 630 #else   /* !defined(__sparc) */
 631 
 632 void
 633 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
 634 {

 635         sha1word a = ctx->state[0];
 636         sha1word b = ctx->state[1];
 637         sha1word c = ctx->state[2];
 638         sha1word d = ctx->state[3];
 639         sha1word e = ctx->state[4];
 640 
 641 #if     defined(W_ARRAY)
 642         sha1word        w[16];
 643 #else   /* !defined(W_ARRAY) */
 644         sha1word        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 645         sha1word        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 646 #endif  /* !defined(W_ARRAY) */
 647 
 648         W(0)  = LOAD_BIG_32(blk +  0);
 649         W(1)  = LOAD_BIG_32(blk +  4);
 650         W(2)  = LOAD_BIG_32(blk +  8);
 651         W(3)  = LOAD_BIG_32(blk + 12);
 652         W(4)  = LOAD_BIG_32(blk + 16);
 653         W(5)  = LOAD_BIG_32(blk + 20);
 654         W(6)  = LOAD_BIG_32(blk + 24);


   1 /*
   2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5 


   6 /*
   7  * The basic framework for this code came from the reference
   8  * implementation for MD5.  That implementation is Copyright (C)
   9  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  10  *
  11  * License to copy and use this software is granted provided that it
  12  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  13  * Algorithm" in all material mentioning or referencing this software
  14  * or this function.
  15  *
  16  * License is also granted to make and use derivative works provided
  17  * that such works are identified as "derived from the RSA Data
  18  * Security, Inc. MD5 Message-Digest Algorithm" in all material
  19  * mentioning or referencing the derived work.
  20  *
  21  * RSA Data Security, Inc. makes no representations concerning either
  22  * the merchantability of this software or the suitability of this
  23  * software for any particular purpose. It is provided "as is"
  24  * without express or implied warranty of any kind.
  25  *
  26  * These notices must be retained in any copies of any part of this
  27  * documentation and/or software.
  28  *
  29  * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
  30  * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
  31  * Not as fast as one would like -- further optimizations are encouraged
  32  * and appreciated.
  33  */
  34 
  35 #include <sys/types.h>
  36 #include <sys/param.h>
  37 #include <sys/systm.h>
  38 #include <sys/sysmacros.h>
  39 #include <sys/sha1.h>
  40 #include <sys/sha1_consts.h>
  41 
  42 #ifndef _KERNEL
  43 #include <strings.h>
  44 #include <stdlib.h>
  45 #include <errno.h>
  46 #include <sys/systeminfo.h>
  47 #endif  /* !_KERNEL */
  48 
  49 #ifdef _LITTLE_ENDIAN
  50 #include <sys/byteorder.h>
  51 #define HAVE_HTONL
  52 #endif
  53 
  54 static void Encode(uint8_t *, const uint32_t *, size_t);
  55 
  56 #if     defined(__sparc)
  57 
  58 #define SHA1_TRANSFORM(ctx, in) \
  59         SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
  60                 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
  61 
  62 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
  63     SHA1_CTX *, const uint8_t *);
  64 
  65 #elif   defined(__amd64)
  66 
  67 #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
  68 #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
  69                 (in), (num))
  70 
  71 void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
  72 
  73 #else


  92  * ROTATE_LEFT rotates x left n bits.
  93  */
  94 
  95 #if     defined(__GNUC__) && defined(_LP64)
  96 static __inline__ uint64_t
  97 ROTATE_LEFT(uint64_t value, uint32_t n)
  98 {
             /*
              * Rotate the low 32 bits of `value' left by `n' bits.  The
              * cast below discards any upper bits of `value', and the
              * 32-bit result is widened back to uint64_t on return.
              *
              * `n' must be in the range 1..31: with n == 0 the right-shift
              * count (32 - n) equals the width of t32, which C leaves
              * undefined.
              */
  99         uint32_t t32;
 100 
 101         t32 = (uint32_t)value;
 102         return ((t32 << n) | (t32 >> (32 - n)));
 103 }
 104 
 105 #else
 106 
 107 #define ROTATE_LEFT(x, n)       \
 108         (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
 109 
 110 #endif
 111 

 112 










 113 /*
 114  * SHA1Init()
 115  *
 116  * purpose: initializes the sha1 context and begins a sha1 digest operation
 117  *   input: SHA1_CTX *  : the context to initialize.
 118  *  output: void
 119  */
 120 
 121 void
 122 SHA1Init(SHA1_CTX *ctx)
 123 {
 124         ctx->count[0] = ctx->count[1] = 0;
 125 
 126         /*
 127          * load magic initialization constants. Tell lint
 128          * that these constants are unsigned by using U.
 129          */
 130 
 131         ctx->state[0] = 0x67452301U;
 132         ctx->state[1] = 0xefcdab89U;


 272                  * for alignments other than 4-bytes.
 273                  */
 274                 if (usevis) {
 275                         if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
 276                                 /*
 277                                  * Main processing loop - input misaligned
 278                                  */
 279                                 for (; i + 63 < input_len; i += 64) {
 280                                         bcopy(&input[i], input64, 64);
 281                                         SHA1TransformVIS(X0,
 282                                             (uint32_t *)input64,
 283                                             &ctx->state[0], VIS);
 284                                 }
 285                         } else {
 286                                 /*
 287                                  * Main processing loop - input 8-byte aligned
 288                                  */
 289                                 for (; i + 63 < input_len; i += 64) {
 290                                         SHA1TransformVIS(X0,
 291                                             /* LINTED E_BAD_PTR_CAST_ALIGN */
 292                                             (uint32_t *)&input[i], /* CSTYLED */
 293                                             &ctx->state[0], VIS);
 294                                 }
 295 
 296                         }
 297 #ifdef _KERNEL
 298                         sha1_restorefp(fpu);
 299 #endif /* _KERNEL */
 300                 } else {
 301                         for (; i + 63 < input_len; i += 64) {
 302                                 SHA1_TRANSFORM(ctx, &input[i]);
 303                         }
 304                 }
 305 
 306                 /*
 307                  * general optimization:
 308                  *
 309                  * if i and input_len are the same, return now instead
 310                  * of calling bcopy(), since the bcopy() in this case
 311                  * will be an expensive nop.
 312                  */


 430 
 431         /* zeroize sensitive information */
 432         bzero(ctx, sizeof (*ctx));
 433 }
 434 
 435 
 436 #if !defined(__amd64)
 437 
 438 typedef uint32_t sha1word;
 439 
 440 /*
 441  * sparc optimization:
 442  *
 443  * on the sparc, we can load big endian 32-bit data easily.  note that
 444  * special care must be taken to ensure the address is 32-bit aligned.
 445  * in the interest of speed, we don't check to make sure, since
 446  * careful programming can guarantee this for us.
 447  */
 448 
 449 #if     defined(_BIG_ENDIAN)

     /* big-endian host: a plain 32-bit load already yields the wanted value */
 450 #define LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
 451 
 452 #elif   defined(HAVE_HTONL)
     /*
      * HAVE_HTONL is set near the top of the file only when _LITTLE_ENDIAN
      * is defined, so here htonl() performs the byte swap that turns the
      * host-order load into the big-endian value.
      */
 453 #define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
 454 
 455 #else





 456 /* little endian -- will work on big endian, but slowly */
     /*
      * Byte-wise fallback: assembles the value from four separate element
      * reads, most significant byte first, so the result does not depend on
      * the host byte order.
      *
      * NOTE(review): assumes `addr' is a uint8_t pointer, as in the
      * SHA1Transform() calls.  Each byte is then promoted to int before the
      * shift, so `(addr)[0] << 24' shifts into the sign bit when that byte
      * is >= 0x80 (with a 32-bit int) -- consider casting to uint32_t first.
      */
 457 #define LOAD_BIG_32(addr)       \
 458         (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
 459 #endif  /* _BIG_ENDIAN */
 460 




 461 /*
 462  * SHA1Transform()
 463  */
 464 #if     defined(W_ARRAY)
 465 #define W(n) w[n]
 466 #else   /* !defined(W_ARRAY) */
 467 #define W(n) w_ ## n
 468 #endif  /* !defined(W_ARRAY) */
 469 
 470 
 471 #if     defined(__sparc)
 472 
 473 /*
 474  * sparc register window optimization:
 475  *
 476  * `a', `b', `c', `d', and `e' are passed into SHA1Transform
 477  * explicitly since it increases the number of registers available to
 478  * the compiler.  under this scheme, these variables can be held in
 479  * %i0 - %i4, which leaves more local and out registers available.
 480  *


 504          * cases: a `sethi' and an `or', but loading a 32-bit value
 505          * from memory only takes one `ld' (or `lduw' on v9).  while
 506          * this increases memory usage, the compiler can find enough
 507          * other things to do while waiting, so that the pipeline does
 508          * not stall.  additionally, it is likely that many of these
 509          * constants are cached so that later accesses do not even go
 510          * out to the bus.
 511          *
 512          * this array is declared `static' to keep the compiler from
 513          * having to bcopy() this array onto the stack frame of
 514          * SHA1Transform() each time it is called -- which is
 515          * unacceptably expensive.
 516          *
 517          * the `const' is to ensure that callers are good citizens and
 518          * do not try to munge the array.  since these routines are
 519          * going to be called from inside multithreaded kernelland,
 520          * this is a good safety check. -- `sha1_consts' will end up in
 521          * .rodata.
 522          *
 523          * unfortunately, loading from an array in this manner hurts
 524          * performance under Intel.  So, there is a macro,
 525          * SHA1_CONST(), used in SHA1Transform(), that either expands to
 526          * a reference to this array, or to the actual constant,
 527          * depending on what platform this code is compiled for.
 528          */
 529 
 530         static const uint32_t sha1_consts[] = {
 531                 SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
 532         };
 533 
 534         /*
 535          * general optimization:
 536          *
 537          * use individual integers instead of using an array.  this is a
 538          * win, although the amount it wins by seems to vary quite a bit.
 539          */
 540 
 541         uint32_t        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 542         uint32_t        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 543 
 544         /*
 545          * sparc optimization:
 546          *
 547          * if `block' is already aligned on a 4-byte boundary, use
 548          * LOAD_BIG_32() directly.  otherwise, bcopy() into a
 549          * buffer that *is* aligned on a 4-byte boundary and then do
 550          * the LOAD_BIG_32() on that buffer.  benchmarks have shown
 551          * that using the bcopy() is better than loading the bytes


 596                 w_8  = LOAD_BIG_32(blk + 32);
 597                 /*LINTED*/
 598                 w_7  = LOAD_BIG_32(blk + 28);
 599                 /*LINTED*/
 600                 w_6  = LOAD_BIG_32(blk + 24);
 601                 /*LINTED*/
 602                 w_5  = LOAD_BIG_32(blk + 20);
 603                 /*LINTED*/
 604                 w_4  = LOAD_BIG_32(blk + 16);
 605                 /*LINTED*/
 606                 w_3  = LOAD_BIG_32(blk + 12);
 607                 /*LINTED*/
 608                 w_2  = LOAD_BIG_32(blk +  8);
 609                 /*LINTED*/
 610                 w_1  = LOAD_BIG_32(blk +  4);
 611                 /*LINTED*/
 612                 w_0  = LOAD_BIG_32(blk +  0);
 613         }
 614 #else   /* !defined(__sparc) */
 615 
 616 void /* CSTYLED */
 617 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
 618 {
 619         /* CSTYLED */
 620         sha1word a = ctx->state[0];
 621         sha1word b = ctx->state[1];
 622         sha1word c = ctx->state[2];
 623         sha1word d = ctx->state[3];
 624         sha1word e = ctx->state[4];
 625 
 626 #if     defined(W_ARRAY)
 627         sha1word        w[16];
 628 #else   /* !defined(W_ARRAY) */
 629         sha1word        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 630         sha1word        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 631 #endif  /* !defined(W_ARRAY) */
 632 
 633         W(0)  = LOAD_BIG_32(blk +  0);
 634         W(1)  = LOAD_BIG_32(blk +  4);
 635         W(2)  = LOAD_BIG_32(blk +  8);
 636         W(3)  = LOAD_BIG_32(blk + 12);
 637         W(4)  = LOAD_BIG_32(blk + 16);
 638         W(5)  = LOAD_BIG_32(blk + 20);
 639         W(6)  = LOAD_BIG_32(blk + 24);