Print this page
6662791 Need a SHA1 implementation optimized for 64-bit x86

Split Close
Expand all
Collapse all
          --- old/usr/src/common/crypto/sha1/sha1.c
          +++ new/usr/src/common/crypto/sha1/sha1.c
   1    1  /*
   2      - * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
        2 + * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3    3   * Use is subject to license terms.
   4    4   */
   5    5  
   6      -#pragma ident   "@(#)sha1.c     1.26    07/04/10 SMI"
        6 +#pragma ident   "@(#)sha1.c     1.27    08/03/02 SMI"
   7    7  
   8    8  /*
   9    9   * The basic framework for this code came from the reference
  10   10   * implementation for MD5.  That implementation is Copyright (C)
  11   11   * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  12   12   *
  13   13   * License to copy and use this software is granted provided that it
  14   14   * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  15   15   * Algorithm" in all material mentioning or referencing this software
  16   16   * or this function.
↓ open down ↓ 35 lines elided ↑ open up ↑
  52   52  
  53   53  #if     defined(__sparc)
  54   54  
  55   55  #define SHA1_TRANSFORM(ctx, in) \
  56   56          SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
  57   57                  (ctx)->state[3], (ctx)->state[4], (ctx), (in))
  58   58  
  59   59  static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
  60   60      SHA1_CTX *, const uint8_t *);
  61   61  
       62 +#elif   defined(__amd64)
       63 +
       64 +#define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
       65 +#define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
       66 +                (in), (num))
       67 +
       68 +void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
       69 +
  62   70  #else
  63   71  
  64   72  #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
  65   73  
  66   74  static void SHA1Transform(SHA1_CTX *, const uint8_t *);
  67   75  
  68   76  #endif
  69   77  
  70   78  
  71   79  static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
↓ open down ↓ 198 lines elided ↑ open up ↑
  270  278                   * Handling is provided to deal with arbitrary byte alignment
 271  279                   * of the input data but the performance gains are reduced
 272  280                   * for alignments other than 4-bytes.
 273  281                   */
 274  282                  if (usevis) {
 275  283                          if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
 276  284                                  /*
 277  285                                   * Main processing loop - input misaligned
 278  286                                   */
 279  287                                  for (; i + 63 < input_len; i += 64) {
 280      -                                    bcopy(&input[i], input64, 64);
 281      -                                    SHA1TransformVIS(X0, (uint32_t *)input64,
 282      -                                        &ctx->state[0], VIS);
      288 +                                        bcopy(&input[i], input64, 64);
      289 +                                        SHA1TransformVIS(X0,
      290 +                                            (uint32_t *)input64,
      291 +                                            &ctx->state[0], VIS);
 283  292                                  }
 284  293                          } else {
 285  294                                  /*
 286  295                                   * Main processing loop - input 8-byte aligned
 287  296                                   */
 288  297                                  for (; i + 63 < input_len; i += 64) {
 289  298                                          SHA1TransformVIS(X0,
 290      -                                            /* LINTED E_BAD_PTR_CAST_ALIGN */
      299 +                                        /* LINTED E_BAD_PTR_CAST_ALIGN */
 291  300                                              (uint32_t *)&input[i],
 292  301                                              &ctx->state[0], VIS);
 293  302                                  }
 294  303  
 295  304                          }
 296  305  #ifdef _KERNEL
 297  306                          sha1_restorefp(fpu);
 298  307  #endif /* _KERNEL */
 299  308                  } else {
 300  309                          for (; i + 63 < input_len; i += 64) {
 301      -                            SHA1_TRANSFORM(ctx, &input[i]);
      310 +                                SHA1_TRANSFORM(ctx, &input[i]);
 302  311                          }
 303  312                  }
 304  313  
 305  314                  /*
 306  315                   * general optimization:
 307  316                   *
 308  317                   * if i and input_len are the same, return now instead
 309  318                   * of calling bcopy(), since the bcopy() in this case
 310  319                   * will be an expensive nop.
 311  320                   */
↓ open down ↓ 8 lines elided ↑ open up ↑
 320  329          bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 321  330  }
 322  331  
 323  332  #else /* VIS_SHA1 */
 324  333  
 325  334  void
 326  335  SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
 327  336  {
 328  337          uint32_t i, buf_index, buf_len;
 329  338          const uint8_t *input = inptr;
      339 +#if defined(__amd64)
      340 +        uint32_t        block_count;
      341 +#endif  /* __amd64 */
 330  342  
 331  343          /* check for noop */
 332  344          if (input_len == 0)
 333  345                  return;
 334  346  
 335  347          /* compute number of bytes mod 64 */
 336  348          buf_index = (ctx->count[1] >> 3) & 0x3F;
 337  349  
 338  350          /* update number of bits */
 339  351          if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
↓ open down ↓ 16 lines elided ↑ open up ↑
 356  368                   * wasn't any data left over from a previous call to
 357  369                   * SHA1Update().
 358  370                   */
 359  371  
 360  372                  if (buf_index) {
 361  373                          bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 362  374                          SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
 363  375                          i = buf_len;
 364  376                  }
 365  377  
      378 +#if !defined(__amd64)
 366  379                  for (; i + 63 < input_len; i += 64)
 367  380                          SHA1_TRANSFORM(ctx, &input[i]);
      381 +#else
      382 +                block_count = (input_len - i) >> 6;
      383 +                if (block_count > 0) {
      384 +                        SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
      385 +                        i += block_count << 6;
      386 +                }
      387 +#endif  /* !__amd64 */
 368  388  
 369  389                  /*
 370  390                   * general optimization:
 371  391                   *
 372  392                   * if i and input_len are the same, return now instead
 373  393                   * of calling bcopy(), since the bcopy() in this case
 374  394                   * will be an expensive nop.
 375  395                   */
 376  396  
 377  397                  if (input_len == i)
↓ open down ↓ 6 lines elided ↑ open up ↑
 384  404          bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 385  405  }
 386  406  
 387  407  #endif /* VIS_SHA1 */
 388  408  
 389  409  /*
 390  410   * SHA1Final()
 391  411   *
 392  412   * purpose: ends an sha1 digest operation, finalizing the message digest and
 393  413   *          zeroing the context.
 394      - *   input: uchar_t *   : a buffer to store the digest in
      414 + *   input: uchar_t *   : A buffer to store the digest.
 395  415   *                      : The function actually uses void* because many
 396  416   *                      : callers pass things other than uchar_t here.
 397  417   *          SHA1_CTX *  : the context to finalize, save, and zero
 398  418   *  output: void
 399  419   */
 400  420  
 401  421  void
 402  422  SHA1Final(void *digest, SHA1_CTX *ctx)
 403  423  {
 404  424          uint8_t         bitcount_be[sizeof (ctx->count)];
↓ open down ↓ 8 lines elided ↑ open up ↑
 413  433          /* append length (before padding) */
 414  434          SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
 415  435  
 416  436          /* store state in digest */
 417  437          Encode(digest, ctx->state, sizeof (ctx->state));
 418  438  
 419  439          /* zeroize sensitive information */
 420  440          bzero(ctx, sizeof (*ctx));
 421  441  }
 422  442  
      443 +
      444 +#if !defined(__amd64)
      445 +
 423  446  typedef uint32_t sha1word;
 424  447  
 425  448  /*
 426  449   * sparc optimization:
 427  450   *
 428  451   * on the sparc, we can load big endian 32-bit data easily.  note that
 429  452   * special care must be taken to ensure the address is 32-bit aligned.
 430  453   * in the interest of speed, we don't check to make sure, since
 431  454   * careful programming can guarantee this for us.
 432  455   */
↓ open down ↓ 220 lines elided ↑ open up ↑
 653  676           *
 654  677           * for either method given in the spec, there is an "assignment"
 655  678           * phase where the following takes place:
 656  679           *
 657  680           *      tmp = (main_computation);
 658  681           *      e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
 659  682           *
 660  683           * we can make the algorithm go faster by not doing this work,
 661  684           * but just pretending that `d' is now `e', etc. this works
 662  685           * really well and obviates the need for a temporary variable.
 663      -         * however, we still explictly perform the rotate action,
      686 +         * however, we still explicitly perform the rotate action,
 664  687           * since it is cheaper on SPARC to do it once than to have to
 665  688           * do it over and over again.
 666  689           */
 667  690  
 668  691          /* round 1 */
 669  692          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
 670  693          b = ROTATE_LEFT(b, 30);
 671  694  
 672  695          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
 673  696          a = ROTATE_LEFT(a, 30);
↓ open down ↓ 301 lines elided ↑ open up ↑
 975  998              SHA1_CONST(3);
 976  999          ctx->state[1] += b;
 977 1000          ctx->state[2] += ROTATE_LEFT(c, 30);
 978 1001          ctx->state[3] += d;
 979 1002          ctx->state[4] += e;
 980 1003  
 981 1004          /* zeroize sensitive information */
 982 1005          W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
 983 1006          W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
 984 1007  }
     1008 +#endif  /* !__amd64 */
 985 1009  
     1010 +
 986 1011  /*
 987 1012   * Encode()
 988 1013   *
 989 1014   * purpose: to convert a list of numbers from little endian to big endian
 990 1015   *   input: uint8_t *   : place to store the converted big endian numbers
 991 1016   *          uint32_t *  : place to get numbers to convert from
 992 1017   *          size_t      : the length of the input in bytes
 993 1018   *  output: void
 994 1019   */
 995 1020  
↓ open down ↓ 24 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX