5007142 Add ntohll and htonll to sys/byteorder.h
6717509 Need to use bswap/bswapq for byte swap of 64-bit integer on x32/x64
PSARC 2008/474
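This delta moves sha1.c off its hand-rolled GNU C inline bswap and onto the htonl()/ntohl() family in <sys/byteorder.h>, which the PSARC case above extends with 64-bit ntohll()/htonll(). As background for the review, here is a minimal, purely illustrative sketch of what a portable htonll() fallback could look like on a little-endian build; the committed header is expected to lean on the bswap/bswapq instructions instead, and the helper name below is hypothetical.

#include <stdint.h>

/*
 * Illustrative sketch only -- not the actual <sys/byteorder.h> code.
 * On a little-endian machine htonll() must reverse all eight bytes;
 * an optimized header would map this onto bswap/bswapq.
 */
static inline uint64_t
example_htonll(uint64_t v)		/* hypothetical name */
{
	return (((v & 0x00000000000000ffULL) << 56) |
	    ((v & 0x000000000000ff00ULL) << 40) |
	    ((v & 0x0000000000ff0000ULL) << 24) |
	    ((v & 0x00000000ff000000ULL) << 8) |
	    ((v & 0x000000ff00000000ULL) >> 8) |
	    ((v & 0x0000ff0000000000ULL) >> 24) |
	    ((v & 0x00ff000000000000ULL) >> 40) |
	    ((v & 0xff00000000000000ULL) >> 56));
}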
--- old/usr/src/common/crypto/sha1/sha1.c
+++ new/usr/src/common/crypto/sha1/sha1.c
1 1 /*
2 2 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
3 3 * Use is subject to license terms.
4 4 */
5 5
6 -#pragma ident "%Z%%M% %I% %E% SMI"
7 -
8 6 /*
9 7 * The basic framework for this code came from the reference
10 8 * implementation for MD5. That implementation is Copyright (C)
11 9 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
12 10 *
13 11 * License to copy and use this software is granted provided that it
14 12 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
15 13 * Algorithm" in all material mentioning or referencing this software
16 14 * or this function.
17 15 *
18 16 * License is also granted to make and use derivative works provided
19 17 * that such works are identified as "derived from the RSA Data
20 18 * Security, Inc. MD5 Message-Digest Algorithm" in all material
21 19 * mentioning or referencing the derived work.
22 20 *
23 21 * RSA Data Security, Inc. makes no representations concerning either
24 22 * the merchantability of this software or the suitability of this
25 23 * software for any particular purpose. It is provided "as is"
26 24 * without express or implied warranty of any kind.
27 25 *
28 26 * These notices must be retained in any copies of any part of this
29 27 * documentation and/or software.
30 28 *
31 29 * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
32 - * standard, available at http://www.itl.nist.gov/div897/pubs/fip180-1.htm
30 + * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
33 31 * Not as fast as one would like -- further optimizations are encouraged
34 32 * and appreciated.
35 33 */
36 34
37 35 #include <sys/types.h>
38 36 #include <sys/param.h>
39 37 #include <sys/systm.h>
40 38 #include <sys/sysmacros.h>
41 39 #include <sys/sha1.h>
42 40 #include <sys/sha1_consts.h>
43 41
44 42 #ifndef _KERNEL
45 43 #include <strings.h>
46 44 #include <stdlib.h>
47 45 #include <errno.h>
48 46 #include <sys/systeminfo.h>
49 47 #endif /* !_KERNEL */
50 48
49 +#ifdef _LITTLE_ENDIAN
50 +#include <sys/byteorder.h>
51 +#define HAVE_HTONL
52 +#endif
53 +
51 54 static void Encode(uint8_t *, const uint32_t *, size_t);
52 55
53 56 #if defined(__sparc)
54 57
55 58 #define SHA1_TRANSFORM(ctx, in) \
56 59 SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
57 60 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
58 61
59 62 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
60 63 SHA1_CTX *, const uint8_t *);
61 64
62 65 #elif defined(__amd64)
63 66
64 67 #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
65 68 #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
66 69 (in), (num))
67 70
68 71 void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
69 72
70 73 #else
71 74
72 75 #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
73 76
74 77 static void SHA1Transform(SHA1_CTX *, const uint8_t *);
75 78
76 79 #endif
77 80
78 81
79 82 static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
80 83
81 84 /*
82 85 * F, G, and H are the basic SHA1 functions.
83 86 */
84 87 #define F(b, c, d) (((b) & (c)) | ((~b) & (d)))
85 88 #define G(b, c, d) ((b) ^ (c) ^ (d))
86 89 #define H(b, c, d) (((b) & (c)) | (((b)|(c)) & (d)))
87 90
88 91 /*
89 92 * ROTATE_LEFT rotates x left n bits.
90 93 */
91 94
92 95 #if defined(__GNUC__) && defined(_LP64)
93 96 static __inline__ uint64_t
94 97 ROTATE_LEFT(uint64_t value, uint32_t n)
95 98 {
96 99 uint32_t t32;
97 100
98 101 t32 = (uint32_t)value;
99 102 return ((t32 << n) | (t32 >> (32 - n)));
100 103 }
101 104
102 105 #else
103 106
104 107 #define ROTATE_LEFT(x, n) \
105 108 (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
106 109
107 110 #endif
108 111
109 -#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
110 112
111 -#define HAVE_BSWAP
112 -
113 -extern __inline__ uint32_t bswap(uint32_t value)
114 -{
115 - __asm__("bswap %0" : "+r" (value));
116 - return (value);
117 -}
118 -
119 -#endif
120 -
121 113 /*
122 114 * SHA1Init()
123 115 *
124 116 * purpose: initializes the sha1 context and begins an sha1 digest operation
125 117 * input: SHA1_CTX * : the context to initialize.
126 118 * output: void
127 119 */
128 120
129 121 void
130 122 SHA1Init(SHA1_CTX *ctx)
131 123 {
132 124 ctx->count[0] = ctx->count[1] = 0;
133 125
134 126 /*
135 127 * load magic initialization constants. Tell lint
136 128 * that these constants are unsigned by using U.
137 129 */
138 130
139 131 ctx->state[0] = 0x67452301U;
140 132 ctx->state[1] = 0xefcdab89U;
141 133 ctx->state[2] = 0x98badcfeU;
142 134 ctx->state[3] = 0x10325476U;
143 135 ctx->state[4] = 0xc3d2e1f0U;
144 136 }
145 137
146 138 #ifdef VIS_SHA1
147 139 #ifdef _KERNEL
148 140
149 141 #include <sys/regset.h>
150 142 #include <sys/vis.h>
151 143 #include <sys/fpu/fpusystm.h>
152 144
153 145 /* the alignment for block stores to save fp registers */
154 146 #define VIS_ALIGN (64)
155 147
156 148 extern int sha1_savefp(kfpu_t *, int);
157 149 extern void sha1_restorefp(kfpu_t *);
158 150
159 151 uint32_t vis_sha1_svfp_threshold = 128;
160 152
161 153 #endif /* _KERNEL */
162 154
163 155 /*
164 156 * VIS SHA-1 consts.
165 157 */
166 158 static uint64_t VIS[] = {
167 159 0x8000000080000000ULL,
168 160 0x0002000200020002ULL,
169 161 0x5a8279996ed9eba1ULL,
170 162 0x8f1bbcdcca62c1d6ULL,
171 163 0x012389ab456789abULL};
172 164
173 165 extern void SHA1TransformVIS(uint64_t *, uint32_t *, uint32_t *, uint64_t *);
174 166
175 167
176 168 /*
177 169 * SHA1Update()
178 170 *
179 171 * purpose: continues an sha1 digest operation, using the message block
180 172 * to update the context.
181 173 * input: SHA1_CTX * : the context to update
182 174 * void * : the message block
183 175 * size_t : the length of the message block in bytes
184 176 * output: void
185 177 */
186 178
187 179 void
188 180 SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
189 181 {
190 182 uint32_t i, buf_index, buf_len;
191 183 uint64_t X0[40], input64[8];
192 184 const uint8_t *input = inptr;
193 185 #ifdef _KERNEL
194 186 int usevis = 0;
195 187 #else
196 188 int usevis = 1;
197 189 #endif /* _KERNEL */
198 190
199 191 /* check for noop */
200 192 if (input_len == 0)
201 193 return;
202 194
203 195 /* compute number of bytes mod 64 */
204 196 buf_index = (ctx->count[1] >> 3) & 0x3F;
205 197
206 198 /* update number of bits */
207 199 if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
208 200 ctx->count[0]++;
209 201
210 202 ctx->count[0] += (input_len >> 29);
211 203
212 204 buf_len = 64 - buf_index;
213 205
214 206 /* transform as many times as possible */
215 207 i = 0;
216 208 if (input_len >= buf_len) {
217 209 #ifdef _KERNEL
218 210 kfpu_t *fpu;
219 211 if (fpu_exists) {
220 212 uint8_t fpua[sizeof (kfpu_t) + GSR_SIZE + VIS_ALIGN];
221 213 uint32_t len = (input_len + buf_index) & ~0x3f;
222 214 int svfp_ok;
223 215
224 216 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, 64);
225 217 svfp_ok = ((len >= vis_sha1_svfp_threshold) ? 1 : 0);
226 218 usevis = fpu_exists && sha1_savefp(fpu, svfp_ok);
227 219 } else {
228 220 usevis = 0;
229 221 }
230 222 #endif /* _KERNEL */
231 223
232 224 /*
233 225 * general optimization:
234 226 *
235 227 * only do initial bcopy() and SHA1Transform() if
236 228 * buf_index != 0. if buf_index == 0, we're just
237 229 * wasting our time doing the bcopy() since there
238 230 * wasn't any data left over from a previous call to
239 231 * SHA1Update().
240 232 */
241 233
242 234 if (buf_index) {
243 235 bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
244 236 if (usevis) {
245 237 SHA1TransformVIS(X0,
246 238 ctx->buf_un.buf32,
247 239 &ctx->state[0], VIS);
248 240 } else {
249 241 SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
250 242 }
251 243 i = buf_len;
252 244 }
253 245
254 246 /*
255 247 * VIS SHA-1: uses the VIS 1.0 instructions to accelerate
256 248 * SHA-1 processing. This is achieved by "offloading" the
257 249 * computation of the message schedule (MS) to the VIS units.
258 250 * This allows the VIS computation of the message schedule
259 251 * to be performed in parallel with the standard integer
260 252 * processing of the remainder of the SHA-1 computation.
261 253 * This improves performance by up to around 1.37X, compared to
262 254 * an optimized integer-only implementation.
263 255 *
264 256 * The VIS implementation of SHA1Transform has a different API
265 257 * to the standard integer version:
266 258 *
267 259 * void SHA1TransformVIS(
268 260 * uint64_t *, // Pointer to MS for ith block
269 261 * uint32_t *, // Pointer to ith block of message data
270 262 * uint32_t *, // Pointer to SHA state i.e. ctx->state
271 263 * uint64_t *, // Pointer to various VIS constants
272 264 * )
273 265 *
274 266 * Note: the message data must be 4-byte aligned.
275 267 *
276 268 * Function requires VIS 1.0 support.
277 269 *
278 270 * Handling is provided to deal with arbitrary byte alignment
279 271 * of the input data but the performance gains are reduced
280 272 * for alignments other than 4-bytes.
281 273 */
282 274 if (usevis) {
283 275 if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
284 276 /*
285 277 * Main processing loop - input misaligned
286 278 */
287 279 for (; i + 63 < input_len; i += 64) {
288 280 bcopy(&input[i], input64, 64);
289 281 SHA1TransformVIS(X0,
290 282 (uint32_t *)input64,
291 283 &ctx->state[0], VIS);
292 284 }
293 285 } else {
294 286 /*
295 287 * Main processing loop - input 4-byte aligned
296 288 */
297 289 for (; i + 63 < input_len; i += 64) {
298 290 SHA1TransformVIS(X0,
299 - /* LINTED E_BAD_PTR_CAST_ALIGN */
300 - (uint32_t *)&input[i],
291 + /* LINTED E_BAD_PTR_CAST_ALIGN */
292 + (uint32_t *)&input[i], /* CSTYLED */
301 293 &ctx->state[0], VIS);
302 294 }
303 295
304 296 }
305 297 #ifdef _KERNEL
306 298 sha1_restorefp(fpu);
307 299 #endif /* _KERNEL */
308 300 } else {
309 301 for (; i + 63 < input_len; i += 64) {
310 302 SHA1_TRANSFORM(ctx, &input[i]);
311 303 }
312 304 }
313 305
314 306 /*
315 307 * general optimization:
316 308 *
317 309 * if i and input_len are the same, return now instead
318 310 * of calling bcopy(), since the bcopy() in this case
319 311 * will be an expensive nop.
320 312 */
321 313
322 314 if (input_len == i)
323 315 return;
324 316
325 317 buf_index = 0;
326 318 }
327 319
328 320 /* buffer remaining input */
329 321 bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
330 322 }
331 323
332 324 #else /* VIS_SHA1 */
333 325
334 326 void
335 327 SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
336 328 {
337 329 uint32_t i, buf_index, buf_len;
338 330 const uint8_t *input = inptr;
339 331 #if defined(__amd64)
340 332 uint32_t block_count;
341 333 #endif /* __amd64 */
342 334
343 335 /* check for noop */
344 336 if (input_len == 0)
345 337 return;
346 338
347 339 /* compute number of bytes mod 64 */
348 340 buf_index = (ctx->count[1] >> 3) & 0x3F;
349 341
350 342 /* update number of bits */
351 343 if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
352 344 ctx->count[0]++;
353 345
354 346 ctx->count[0] += (input_len >> 29);
355 347
356 348 buf_len = 64 - buf_index;
357 349
358 350 /* transform as many times as possible */
359 351 i = 0;
360 352 if (input_len >= buf_len) {
361 353
362 354 /*
363 355 * general optimization:
364 356 *
365 357 * only do initial bcopy() and SHA1Transform() if
366 358 * buf_index != 0. if buf_index == 0, we're just
367 359 * wasting our time doing the bcopy() since there
368 360 * wasn't any data left over from a previous call to
369 361 * SHA1Update().
370 362 */
371 363
372 364 if (buf_index) {
373 365 bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
374 366 SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
375 367 i = buf_len;
376 368 }
377 369
378 370 #if !defined(__amd64)
379 371 for (; i + 63 < input_len; i += 64)
380 372 SHA1_TRANSFORM(ctx, &input[i]);
381 373 #else
382 374 block_count = (input_len - i) >> 6;
383 375 if (block_count > 0) {
384 376 SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
385 377 i += block_count << 6;
386 378 }
387 379 #endif /* !__amd64 */
388 380
389 381 /*
390 382 * general optimization:
391 383 *
392 384 * if i and input_len are the same, return now instead
393 385 * of calling bcopy(), since the bcopy() in this case
394 386 * will be an expensive nop.
395 387 */
396 388
397 389 if (input_len == i)
398 390 return;
399 391
400 392 buf_index = 0;
401 393 }
402 394
403 395 /* buffer remaining input */
404 396 bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
405 397 }
406 398
407 399 #endif /* VIS_SHA1 */
408 400
409 401 /*
410 402 * SHA1Final()
411 403 *
412 404 * purpose: ends an sha1 digest operation, finalizing the message digest and
413 405 * zeroing the context.
414 406 * input: uchar_t * : A buffer to store the digest.
415 407 * : The function actually uses void* because many
416 408 * : callers pass things other than uchar_t here.
417 409 * SHA1_CTX * : the context to finalize, save, and zero
418 410 * output: void
419 411 */
420 412
421 413 void
422 414 SHA1Final(void *digest, SHA1_CTX *ctx)
423 415 {
424 416 uint8_t bitcount_be[sizeof (ctx->count)];
425 417 uint32_t index = (ctx->count[1] >> 3) & 0x3f;
426 418
427 419 /* store bit count, big endian */
428 420 Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
429 421
430 422 /* pad out to 56 mod 64 */
431 423 SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
432 424
433 425 /* append length (before padding) */
434 426 SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
435 427
436 428 /* store state in digest */
437 429 Encode(digest, ctx->state, sizeof (ctx->state));
438 430
439 431 /* zeroize sensitive information */
440 432 bzero(ctx, sizeof (*ctx));
441 433 }
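The "pad out to 56 mod 64" expression in SHA1Final() is easy to misread, so a small self-contained check may help; the helper below is hypothetical and simply restates the expression used above, with example byte counts.

#include <assert.h>
#include <stddef.h>

/*
 * Hypothetical helper, for illustration only: the number of PADDING
 * bytes SHA1Final() feeds to SHA1Update() when `index' bytes (mod 64)
 * are already buffered.  Message plus padding must leave exactly
 * 8 bytes for the big-endian bit count in the last 64-byte block.
 */
static size_t
sha1_pad_len(size_t index)
{
	return ((size_t)(((index < 56) ? 56 : 120) - index));
}

int
main(void)
{
	assert(sha1_pad_len(10) == 46);	/* 10 + 46 + 8 == 64  (one block)  */
	assert(sha1_pad_len(60) == 60);	/* 60 + 60 + 8 == 128 (two blocks) */
	return (0);
}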
442 434
443 435
444 436 #if !defined(__amd64)
445 437
446 438 typedef uint32_t sha1word;
447 439
448 440 /*
449 441 * sparc optimization:
450 442 *
451 443 * on the sparc, we can load big endian 32-bit data easily. note that
452 444 * special care must be taken to ensure the address is 32-bit aligned.
453 445 * in the interest of speed, we don't check to make sure, since
454 446 * careful programming can guarantee this for us.
455 447 */
456 448
457 449 #if defined(_BIG_ENDIAN)
458 -
459 450 #define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
460 451
461 -#else /* !defined(_BIG_ENDIAN) */
452 +#elif defined(HAVE_HTONL)
453 +#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
462 454
463 -#if defined(HAVE_BSWAP)
464 -
465 -#define LOAD_BIG_32(addr) bswap(*((uint32_t *)(addr)))
466 -
467 -#else /* !defined(HAVE_BSWAP) */
468 -
455 +#else
469 456 /* little endian -- will work on big endian, but slowly */
470 457 #define LOAD_BIG_32(addr) \
471 458 (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
459 +#endif /* _BIG_ENDIAN */
472 460
473 -#endif /* !defined(HAVE_BSWAP) */
474 -
475 -#endif /* !defined(_BIG_ENDIAN) */
476 -
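With HAVE_BSWAP gone, the little-endian LOAD_BIG_32() now relies on htonl() to do the 32-bit byte swap (which this changeset expects to compile down to a single bswap on x86 via <sys/byteorder.h>). A small standalone sanity check, using the portable htonl() from <arpa/inet.h> purely for illustration, shows that the htonl() form and the shift/or fallback agree on an aligned word:

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl(); this file's build gets it from <sys/byteorder.h> */

int
main(void)
{
	const uint8_t buf[4] = { 0x12, 0x34, 0x56, 0x78 };
	uint32_t word, via_htonl, via_shifts;

	(void) memcpy(&word, buf, sizeof (word));	/* aligned 32-bit load */
	via_htonl = htonl(word);			/* LOAD_BIG_32() fast path */
	via_shifts = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) |
	    ((uint32_t)buf[2] << 8) | buf[3];		/* portable fallback */
	assert(via_htonl == 0x12345678 && via_htonl == via_shifts);
	return (0);
}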
477 461 /*
478 462 * SHA1Transform()
479 463 */
480 464 #if defined(W_ARRAY)
481 465 #define W(n) w[n]
482 466 #else /* !defined(W_ARRAY) */
483 467 #define W(n) w_ ## n
484 468 #endif /* !defined(W_ARRAY) */
485 469
486 470
487 471 #if defined(__sparc)
488 472
489 473 /*
490 474 * sparc register window optimization:
491 475 *
492 476 * `a', `b', `c', `d', and `e' are passed into SHA1Transform
493 477 * explicitly since it increases the number of registers available to
494 478 * the compiler. under this scheme, these variables can be held in
495 479 * %i0 - %i4, which leaves more local and out registers available.
496 480 *
497 481 * purpose: sha1 transformation -- updates the digest based on `block'
498 482 * input: uint32_t : bytes 1 - 4 of the digest
499 483 * uint32_t : bytes 5 - 8 of the digest
500 484 * uint32_t : bytes 9 - 12 of the digest
501 485 * uint32_t : bytes 13 - 16 of the digest
502 486 * uint32_t : bytes 17 - 20 of the digest
503 487 * SHA1_CTX * : the context to update
504 488 * uint8_t [64]: the block to use to update the digest
505 489 * output: void
506 490 */
507 491
508 492 void
509 493 SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
510 494 SHA1_CTX *ctx, const uint8_t blk[64])
511 495 {
512 496 /*
513 497 * sparc optimization:
514 498 *
515 499 * while it is somewhat counter-intuitive, on sparc, it is
516 500 * more efficient to place all the constants used in this
517 501 * function in an array and load the values out of the array
518 502 * than to manually load the constants. this is because
519 503 * setting a register to a 32-bit value takes two ops in most
520 504 * cases: a `sethi' and an `or', but loading a 32-bit value
521 505 * from memory only takes one `ld' (or `lduw' on v9). while
522 506 * this increases memory usage, the compiler can find enough
523 507 * other things to do while waiting so that the pipeline does
524 508 * not stall. additionally, it is likely that many of these
525 509 * constants are cached so that later accesses do not even go
526 510 * out to the bus.
527 511 *
528 512 * this array is declared `static' to keep the compiler from
529 513 * having to bcopy() this array onto the stack frame of
530 514 * SHA1Transform() each time it is called -- which is
531 515 * unacceptably expensive.
532 516 *
533 517 * the `const' is to ensure that callers are good citizens and
534 518 * do not try to munge the array. since these routines are
535 519 * going to be called from inside multithreaded kernelland,
536 520 * this is a good safety check. -- `sha1_consts' will end up in
537 521 * .rodata.
538 522 *
539 523 * unfortunately, loading from an array in this manner hurts
540 - * performance under intel. so, there is a macro,
524 + * performance under Intel. So, there is a macro,
541 525 * SHA1_CONST(), used in SHA1Transform(), that either expands to
542 526 * a reference to this array, or to the actual constant,
543 527 * depending on what platform this code is compiled for.
544 528 */
545 529
546 530 static const uint32_t sha1_consts[] = {
547 - SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3,
531 + SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
548 532 };
549 533
550 534 /*
551 535 * general optimization:
552 536 *
553 537 * use individual integers instead of using an array. this is a
554 538 * win, although the amount it wins by seems to vary quite a bit.
555 539 */
556 540
557 541 uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
558 542 uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
559 543
560 544 /*
561 545 * sparc optimization:
562 546 *
563 547 * if `block' is already aligned on a 4-byte boundary, use
564 548 * LOAD_BIG_32() directly. otherwise, bcopy() into a
565 549 * buffer that *is* aligned on a 4-byte boundary and then do
566 550 * the LOAD_BIG_32() on that buffer. benchmarks have shown
567 551 * that using the bcopy() is better than loading the bytes
568 552 * individually and doing the endian-swap by hand.
569 553 *
570 554 * even though it's quite tempting to simply do:
571 555 *
572 556 * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
573 557 *
574 558 * and only have one set of LOAD_BIG_32()'s, the compiler
575 559 * *does not* like that, so please resist the urge.
576 560 */
577 561
578 562 if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
579 563 bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
580 564 w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
581 565 w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
582 566 w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
583 567 w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
584 568 w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
585 569 w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
586 570 w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9);
587 571 w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8);
588 572 w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7);
589 573 w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6);
590 574 w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5);
591 575 w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4);
592 576 w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3);
593 577 w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2);
594 578 w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1);
595 579 w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0);
596 580 } else {
597 581 /*LINTED*/
598 582 w_15 = LOAD_BIG_32(blk + 60);
599 583 /*LINTED*/
600 584 w_14 = LOAD_BIG_32(blk + 56);
601 585 /*LINTED*/
602 586 w_13 = LOAD_BIG_32(blk + 52);
603 587 /*LINTED*/
604 588 w_12 = LOAD_BIG_32(blk + 48);
605 589 /*LINTED*/
606 590 w_11 = LOAD_BIG_32(blk + 44);
607 591 /*LINTED*/
608 592 w_10 = LOAD_BIG_32(blk + 40);
609 593 /*LINTED*/
610 594 w_9 = LOAD_BIG_32(blk + 36);
611 595 /*LINTED*/
612 596 w_8 = LOAD_BIG_32(blk + 32);
613 597 /*LINTED*/
614 598 w_7 = LOAD_BIG_32(blk + 28);
615 599 /*LINTED*/
616 600 w_6 = LOAD_BIG_32(blk + 24);
617 601 /*LINTED*/
618 602 w_5 = LOAD_BIG_32(blk + 20);
619 603 /*LINTED*/
620 604 w_4 = LOAD_BIG_32(blk + 16);
621 605 /*LINTED*/
622 606 w_3 = LOAD_BIG_32(blk + 12);
623 607 /*LINTED*/
624 608 w_2 = LOAD_BIG_32(blk + 8);
625 609 /*LINTED*/
626 610 w_1 = LOAD_BIG_32(blk + 4);
627 611 /*LINTED*/
628 612 w_0 = LOAD_BIG_32(blk + 0);
629 613 }
630 614 #else /* !defined(__sparc) */
631 615
632 -void
616 +void /* CSTYLED */
633 617 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
634 618 {
619 + /* CSTYLED */
635 620 sha1word a = ctx->state[0];
636 621 sha1word b = ctx->state[1];
637 622 sha1word c = ctx->state[2];
638 623 sha1word d = ctx->state[3];
639 624 sha1word e = ctx->state[4];
640 625
641 626 #if defined(W_ARRAY)
642 627 sha1word w[16];
643 628 #else /* !defined(W_ARRAY) */
644 629 sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
645 630 sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
646 631 #endif /* !defined(W_ARRAY) */
647 632
648 633 W(0) = LOAD_BIG_32(blk + 0);
649 634 W(1) = LOAD_BIG_32(blk + 4);
650 635 W(2) = LOAD_BIG_32(blk + 8);
651 636 W(3) = LOAD_BIG_32(blk + 12);
652 637 W(4) = LOAD_BIG_32(blk + 16);
653 638 W(5) = LOAD_BIG_32(blk + 20);
654 639 W(6) = LOAD_BIG_32(blk + 24);
655 640 W(7) = LOAD_BIG_32(blk + 28);
656 641 W(8) = LOAD_BIG_32(blk + 32);
657 642 W(9) = LOAD_BIG_32(blk + 36);
658 643 W(10) = LOAD_BIG_32(blk + 40);
659 644 W(11) = LOAD_BIG_32(blk + 44);
660 645 W(12) = LOAD_BIG_32(blk + 48);
661 646 W(13) = LOAD_BIG_32(blk + 52);
662 647 W(14) = LOAD_BIG_32(blk + 56);
663 648 W(15) = LOAD_BIG_32(blk + 60);
664 649
665 650 #endif /* !defined(__sparc) */
666 651
667 652 /*
668 653 * general optimization:
669 654 *
670 655 * even though this approach is described in the standard as
671 656 * being slower algorithmically, it is 30-40% faster than the
672 657 * "faster" version under SPARC, because this version has more
673 658 * of the constraints specified at compile-time and uses fewer
674 659 * variables (and therefore has better register utilization)
675 660 * than its "speedier" brother. (i've tried both, trust me)
676 661 *
677 662 * for either method given in the spec, there is an "assignment"
678 663 * phase where the following takes place:
679 664 *
680 665 * tmp = (main_computation);
681 666 * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
682 667 *
683 668 * we can make the algorithm go faster by not doing this work,
684 669 * but just pretending that `d' is now `e', etc. this works
685 670 * really well and obviates the need for a temporary variable.
686 671 * however, we still explicitly perform the rotate action,
687 672 * since it is cheaper on SPARC to do it once than to have to
688 673 * do it over and over again.
689 674 */
690 675
691 676 /* round 1 */
692 677 e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
693 678 b = ROTATE_LEFT(b, 30);
694 679
695 680 d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
696 681 a = ROTATE_LEFT(a, 30);
697 682
698 683 c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
699 684 e = ROTATE_LEFT(e, 30);
700 685
701 686 b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
702 687 d = ROTATE_LEFT(d, 30);
703 688
704 689 a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
705 690 c = ROTATE_LEFT(c, 30);
706 691
707 692 e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
708 693 b = ROTATE_LEFT(b, 30);
709 694
710 695 d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
711 696 a = ROTATE_LEFT(a, 30);
712 697
713 698 c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
714 699 e = ROTATE_LEFT(e, 30);
715 700
716 701 b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
717 702 d = ROTATE_LEFT(d, 30);
718 703
719 704 a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
720 705 c = ROTATE_LEFT(c, 30);
721 706
722 707 e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
723 708 b = ROTATE_LEFT(b, 30);
724 709
725 710 d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
726 711 a = ROTATE_LEFT(a, 30);
727 712
728 713 c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
729 714 e = ROTATE_LEFT(e, 30);
730 715
731 716 b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
732 717 d = ROTATE_LEFT(d, 30);
733 718
734 719 a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
735 720 c = ROTATE_LEFT(c, 30);
736 721
737 722 e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
738 723 b = ROTATE_LEFT(b, 30);
739 724
740 725 W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 16 */
741 726 d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
742 727 a = ROTATE_LEFT(a, 30);
743 728
744 729 W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 17 */
745 730 c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
746 731 e = ROTATE_LEFT(e, 30);
747 732
748 733 W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 18 */
749 734 b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
750 735 d = ROTATE_LEFT(d, 30);
751 736
752 737 W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 19 */
753 738 a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
754 739 c = ROTATE_LEFT(c, 30);
755 740
756 741 /* round 2 */
757 742 W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 20 */
758 743 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
759 744 b = ROTATE_LEFT(b, 30);
760 745
761 746 W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 21 */
762 747 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
763 748 a = ROTATE_LEFT(a, 30);
764 749
765 750 W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 22 */
766 751 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
767 752 e = ROTATE_LEFT(e, 30);
768 753
769 754 W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 23 */
770 755 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
771 756 d = ROTATE_LEFT(d, 30);
772 757
773 758 W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 24 */
774 759 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
775 760 c = ROTATE_LEFT(c, 30);
776 761
777 762 W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 25 */
778 763 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
779 764 b = ROTATE_LEFT(b, 30);
780 765
781 766 W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 26 */
782 767 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
783 768 a = ROTATE_LEFT(a, 30);
784 769
785 770 W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 27 */
786 771 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
787 772 e = ROTATE_LEFT(e, 30);
788 773
789 774 W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 28 */
790 775 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
791 776 d = ROTATE_LEFT(d, 30);
792 777
793 778 W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */
794 779 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
795 780 c = ROTATE_LEFT(c, 30);
796 781
797 782 W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 30 */
798 783 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
799 784 b = ROTATE_LEFT(b, 30);
800 785
801 786 W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 31 */
802 787 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
803 788 a = ROTATE_LEFT(a, 30);
804 789
805 790 W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 32 */
806 791 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
807 792 e = ROTATE_LEFT(e, 30);
808 793
809 794 W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 33 */
810 795 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
811 796 d = ROTATE_LEFT(d, 30);
812 797
813 798 W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 34 */
814 799 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
815 800 c = ROTATE_LEFT(c, 30);
816 801
817 802 W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 35 */
818 803 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
819 804 b = ROTATE_LEFT(b, 30);
820 805
821 806 W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 36 */
822 807 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
823 808 a = ROTATE_LEFT(a, 30);
824 809
825 810 W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 37 */
826 811 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
827 812 e = ROTATE_LEFT(e, 30);
828 813
829 814 W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 38 */
830 815 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
831 816 d = ROTATE_LEFT(d, 30);
832 817
833 818 W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 39 */
834 819 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
835 820 c = ROTATE_LEFT(c, 30);
836 821
837 822 /* round 3 */
838 823 W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 40 */
839 824 e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
840 825 b = ROTATE_LEFT(b, 30);
841 826
842 827 W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 41 */
843 828 d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
844 829 a = ROTATE_LEFT(a, 30);
845 830
846 831 W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 42 */
847 832 c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
848 833 e = ROTATE_LEFT(e, 30);
849 834
850 835 W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 43 */
851 836 b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
852 837 d = ROTATE_LEFT(d, 30);
853 838
854 839 W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 44 */
855 840 a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
856 841 c = ROTATE_LEFT(c, 30);
857 842
858 843 W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */
859 844 e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
860 845 b = ROTATE_LEFT(b, 30);
861 846
862 847 W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 46 */
863 848 d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
864 849 a = ROTATE_LEFT(a, 30);
865 850
866 851 W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 47 */
867 852 c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
868 853 e = ROTATE_LEFT(e, 30);
869 854
870 855 W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 48 */
871 856 b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
872 857 d = ROTATE_LEFT(d, 30);
873 858
874 859 W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 49 */
875 860 a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
876 861 c = ROTATE_LEFT(c, 30);
877 862
878 863 W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 50 */
879 864 e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
880 865 b = ROTATE_LEFT(b, 30);
881 866
882 867 W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 51 */
883 868 d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
884 869 a = ROTATE_LEFT(a, 30);
885 870
886 871 W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 52 */
887 872 c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
888 873 e = ROTATE_LEFT(e, 30);
889 874
890 875 W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 53 */
891 876 b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
892 877 d = ROTATE_LEFT(d, 30);
893 878
894 879 W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 54 */
895 880 a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
896 881 c = ROTATE_LEFT(c, 30);
897 882
898 883 W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 55 */
899 884 e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
900 885 b = ROTATE_LEFT(b, 30);
901 886
902 887 W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 56 */
903 888 d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
904 889 a = ROTATE_LEFT(a, 30);
905 890
906 891 W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 57 */
907 892 c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
908 893 e = ROTATE_LEFT(e, 30);
909 894
910 895 W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 58 */
911 896 b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
912 897 d = ROTATE_LEFT(d, 30);
913 898
914 899 W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 59 */
915 900 a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
916 901 c = ROTATE_LEFT(c, 30);
917 902
918 903 /* round 4 */
919 904 W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 60 */
920 905 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
921 906 b = ROTATE_LEFT(b, 30);
922 907
923 908 W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */
924 909 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
925 910 a = ROTATE_LEFT(a, 30);
926 911
927 912 W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 62 */
928 913 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
929 914 e = ROTATE_LEFT(e, 30);
930 915
931 916 W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 63 */
932 917 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
933 918 d = ROTATE_LEFT(d, 30);
934 919
935 920 W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 64 */
936 921 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
937 922 c = ROTATE_LEFT(c, 30);
938 923
939 924 W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 65 */
940 925 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
941 926 b = ROTATE_LEFT(b, 30);
942 927
943 928 W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 66 */
944 929 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
945 930 a = ROTATE_LEFT(a, 30);
946 931
947 932 W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 67 */
948 933 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
949 934 e = ROTATE_LEFT(e, 30);
950 935
951 936 W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 68 */
952 937 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
953 938 d = ROTATE_LEFT(d, 30);
954 939
955 940 W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 69 */
956 941 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
957 942 c = ROTATE_LEFT(c, 30);
958 943
959 944 W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 70 */
960 945 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
961 946 b = ROTATE_LEFT(b, 30);
962 947
963 948 W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 71 */
964 949 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
965 950 a = ROTATE_LEFT(a, 30);
966 951
967 952 W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 72 */
968 953 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
969 954 e = ROTATE_LEFT(e, 30);
970 955
971 956 W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 73 */
972 957 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
973 958 d = ROTATE_LEFT(d, 30);
974 959
975 960 W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 74 */
976 961 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
977 962 c = ROTATE_LEFT(c, 30);
978 963
979 964 W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 75 */
980 965 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
981 966 b = ROTATE_LEFT(b, 30);
982 967
983 968 W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 76 */
984 969 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
985 970 a = ROTATE_LEFT(a, 30);
986 971
987 972 W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */
988 973 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
989 974 e = ROTATE_LEFT(e, 30);
990 975
991 976 W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 78 */
992 977 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
993 978 d = ROTATE_LEFT(d, 30);
994 979
995 980 W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 79 */
996 981
997 982 ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
998 983 SHA1_CONST(3);
999 984 ctx->state[1] += b;
1000 985 ctx->state[2] += ROTATE_LEFT(c, 30);
1001 986 ctx->state[3] += d;
1002 987 ctx->state[4] += e;
1003 988
1004 989 /* zeroize sensitive information */
1005 990 W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
1006 991 W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
1007 992 }
1008 993 #endif /* !__amd64 */
1009 994
1010 995
1011 996 /*
1012 997 * Encode()
1013 998 *
1014 999 * purpose: to convert a list of numbers from little endian to big endian
1015 1000 * input: uint8_t * : place to store the converted big endian numbers
1016 1001 * uint32_t * : place to get numbers to convert from
1017 1002 * size_t : the length of the input in bytes
1018 1003 * output: void
1019 1004 */
1020 1005
1021 1006 static void
1022 1007 Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
1023 1008 size_t len)
1024 1009 {
1025 1010 size_t i, j;
1026 1011
1027 1012 #if defined(__sparc)
1028 1013 if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
1029 1014 for (i = 0, j = 0; j < len; i++, j += 4) {
1030 1015 /* LINTED: pointer alignment */
1031 1016 *((uint32_t *)(output + j)) = input[i];
1032 1017 }
1033 1018 } else {
1034 1019 #endif /* little endian -- will work on big endian, but slowly */
1035 1020 for (i = 0, j = 0; j < len; i++, j += 4) {
1036 1021 output[j] = (input[i] >> 24) & 0xff;
1037 1022 output[j + 1] = (input[i] >> 16) & 0xff;
1038 1023 output[j + 2] = (input[i] >> 8) & 0xff;
1039 1024 output[j + 3] = input[i] & 0xff;
1040 1025 }
1041 1026 #if defined(__sparc)
1042 1027 }
1043 1028 #endif
1044 1029 }
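For completeness, the three entry points implemented in this file are normally driven as sketched below. This is an illustrative userland example, assuming the <sys/sha1.h> header and whatever library provides SHA1Init/SHA1Update/SHA1Final on the target system; the input string and the explicit 20-byte digest size are examples only.

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/sha1.h>	/* SHA1_CTX, SHA1Init(), SHA1Update(), SHA1Final() */

int
main(void)
{
	SHA1_CTX ctx;
	uint8_t digest[20];		/* SHA-1 yields a 160-bit digest */
	const char *msg = "abc";	/* example input only */
	int i;

	SHA1Init(&ctx);
	SHA1Update(&ctx, msg, strlen(msg));	/* may be called repeatedly */
	SHA1Final(digest, &ctx);		/* also zeroizes the context */

	for (i = 0; i < 20; i++)
		(void) printf("%02x", digest[i]);
	(void) printf("\n");
	return (0);
}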