nv97_bswap Wdiff usr/src/common/crypto/sha1/sha1.c

Print this page

6717509 Need to use bswap/bswapq for byte swap of 64-bit integer on x32/x64 (fix lint)

Split	Close
Expand all
Collapse all

          --- old/usr/src/common/crypto/sha1/sha1.c
          +++ new/usr/src/common/crypto/sha1/sha1.c

   1    1  /*
   2    2   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3    3   * Use is subject to license terms.
   4    4   */
   5    5  
   6    6  /*
   7    7   * The basic framework for this code came from the reference
   8    8   * implementation for MD5.  That implementation is Copyright (C)
   9    9   * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  10   10   *
  11   11   * License to copy and use this software is granted provided that it
  12   12   * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  13   13   * Algorithm" in all material mentioning or referencing this software
  14   14   * or this function.
  15   15   *
  16   16   * License is also granted to make and use derivative works provided
  17   17   * that such works are identified as "derived from the RSA Data
  18   18   * Security, Inc. MD5 Message-Digest Algorithm" in all material
  19   19   * mentioning or referencing the derived work.
  20   20   *
  21   21   * RSA Data Security, Inc. makes no representations concerning either
  22   22   * the merchantability of this software or the suitability of this
  23   23   * software for any particular purpose. It is provided "as is"
  24   24   * without express or implied warranty of any kind.
  25   25   *
  26   26   * These notices must be retained in any copies of any part of this
  27   27   * documentation and/or software.
  28   28   *
  29   29   * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
  30   30   * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
  31   31   * Not as fast as one would like -- further optimizations are encouraged
  32   32   * and appreciated.
  33   33   */
  34   34  
  35   35  #include <sys/types.h>
  36   36  #include <sys/param.h>
  37   37  #include <sys/systm.h>
  38   38  #include <sys/sysmacros.h>
  39   39  #include <sys/sha1.h>
  40   40  #include <sys/sha1_consts.h>
  41   41  
  42   42  #ifndef _KERNEL
  43   43  #include <strings.h>
  44   44  #include <stdlib.h>
  45   45  #include <errno.h>
  46   46  #include <sys/systeminfo.h>
  47   47  #endif  /* !_KERNEL */
  48   48  
  49   49  #ifdef _LITTLE_ENDIAN
  50   50  #include <sys/byteorder.h>
  51   51  #define HAVE_HTONL
  52   52  #endif
  53   53  
  54   54  static void Encode(uint8_t *, const uint32_t *, size_t);
  55   55  
  56   56  #if     defined(__sparc)
  57   57  
  58   58  #define SHA1_TRANSFORM(ctx, in) \
  59   59          SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
  60   60                  (ctx)->state[3], (ctx)->state[4], (ctx), (in))
  61   61  
  62   62  static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
  63   63      SHA1_CTX *, const uint8_t *);
  64   64  
  65   65  #elif   defined(__amd64)
  66   66  
  67   67  #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
  68   68  #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
  69   69                  (in), (num))
  70   70  
  71   71  void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
  72   72  
  73   73  #else
  74   74  
  75   75  #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
  76   76  
  77   77  static void SHA1Transform(SHA1_CTX *, const uint8_t *);
  78   78  
  79   79  #endif
  80   80  
  81   81  
  82   82  static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
  83   83  
  84   84  /*
  85   85   * F, G, and H are the basic SHA1 functions.
  86   86   */
  87   87  #define F(b, c, d)      (((b) & (c)) | ((~b) & (d)))
  88   88  #define G(b, c, d)      ((b) ^ (c) ^ (d))
  89   89  #define H(b, c, d)      (((b) & (c)) | (((b)|(c)) & (d)))
  90   90  
  91   91  /*
  92   92   * ROTATE_LEFT rotates x left n bits.
  93   93   */
  94   94  
  95   95  #if     defined(__GNUC__) && defined(_LP64)
  96   96  static __inline__ uint64_t
  97   97  ROTATE_LEFT(uint64_t value, uint32_t n)
  98   98  {
  99   99          uint32_t t32;
 100  100  
 101  101          t32 = (uint32_t)value;
 102  102          return ((t32 << n) | (t32 >> (32 - n)));
 103  103  }
 104  104  
 105  105  #else
 106  106  
 107  107  #define ROTATE_LEFT(x, n)       \
 108  108          (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
 109  109  
 110  110  #endif
 111  111  
 112  112  
 113  113  /*
 114  114   * SHA1Init()
 115  115   *
 116  116   * purpose: initializes the sha1 context and begins and sha1 digest operation
 117  117   *   input: SHA1_CTX *  : the context to initializes.
 118  118   *  output: void
 119  119   */
 120  120  
 121  121  void
 122  122  SHA1Init(SHA1_CTX *ctx)
 123  123  {
 124  124          ctx->count[0] = ctx->count[1] = 0;
 125  125  
 126  126          /*
 127  127           * load magic initialization constants. Tell lint
 128  128           * that these constants are unsigned by using U.
 129  129           */
 130  130  
 131  131          ctx->state[0] = 0x67452301U;
 132  132          ctx->state[1] = 0xefcdab89U;
 133  133          ctx->state[2] = 0x98badcfeU;
 134  134          ctx->state[3] = 0x10325476U;
 135  135          ctx->state[4] = 0xc3d2e1f0U;
 136  136  }
 137  137  
 138  138  #ifdef VIS_SHA1
 139  139  #ifdef _KERNEL
 140  140  
 141  141  #include <sys/regset.h>
 142  142  #include <sys/vis.h>
 143  143  #include <sys/fpu/fpusystm.h>
 144  144  
 145  145  /* the alignment for block stores to save fp registers */
 146  146  #define VIS_ALIGN       (64)
 147  147  
 148  148  extern int sha1_savefp(kfpu_t *, int);
 149  149  extern void sha1_restorefp(kfpu_t *);
 150  150  
 151  151  uint32_t        vis_sha1_svfp_threshold = 128;
 152  152  
 153  153  #endif /* _KERNEL */
 154  154  
 155  155  /*
 156  156   * VIS SHA-1 consts.
 157  157   */
 158  158  static uint64_t VIS[] = {
 159  159          0x8000000080000000ULL,
 160  160          0x0002000200020002ULL,
 161  161          0x5a8279996ed9eba1ULL,
 162  162          0x8f1bbcdcca62c1d6ULL,
 163  163          0x012389ab456789abULL};
 164  164  
 165  165  extern void SHA1TransformVIS(uint64_t *, uint32_t *, uint32_t *, uint64_t *);
 166  166  
 167  167  
 168  168  /*
 169  169   * SHA1Update()
 170  170   *
 171  171   * purpose: continues an sha1 digest operation, using the message block
 172  172   *          to update the context.
 173  173   *   input: SHA1_CTX *  : the context to update
 174  174   *          void *      : the message block
 175  175   *          size_t    : the length of the message block in bytes
 176  176   *  output: void
 177  177   */
 178  178  
 179  179  void
 180  180  SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
 181  181  {
 182  182          uint32_t i, buf_index, buf_len;
 183  183          uint64_t X0[40], input64[8];
 184  184          const uint8_t *input = inptr;
 185  185  #ifdef _KERNEL
 186  186          int usevis = 0;
 187  187  #else
 188  188          int usevis = 1;
 189  189  #endif /* _KERNEL */
 190  190  
 191  191          /* check for noop */
 192  192          if (input_len == 0)
 193  193                  return;
 194  194  
 195  195          /* compute number of bytes mod 64 */
 196  196          buf_index = (ctx->count[1] >> 3) & 0x3F;
 197  197  
 198  198          /* update number of bits */
 199  199          if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
 200  200                  ctx->count[0]++;
 201  201  
 202  202          ctx->count[0] += (input_len >> 29);
 203  203  
 204  204          buf_len = 64 - buf_index;
 205  205  
 206  206          /* transform as many times as possible */
 207  207          i = 0;
 208  208          if (input_len >= buf_len) {
 209  209  #ifdef _KERNEL
 210  210                  kfpu_t *fpu;
 211  211                  if (fpu_exists) {
 212  212                          uint8_t fpua[sizeof (kfpu_t) + GSR_SIZE + VIS_ALIGN];
 213  213                          uint32_t len = (input_len + buf_index) & ~0x3f;
 214  214                          int svfp_ok;
 215  215  
 216  216                          fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, 64);
 217  217                          svfp_ok = ((len >= vis_sha1_svfp_threshold) ? 1 : 0);
 218  218                          usevis = fpu_exists && sha1_savefp(fpu, svfp_ok);
 219  219                  } else {
 220  220                          usevis = 0;
 221  221                  }
 222  222  #endif /* _KERNEL */
 223  223  
 224  224                  /*
 225  225                   * general optimization:
 226  226                   *
 227  227                   * only do initial bcopy() and SHA1Transform() if
 228  228                   * buf_index != 0.  if buf_index == 0, we're just
 229  229                   * wasting our time doing the bcopy() since there
 230  230                   * wasn't any data left over from a previous call to
 231  231                   * SHA1Update().
 232  232                   */
 233  233  
 234  234                  if (buf_index) {
 235  235                          bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 236  236                          if (usevis) {
 237  237                                  SHA1TransformVIS(X0,
 238  238                                      ctx->buf_un.buf32,
 239  239                                      &ctx->state[0], VIS);
 240  240                          } else {
 241  241                                  SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
 242  242                          }
 243  243                          i = buf_len;
 244  244                  }
 245  245  
 246  246                  /*
 247  247                   * VIS SHA-1: uses the VIS 1.0 instructions to accelerate
 248  248                   * SHA-1 processing. This is achieved by "offloading" the
 249  249                   * computation of the message schedule (MS) to the VIS units.
 250  250                   * This allows the VIS computation of the message schedule
 251  251                   * to be performed in parallel with the standard integer
 252  252                   * processing of the remainder of the SHA-1 computation.
 253  253                   * performance by up to around 1.37X, compared to an optimized
 254  254                   * integer-only implementation.
 255  255                   *
 256  256                   * The VIS implementation of SHA1Transform has a different API
 257  257                   * to the standard integer version:
 258  258                   *
 259  259                   * void SHA1TransformVIS(
 260  260                   *       uint64_t *, // Pointer to MS for ith block
 261  261                   *       uint32_t *, // Pointer to ith block of message data
 262  262                   *       uint32_t *, // Pointer to SHA state i.e ctx->state
 263  263                   *       uint64_t *, // Pointer to various VIS constants
 264  264                   * )
 265  265                   *
 266  266                   * Note: the message data must by 4-byte aligned.
 267  267                   *
 268  268                   * Function requires VIS 1.0 support.
 269  269                   *
 270  270                   * Handling is provided to deal with arbitrary byte alingment
 271  271                   * of the input data but the performance gains are reduced
 272  272                   * for alignments other than 4-bytes.
 273  273                   */
 274  274                  if (usevis) {
 275  275                          if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
 276  276                                  /*
 277  277                                   * Main processing loop - input misaligned
 278  278                                   */
 279  279                                  for (; i + 63 < input_len; i += 64) {
 280  280                                          bcopy(&input[i], input64, 64);
 281  281                                          SHA1TransformVIS(X0,
 282  282                                              (uint32_t *)input64,
 283  283                                              &ctx->state[0], VIS);
 284  284                                  }
 285  285                          } else {
 286  286                                  /*
 287  287                                   * Main processing loop - input 8-byte aligned
 288  288                                   */
 289  289                                  for (; i + 63 < input_len; i += 64) {
 290  290                                          SHA1TransformVIS(X0,
 291  291                                              /* LINTED E_BAD_PTR_CAST_ALIGN */
 292  292                                              (uint32_t *)&input[i], /* CSTYLED */
 293  293                                              &ctx->state[0], VIS);
 294  294                                  }
 295  295  
 296  296                          }
 297  297  #ifdef _KERNEL
 298  298                          sha1_restorefp(fpu);
 299  299  #endif /* _KERNEL */
 300  300                  } else {
 301  301                          for (; i + 63 < input_len; i += 64) {
 302  302                                  SHA1_TRANSFORM(ctx, &input[i]);
 303  303                          }
 304  304                  }
 305  305  
 306  306                  /*
 307  307                   * general optimization:
 308  308                   *
 309  309                   * if i and input_len are the same, return now instead
 310  310                   * of calling bcopy(), since the bcopy() in this case
 311  311                   * will be an expensive nop.
 312  312                   */
 313  313  
 314  314                  if (input_len == i)
 315  315                          return;
 316  316  
 317  317                  buf_index = 0;
 318  318          }
 319  319  
 320  320          /* buffer remaining input */
 321  321          bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 322  322  }
 323  323  
 324  324  #else /* VIS_SHA1 */
 325  325  
 326  326  void
 327  327  SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
 328  328  {
 329  329          uint32_t i, buf_index, buf_len;
 330  330          const uint8_t *input = inptr;
 331  331  #if defined(__amd64)
 332  332          uint32_t        block_count;
 333  333  #endif  /* __amd64 */
 334  334  
 335  335          /* check for noop */
 336  336          if (input_len == 0)
 337  337                  return;
 338  338  
 339  339          /* compute number of bytes mod 64 */
 340  340          buf_index = (ctx->count[1] >> 3) & 0x3F;
 341  341  
 342  342          /* update number of bits */
 343  343          if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
 344  344                  ctx->count[0]++;
 345  345  
 346  346          ctx->count[0] += (input_len >> 29);
 347  347  
 348  348          buf_len = 64 - buf_index;
 349  349  
 350  350          /* transform as many times as possible */
 351  351          i = 0;
 352  352          if (input_len >= buf_len) {
 353  353  
 354  354                  /*
 355  355                   * general optimization:
 356  356                   *
 357  357                   * only do initial bcopy() and SHA1Transform() if
 358  358                   * buf_index != 0.  if buf_index == 0, we're just
 359  359                   * wasting our time doing the bcopy() since there
 360  360                   * wasn't any data left over from a previous call to
 361  361                   * SHA1Update().
 362  362                   */
 363  363  
 364  364                  if (buf_index) {
 365  365                          bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 366  366                          SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
 367  367                          i = buf_len;
 368  368                  }
 369  369  
 370  370  #if !defined(__amd64)
 371  371                  for (; i + 63 < input_len; i += 64)
 372  372                          SHA1_TRANSFORM(ctx, &input[i]);
 373  373  #else
 374  374                  block_count = (input_len - i) >> 6;
 375  375                  if (block_count > 0) {
 376  376                          SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
 377  377                          i += block_count << 6;
 378  378                  }
 379  379  #endif  /* !__amd64 */
 380  380  
 381  381                  /*
 382  382                   * general optimization:
 383  383                   *
 384  384                   * if i and input_len are the same, return now instead
 385  385                   * of calling bcopy(), since the bcopy() in this case
 386  386                   * will be an expensive nop.
 387  387                   */
 388  388  
 389  389                  if (input_len == i)
 390  390                          return;
 391  391  
 392  392                  buf_index = 0;
 393  393          }
 394  394  
 395  395          /* buffer remaining input */
 396  396          bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 397  397  }
 398  398  
 399  399  #endif /* VIS_SHA1 */
 400  400  
 401  401  /*
 402  402   * SHA1Final()
 403  403   *
 404  404   * purpose: ends an sha1 digest operation, finalizing the message digest and
 405  405   *          zeroing the context.
 406  406   *   input: uchar_t *   : A buffer to store the digest.
 407  407   *                      : The function actually uses void* because many
 408  408   *                      : callers pass things other than uchar_t here.
 409  409   *          SHA1_CTX *  : the context to finalize, save, and zero
 410  410   *  output: void
 411  411   */
 412  412  
 413  413  void
 414  414  SHA1Final(void *digest, SHA1_CTX *ctx)
 415  415  {
 416  416          uint8_t         bitcount_be[sizeof (ctx->count)];
 417  417          uint32_t        index = (ctx->count[1] >> 3) & 0x3f;
 418  418  
 419  419          /* store bit count, big endian */
 420  420          Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
 421  421  
 422  422          /* pad out to 56 mod 64 */
 423  423          SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
 424  424  
 425  425          /* append length (before padding) */
 426  426          SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
 427  427  
 428  428          /* store state in digest */
 429  429          Encode(digest, ctx->state, sizeof (ctx->state));
 430  430  
 431  431          /* zeroize sensitive information */
 432  432          bzero(ctx, sizeof (*ctx));
 433  433  }
 434  434  
 435  435  
 436  436  #if !defined(__amd64)
 437  437  
 438  438  typedef uint32_t sha1word;
 439  439  
 440  440  /*
 441  441   * sparc optimization:
 442  442   *
 443  443   * on the sparc, we can load big endian 32-bit data easily.  note that
 444  444   * special care must be taken to ensure the address is 32-bit aligned.
 445  445   * in the interest of speed, we don't check to make sure, since
 446  446   * careful programming can guarantee this for us.
 447  447   */
 448  448  
 449  449  #if     defined(_BIG_ENDIAN)
 450  450  #define LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
 451  451  
 452  452  #elif   defined(HAVE_HTONL)
 453  453  #define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
 454  454  
 455  455  #else
 456  456  /* little endian -- will work on big endian, but slowly */
 457  457  #define LOAD_BIG_32(addr)       \
 458  458          (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
 459  459  #endif  /* _BIG_ENDIAN */
 460  460  
 461  461  /*
 462  462   * SHA1Transform()
 463  463   */
 464  464  #if     defined(W_ARRAY)
 465  465  #define W(n) w[n]
 466  466  #else   /* !defined(W_ARRAY) */
 467  467  #define W(n) w_ ## n
 468  468  #endif  /* !defined(W_ARRAY) */
 469  469  
 470  470  
 471  471  #if     defined(__sparc)
 472  472  
 473  473  /*
 474  474   * sparc register window optimization:
 475  475   *
 476  476   * `a', `b', `c', `d', and `e' are passed into SHA1Transform
 477  477   * explicitly since it increases the number of registers available to
 478  478   * the compiler.  under this scheme, these variables can be held in
 479  479   * %i0 - %i4, which leaves more local and out registers available.
 480  480   *
 481  481   * purpose: sha1 transformation -- updates the digest based on `block'
 482  482   *   input: uint32_t    : bytes  1 -  4 of the digest
 483  483   *          uint32_t    : bytes  5 -  8 of the digest
 484  484   *          uint32_t    : bytes  9 - 12 of the digest
 485  485   *          uint32_t    : bytes 13 - 16 of the digest
 486  486   *          uint32_t    : bytes 17 - 20 of the digest
 487  487   *          SHA1_CTX *  : the context to update
 488  488   *          uint8_t [64]: the block to use to update the digest
 489  489   *  output: void
 490  490   */
 491  491  
 492  492  void
 493  493  SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
 494  494      SHA1_CTX *ctx, const uint8_t blk[64])
 495  495  {
 496  496          /*
 497  497           * sparc optimization:
 498  498           *
 499  499           * while it is somewhat counter-intuitive, on sparc, it is
 500  500           * more efficient to place all the constants used in this
 501  501           * function in an array and load the values out of the array
 502  502           * than to manually load the constants.  this is because
 503  503           * setting a register to a 32-bit value takes two ops in most
 504  504           * cases: a `sethi' and an `or', but loading a 32-bit value
 505  505           * from memory only takes one `ld' (or `lduw' on v9).  while
 506  506           * this increases memory usage, the compiler can find enough
 507  507           * other things to do while waiting so that the pipeline does
 508  508           * not stall.  additionally, it is likely that many of these
 509  509           * constants are cached so that later accesses do not even go
 510  510           * out to the bus.
 511  511           *
 512  512           * this array is declared `static' to keep the compiler from
 513  513           * having to bcopy() this array onto the stack frame of
 514  514           * SHA1Transform() each time it is called -- which is
 515  515           * unacceptably expensive.
 516  516           *
 517  517           * the `const' is to ensure that callers are good citizens and
 518  518           * do not try to munge the array.  since these routines are
 519  519           * going to be called from inside multithreaded kernelland,
 520  520           * this is a good safety check. -- `sha1_consts' will end up in
 521  521           * .rodata.
 522  522           *
 523  523           * unfortunately, loading from an array in this manner hurts
 524  524           * performance under Intel.  So, there is a macro,
 525  525           * SHA1_CONST(), used in SHA1Transform(), that either expands to
 526  526           * a reference to this array, or to the actual constant,
 527  527           * depending on what platform this code is compiled for.
 528  528           */
 529  529  
 530  530          static const uint32_t sha1_consts[] = {
 531  531                  SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
 532  532          };
 533  533  
 534  534          /*
 535  535           * general optimization:
 536  536           *
 537  537           * use individual integers instead of using an array.  this is a
 538  538           * win, although the amount it wins by seems to vary quite a bit.
 539  539           */
 540  540  
 541  541          uint32_t        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 542  542          uint32_t        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 543  543  
 544  544          /*
 545  545           * sparc optimization:
 546  546           *
 547  547           * if `block' is already aligned on a 4-byte boundary, use
 548  548           * LOAD_BIG_32() directly.  otherwise, bcopy() into a
 549  549           * buffer that *is* aligned on a 4-byte boundary and then do
 550  550           * the LOAD_BIG_32() on that buffer.  benchmarks have shown
 551  551           * that using the bcopy() is better than loading the bytes
 552  552           * individually and doing the endian-swap by hand.
 553  553           *
 554  554           * even though it's quite tempting to do:
 555  555           *
 556  556           * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
 557  557           *
 558  558           * and only have one set of LOAD_BIG_32()'s, the compiler
 559  559           * *does not* like that, so please resist the urge.
 560  560           */
 561  561  
 562  562          if ((uintptr_t)blk & 0x3) {             /* not 4-byte aligned? */
 563  563                  bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
 564  564                  w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
 565  565                  w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
 566  566                  w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
 567  567                  w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
 568  568                  w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
 569  569                  w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
 570  570                  w_9  = LOAD_BIG_32(ctx->buf_un.buf32 +  9);
 571  571                  w_8  = LOAD_BIG_32(ctx->buf_un.buf32 +  8);
 572  572                  w_7  = LOAD_BIG_32(ctx->buf_un.buf32 +  7);
 573  573                  w_6  = LOAD_BIG_32(ctx->buf_un.buf32 +  6);
 574  574                  w_5  = LOAD_BIG_32(ctx->buf_un.buf32 +  5);
 575  575                  w_4  = LOAD_BIG_32(ctx->buf_un.buf32 +  4);
 576  576                  w_3  = LOAD_BIG_32(ctx->buf_un.buf32 +  3);
 577  577                  w_2  = LOAD_BIG_32(ctx->buf_un.buf32 +  2);
 578  578                  w_1  = LOAD_BIG_32(ctx->buf_un.buf32 +  1);
 579  579                  w_0  = LOAD_BIG_32(ctx->buf_un.buf32 +  0);
 580  580          } else {
 581  581                  /*LINTED*/
 582  582                  w_15 = LOAD_BIG_32(blk + 60);
 583  583                  /*LINTED*/
 584  584                  w_14 = LOAD_BIG_32(blk + 56);
 585  585                  /*LINTED*/
 586  586                  w_13 = LOAD_BIG_32(blk + 52);
 587  587                  /*LINTED*/
 588  588                  w_12 = LOAD_BIG_32(blk + 48);
 589  589                  /*LINTED*/
 590  590                  w_11 = LOAD_BIG_32(blk + 44);
 591  591                  /*LINTED*/
 592  592                  w_10 = LOAD_BIG_32(blk + 40);
 593  593                  /*LINTED*/
 594  594                  w_9  = LOAD_BIG_32(blk + 36);
 595  595                  /*LINTED*/
 596  596                  w_8  = LOAD_BIG_32(blk + 32);
 597  597                  /*LINTED*/
 598  598                  w_7  = LOAD_BIG_32(blk + 28);
 599  599                  /*LINTED*/
 600  600                  w_6  = LOAD_BIG_32(blk + 24);
 601  601                  /*LINTED*/
 602  602                  w_5  = LOAD_BIG_32(blk + 20);
 603  603                  /*LINTED*/
 604  604                  w_4  = LOAD_BIG_32(blk + 16);
 605  605                  /*LINTED*/
 606  606                  w_3  = LOAD_BIG_32(blk + 12);
 607  607                  /*LINTED*/
 608  608                  w_2  = LOAD_BIG_32(blk +  8);
 609  609                  /*LINTED*/
 610  610                  w_1  = LOAD_BIG_32(blk +  4);
 611  611                  /*LINTED*/
 612  612                  w_0  = LOAD_BIG_32(blk +  0);
 613  613          }
 614  614  #else   /* !defined(__sparc) */
 615  615  
 616  616  void /* CSTYLED */
 617  617  SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
 618  618  {
 619  619          /* CSTYLED */
 620  620          sha1word a = ctx->state[0];
 621  621          sha1word b = ctx->state[1];
 622  622          sha1word c = ctx->state[2];

↓ open down ↓

622 lines elided

↑ open up ↑

 623  623          sha1word d = ctx->state[3];
 624  624          sha1word e = ctx->state[4];
 625  625  
 626  626  #if     defined(W_ARRAY)
 627  627          sha1word        w[16];
 628  628  #else   /* !defined(W_ARRAY) */
 629  629          sha1word        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 630  630          sha1word        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 631  631  #endif  /* !defined(W_ARRAY) */
 632  632  
 633      -        W(0)  = LOAD_BIG_32(blk +  0);
 634      -        W(1)  = LOAD_BIG_32(blk +  4);
 635      -        W(2)  = LOAD_BIG_32(blk +  8);
 636      -        W(3)  = LOAD_BIG_32(blk + 12);
 637      -        W(4)  = LOAD_BIG_32(blk + 16);
 638      -        W(5)  = LOAD_BIG_32(blk + 20);
 639      -        W(6)  = LOAD_BIG_32(blk + 24);
 640      -        W(7)  = LOAD_BIG_32(blk + 28);
 641      -        W(8)  = LOAD_BIG_32(blk + 32);
 642      -        W(9)  = LOAD_BIG_32(blk + 36);
 643      -        W(10) = LOAD_BIG_32(blk + 40);
 644      -        W(11) = LOAD_BIG_32(blk + 44);
 645      -        W(12) = LOAD_BIG_32(blk + 48);
 646      -        W(13) = LOAD_BIG_32(blk + 52);
 647      -        W(14) = LOAD_BIG_32(blk + 56);
 648      -        W(15) = LOAD_BIG_32(blk + 60);
      633 +        W(0)  = LOAD_BIG_32((void *)(blk +  0));
      634 +        W(1)  = LOAD_BIG_32((void *)(blk +  4));
      635 +        W(2)  = LOAD_BIG_32((void *)(blk +  8));
      636 +        W(3)  = LOAD_BIG_32((void *)(blk + 12));
      637 +        W(4)  = LOAD_BIG_32((void *)(blk + 16));
      638 +        W(5)  = LOAD_BIG_32((void *)(blk + 20));
      639 +        W(6)  = LOAD_BIG_32((void *)(blk + 24));
      640 +        W(7)  = LOAD_BIG_32((void *)(blk + 28));
      641 +        W(8)  = LOAD_BIG_32((void *)(blk + 32));
      642 +        W(9)  = LOAD_BIG_32((void *)(blk + 36));
      643 +        W(10) = LOAD_BIG_32((void *)(blk + 40));
      644 +        W(11) = LOAD_BIG_32((void *)(blk + 44));
      645 +        W(12) = LOAD_BIG_32((void *)(blk + 48));
      646 +        W(13) = LOAD_BIG_32((void *)(blk + 52));
      647 +        W(14) = LOAD_BIG_32((void *)(blk + 56));
      648 +        W(15) = LOAD_BIG_32((void *)(blk + 60));
 649  649  
 650  650  #endif  /* !defined(__sparc) */
 651  651  
 652  652          /*
 653  653           * general optimization:
 654  654           *
 655  655           * even though this approach is described in the standard as
 656  656           * being slower algorithmically, it is 30-40% faster than the
 657  657           * "faster" version under SPARC, because this version has more
 658  658           * of the constraints specified at compile-time and uses fewer

 659  659           * variables (and therefore has better register utilization)
 660  660           * than its "speedier" brother.  (i've tried both, trust me)
 661  661           *
 662  662           * for either method given in the spec, there is an "assignment"
 663  663           * phase where the following takes place:
 664  664           *
 665  665           *      tmp = (main_computation);
 666  666           *      e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
 667  667           *
 668  668           * we can make the algorithm go faster by not doing this work,
 669  669           * but just pretending that `d' is now `e', etc. this works
 670  670           * really well and obviates the need for a temporary variable.
 671  671           * however, we still explicitly perform the rotate action,
 672  672           * since it is cheaper on SPARC to do it once than to have to
 673  673           * do it over and over again.
 674  674           */
 675  675  
 676  676          /* round 1 */
 677  677          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
 678  678          b = ROTATE_LEFT(b, 30);
 679  679  
 680  680          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
 681  681          a = ROTATE_LEFT(a, 30);
 682  682  
 683  683          c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
 684  684          e = ROTATE_LEFT(e, 30);
 685  685  
 686  686          b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
 687  687          d = ROTATE_LEFT(d, 30);
 688  688  
 689  689          a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
 690  690          c = ROTATE_LEFT(c, 30);
 691  691  
 692  692          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
 693  693          b = ROTATE_LEFT(b, 30);
 694  694  
 695  695          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
 696  696          a = ROTATE_LEFT(a, 30);
 697  697  
 698  698          c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
 699  699          e = ROTATE_LEFT(e, 30);
 700  700  
 701  701          b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
 702  702          d = ROTATE_LEFT(d, 30);
 703  703  
 704  704          a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
 705  705          c = ROTATE_LEFT(c, 30);
 706  706  
 707  707          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
 708  708          b = ROTATE_LEFT(b, 30);
 709  709  
 710  710          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
 711  711          a = ROTATE_LEFT(a, 30);
 712  712  
 713  713          c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
 714  714          e = ROTATE_LEFT(e, 30);
 715  715  
 716  716          b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
 717  717          d = ROTATE_LEFT(d, 30);
 718  718  
 719  719          a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
 720  720          c = ROTATE_LEFT(c, 30);
 721  721  
 722  722          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
 723  723          b = ROTATE_LEFT(b, 30);
 724  724  
 725  725          W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);            /* 16 */
 726  726          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
 727  727          a = ROTATE_LEFT(a, 30);
 728  728  
 729  729          W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);            /* 17 */
 730  730          c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
 731  731          e = ROTATE_LEFT(e, 30);
 732  732  
 733  733          W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);   /* 18 */
 734  734          b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
 735  735          d = ROTATE_LEFT(d, 30);
 736  736  
 737  737          W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);            /* 19 */
 738  738          a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
 739  739          c = ROTATE_LEFT(c, 30);
 740  740  
 741  741          /* round 2 */
 742  742          W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);            /* 20 */
 743  743          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
 744  744          b = ROTATE_LEFT(b, 30);
 745  745  
 746  746          W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);            /* 21 */
 747  747          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
 748  748          a = ROTATE_LEFT(a, 30);
 749  749  
 750  750          W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);            /* 22 */
 751  751          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
 752  752          e = ROTATE_LEFT(e, 30);
 753  753  
 754  754          W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);            /* 23 */
 755  755          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
 756  756          d = ROTATE_LEFT(d, 30);
 757  757  
 758  758          W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);            /* 24 */
 759  759          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
 760  760          c = ROTATE_LEFT(c, 30);
 761  761  
 762  762          W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);            /* 25 */
 763  763          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
 764  764          b = ROTATE_LEFT(b, 30);
 765  765  
 766  766          W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);  /* 26 */
 767  767          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
 768  768          a = ROTATE_LEFT(a, 30);
 769  769  
 770  770          W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);  /* 27 */
 771  771          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
 772  772          e = ROTATE_LEFT(e, 30);
 773  773  
 774  774          W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);  /* 28 */
 775  775          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
 776  776          d = ROTATE_LEFT(d, 30);
 777  777  
 778  778          W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */
 779  779          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
 780  780          c = ROTATE_LEFT(c, 30);
 781  781  
 782  782          W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 30 */
 783  783          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
 784  784          b = ROTATE_LEFT(b, 30);
 785  785  
 786  786          W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 31 */
 787  787          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
 788  788          a = ROTATE_LEFT(a, 30);
 789  789  
 790  790          W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);            /* 32 */
 791  791          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
 792  792          e = ROTATE_LEFT(e, 30);
 793  793  
 794  794          W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);            /* 33 */
 795  795          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
 796  796          d = ROTATE_LEFT(d, 30);
 797  797  
 798  798          W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);   /* 34 */
 799  799          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
 800  800          c = ROTATE_LEFT(c, 30);
 801  801  
 802  802          W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);            /* 35 */
 803  803          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
 804  804          b = ROTATE_LEFT(b, 30);
 805  805  
 806  806          W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);            /* 36 */
 807  807          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
 808  808          a = ROTATE_LEFT(a, 30);
 809  809  
 810  810          W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);            /* 37 */
 811  811          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
 812  812          e = ROTATE_LEFT(e, 30);
 813  813  
 814  814          W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);            /* 38 */
 815  815          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
 816  816          d = ROTATE_LEFT(d, 30);
 817  817  
 818  818          W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);            /* 39 */
 819  819          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
 820  820          c = ROTATE_LEFT(c, 30);
 821  821  
 822  822          /* round 3 */
 823  823          W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);            /* 40 */
 824  824          e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
 825  825          b = ROTATE_LEFT(b, 30);
 826  826  
 827  827          W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);            /* 41 */
 828  828          d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
 829  829          a = ROTATE_LEFT(a, 30);
 830  830  
 831  831          W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);  /* 42 */
 832  832          c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
 833  833          e = ROTATE_LEFT(e, 30);
 834  834  
 835  835          W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);  /* 43 */
 836  836          b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
 837  837          d = ROTATE_LEFT(d, 30);
 838  838  
 839  839          W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);  /* 44 */
 840  840          a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
 841  841          c = ROTATE_LEFT(c, 30);
 842  842  
 843  843          W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */
 844  844          e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
 845  845          b = ROTATE_LEFT(b, 30);
 846  846  
 847  847          W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 46 */
 848  848          d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
 849  849          a = ROTATE_LEFT(a, 30);
 850  850  
 851  851          W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 47 */
 852  852          c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
 853  853          e = ROTATE_LEFT(e, 30);
 854  854  
 855  855          W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);            /* 48 */
 856  856          b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
 857  857          d = ROTATE_LEFT(d, 30);
 858  858  
 859  859          W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);            /* 49 */
 860  860          a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
 861  861          c = ROTATE_LEFT(c, 30);
 862  862  
 863  863          W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);   /* 50 */
 864  864          e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
 865  865          b = ROTATE_LEFT(b, 30);
 866  866  
 867  867          W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);            /* 51 */
 868  868          d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
 869  869          a = ROTATE_LEFT(a, 30);
 870  870  
 871  871          W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);            /* 52 */
 872  872          c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
 873  873          e = ROTATE_LEFT(e, 30);
 874  874  
 875  875          W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);            /* 53 */
 876  876          b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
 877  877          d = ROTATE_LEFT(d, 30);
 878  878  
 879  879          W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);            /* 54 */
 880  880          a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
 881  881          c = ROTATE_LEFT(c, 30);
 882  882  
 883  883          W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);            /* 55 */
 884  884          e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
 885  885          b = ROTATE_LEFT(b, 30);
 886  886  
 887  887          W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);            /* 56 */
 888  888          d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
 889  889          a = ROTATE_LEFT(a, 30);
 890  890  
 891  891          W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);            /* 57 */
 892  892          c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
 893  893          e = ROTATE_LEFT(e, 30);
 894  894  
 895  895          W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);  /* 58 */
 896  896          b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
 897  897          d = ROTATE_LEFT(d, 30);
 898  898  
 899  899          W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);  /* 59 */
 900  900          a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
 901  901          c = ROTATE_LEFT(c, 30);
 902  902  
 903  903          /* round 4 */
 904  904          W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);  /* 60 */
 905  905          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
 906  906          b = ROTATE_LEFT(b, 30);
 907  907  
 908  908          W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */
 909  909          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
 910  910          a = ROTATE_LEFT(a, 30);
 911  911  
 912  912          W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 62 */
 913  913          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
 914  914          e = ROTATE_LEFT(e, 30);
 915  915  
 916  916          W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 63 */
 917  917          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
 918  918          d = ROTATE_LEFT(d, 30);
 919  919  
 920  920          W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);            /* 64 */
 921  921          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
 922  922          c = ROTATE_LEFT(c, 30);
 923  923  
 924  924          W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);            /* 65 */
 925  925          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
 926  926          b = ROTATE_LEFT(b, 30);
 927  927  
 928  928          W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);   /* 66 */
 929  929          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
 930  930          a = ROTATE_LEFT(a, 30);
 931  931  
 932  932          W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);            /* 67 */
 933  933          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
 934  934          e = ROTATE_LEFT(e, 30);
 935  935  
 936  936          W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);            /* 68 */
 937  937          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
 938  938          d = ROTATE_LEFT(d, 30);
 939  939  
 940  940          W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);            /* 69 */
 941  941          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
 942  942          c = ROTATE_LEFT(c, 30);
 943  943  
 944  944          W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);            /* 70 */
 945  945          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
 946  946          b = ROTATE_LEFT(b, 30);
 947  947  
 948  948          W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);            /* 71 */
 949  949          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
 950  950          a = ROTATE_LEFT(a, 30);
 951  951  
 952  952          W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);            /* 72 */
 953  953          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
 954  954          e = ROTATE_LEFT(e, 30);
 955  955  
 956  956          W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);            /* 73 */
 957  957          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
 958  958          d = ROTATE_LEFT(d, 30);
 959  959  
 960  960          W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);  /* 74 */
 961  961          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
 962  962          c = ROTATE_LEFT(c, 30);
 963  963  
 964  964          W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);  /* 75 */
 965  965          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
 966  966          b = ROTATE_LEFT(b, 30);
 967  967  
 968  968          W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);  /* 76 */
 969  969          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
 970  970          a = ROTATE_LEFT(a, 30);
 971  971  
 972  972          W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */
 973  973          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
 974  974          e = ROTATE_LEFT(e, 30);
 975  975  
 976  976          W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 78 */
 977  977          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
 978  978          d = ROTATE_LEFT(d, 30);
 979  979  
 980  980          W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 79 */
 981  981  
 982  982          ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
 983  983              SHA1_CONST(3);
 984  984          ctx->state[1] += b;
 985  985          ctx->state[2] += ROTATE_LEFT(c, 30);
 986  986          ctx->state[3] += d;
 987  987          ctx->state[4] += e;
 988  988  
 989  989          /* zeroize sensitive information */
 990  990          W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
 991  991          W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
 992  992  }
 993  993  #endif  /* !__amd64 */
 994  994  
 995  995  
 996  996  /*
 997  997   * Encode()
 998  998   *
 999  999   * purpose: to convert a list of numbers from little endian to big endian
1000 1000   *   input: uint8_t *   : place to store the converted big endian numbers
1001 1001   *          uint32_t *  : place to get numbers to convert from
1002 1002   *          size_t      : the length of the input in bytes
1003 1003   *  output: void
1004 1004   */
1005 1005  
1006 1006  static void
1007 1007  Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
1008 1008      size_t len)
1009 1009  {
1010 1010          size_t          i, j;
1011 1011  
1012 1012  #if     defined(__sparc)
1013 1013          if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
1014 1014                  for (i = 0, j = 0; j < len; i++, j += 4) {
1015 1015                          /* LINTED: pointer alignment */
1016 1016                          *((uint32_t *)(output + j)) = input[i];
1017 1017                  }
1018 1018          } else {
1019 1019  #endif  /* little endian -- will work on big endian, but slowly */
1020 1020                  for (i = 0, j = 0; j < len; i++, j += 4) {
1021 1021                          output[j]       = (input[i] >> 24) & 0xff;
1022 1022                          output[j + 1]   = (input[i] >> 16) & 0xff;
1023 1023                          output[j + 2]   = (input[i] >>  8) & 0xff;
1024 1024                          output[j + 3]   = input[i] & 0xff;
1025 1025                  }
1026 1026  #if     defined(__sparc)
1027 1027          }
1028 1028  #endif
1029 1029  }

↓ open down ↓

371 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX