nv87_ac4sha2mac64 Wdiff usr/src/common/crypto/sha2/sha2.c

Print this page

6665607 Need a SHA256/SHA384/SHA512 implementation optimized for 64-bit x86

Split	Close
Expand all
Collapse all

          --- old/usr/src/common/crypto/sha2/sha2.c
          +++ new/usr/src/common/crypto/sha2/sha2.c
   1    1  /*
   2      - * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
        2 + * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3    3   * Use is subject to license terms.
   4    4   */
   5    5  
   6      -#pragma ident   "@(#)sha2.c     1.7     07/04/10 SMI"
        6 +#pragma ident   "@(#)sha2.c     1.8     08/03/20 SMI"
   7    7  
   8      -
   9    8  /*
  10    9   * The basic framework for this code came from the reference
  11   10   * implementation for MD5.  That implementation is Copyright (C)
  12   11   * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  13   12   *
  14   13   * License to copy and use this software is granted provided that it
  15   14   * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  16   15   * Algorithm" in all material mentioning or referencing this software
  17   16   * or this function.
  18   17   *

  19   18   * License is also granted to make and use derivative works provided
  20   19   * that such works are identified as "derived from the RSA Data
  21   20   * Security, Inc. MD5 Message-Digest Algorithm" in all material
  22   21   * mentioning or referencing the derived work.
  23   22   *
  24   23   * RSA Data Security, Inc. makes no representations concerning either
  25   24   * the merchantability of this software or the suitability of this
  26   25   * software for any particular purpose. It is provided "as is"
  27   26   * without express or implied warranty of any kind.
  28   27   *
  29   28   * These notices must be retained in any copies of any part of this
  30   29   * documentation and/or software.
  31   30   *
  32   31   * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
  33   32   * standard, available at http://www.itl.nist.gov/div897/pubs/fip180-2.htm
  34   33   * Not as fast as one would like -- further optimizations are encouraged
  35   34   * and appreciated.

↓ open down ↓

17 lines elided

↑ open up ↑

  36   35   */
  37   36  
  38   37  #include <sys/types.h>
  39   38  #include <sys/param.h>
  40   39  #include <sys/systm.h>
  41   40  #include <sys/sysmacros.h>
  42   41  #define _SHA2_IMPL
  43   42  #include <sys/sha2.h>
  44   43  #include <sys/sha2_consts.h>
  45   44  
  46      -#ifndef _KERNEL
       45 +#ifdef _KERNEL
       46 +#include <sys/cmn_err.h>
  47   47  
       48 +#else
  48   49  #include <strings.h>
  49   50  #include <stdlib.h>
  50   51  #include <errno.h>
  51   52  
  52   53  #pragma weak SHA256Update = SHA2Update
  53   54  #pragma weak SHA384Update = SHA2Update
  54   55  #pragma weak SHA512Update = SHA2Update
  55   56  
  56   57  #pragma weak SHA256Final = SHA2Final
  57   58  #pragma weak SHA384Final = SHA2Final
  58   59  #pragma weak SHA512Final = SHA2Final
  59   60  
  60      -#endif  /* !_KERNEL */
       61 +#endif  /* _KERNEL */
  61   62  
  62      -#ifdef _KERNEL
  63      -#include <sys/cmn_err.h>
  64      -#endif /* _KERNEL */
  65      -
  66   63  static void Encode(uint8_t *, uint32_t *, size_t);
  67   64  static void Encode64(uint8_t *, uint64_t *, size_t);
       65 +
       66 +#if     defined(__amd64)
           /*
            * On amd64 the single-block transforms are macros that forward to the
            * multi-block routines declared below with a block count of 1.  Only
            * the prototypes are visible here.
            * NOTE(review): presumably these routines are the separately built
            * 64-bit x86 implementations this change introduces -- confirm
            * against the build.
            */
       67 +#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
       68 +#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
       69 +
       70 +void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
       71 +void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
       72 +
       73 +#else
           /* Everywhere else the C versions defined later in this file are used. */
  68   74  static void SHA256Transform(SHA2_CTX *, const uint8_t *);
  69   75  static void SHA512Transform(SHA2_CTX *, const uint8_t *);
       76 +#endif  /* __amd64 */
  70   77  
  71   78  static uint8_t PADDING[128] = { 0x80, /* the other 127 bytes are zero */ };
  72   79  
  73   80  /*
            * Ch and Maj are the basic SHA2 functions.
            *
            * Ch(b, c, d) takes, bit by bit, c where b is 1 and d where b is 0.
            * Maj(b, c, d) takes, bit by bit, the value held by at least two of
            * its three arguments.
            *
            * NOTE(review): Ch expands its first argument as "~b" with no
            * parentheses around b.  Every use visible in this file passes a
            * plain variable, so this is harmless here, but an expression
            * argument would need "~(b)" -- confirm before reusing the macro.
            */
  74   81  #define Ch(b, c, d)     (((b) & (c)) ^ ((~b) & (d)))
  75   82  #define Maj(b, c, d)    (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
  76   83  
  77   84  /*
            * Rotates x right n bits.  The width of the rotation is taken from
            * sizeof (x), so x must already have the intended word type, and n
            * must be greater than 0 and less than that width (otherwise the
            * left-shift count would equal the width).
            * NOTE(review): NBBY is presumably the bits-per-byte constant from
            * <sys/param.h>; it is not defined in this file.
            */
  78   85  #define ROTR(x, n)      \
  79   86          (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))

  80   87  
  81   88  /* Shift x right n bits */
  82   89  #define SHR(x, n)       ((x) >> (n))
  83   90  
  84   91  /*
            * SHA256 Functions
            *
            * The BIGSIGMA pair is applied to working variables inside a round;
            * the SIGMA pair is applied to schedule words when the message
            * schedule is extended.  All four are built on ROTR, whose width
            * follows the type of x, so x is expected to be a 32-bit value.
            */
  85   92  #define BIGSIGMA0_256(x)        (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
  86   93  #define BIGSIGMA1_256(x)        (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
  87   94  #define SIGMA0_256(x)           (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
  88   95  #define SIGMA1_256(x)           (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
  89   96  
           /*
            * One SHA256 round.  i is the round number handed to SHA256_CONST and
            * w is the schedule word for that round.  Only d and h are assigned;
            * callers rotate the order of the a..h arguments from round to round
            * instead of moving values between variables.  T1 and T2 must be
            * declared by the caller.  The expansion is several statements with
            * no enclosing braces, so use it only as a full statement in a block.
            */
  90   97  #define SHA256ROUND(a, b, c, d, e, f, g, h, i, w)                       \
  91   98          T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w;  \
  92   99          d += T1;                                                        \
  93  100          T2 = BIGSIGMA0_256(a) + Maj(a, b, c);                           \
  94  101          h = T1 + T2
  95  102  
  96  103  /*
            * SHA384/512 Functions
            *
            * The BIGSIGMA pair is applied to working variables inside a round;
            * the SIGMA pair is applied to schedule words when the message
            * schedule is extended.  All four are built on ROTR, whose width
            * follows the type of x, so x is expected to be a 64-bit value.
            */
  97  104  #define BIGSIGMA0(x)    (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
  98  105  #define BIGSIGMA1(x)    (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
  99  106  #define SIGMA0(x)       (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
 100  107  #define SIGMA1(x)       (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
           /*
            * One SHA384/512 round.  i is the round number handed to SHA512_CONST
            * and w is the schedule word for that round.  Only d and h are
            * assigned; callers rotate the order of the a..h arguments from round
            * to round instead of moving values between variables.  T1 and T2
            * must be declared by the caller.  The expansion is several
            * statements with no enclosing braces, so use it only as a full
            * statement in a block.
            */
 101  108  #define SHA512ROUND(a, b, c, d, e, f, g, h, i, w)                       \
 102  109          T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w;      \
 103  110          d += T1;                                                        \
 104  111          T2 = BIGSIGMA0(a) + Maj(a, b, c);                               \
 105  112          h = T1 + T2
 106  113  
 107  114  /*
 108  115   * sparc optimization:
 109  116   *
 110  117   * on the sparc, we can load big endian 32-bit data easily.  note that
 111  118   * special care must be taken to ensure the address is 32-bit aligned.
 112  119   * in the interest of speed, we don't check to make sure, since
 113  120   * careful programming can guarantee this for us.
 114  121   */
 115  122  
 116  123  #if     defined(_BIG_ENDIAN)
 117  124  
           /* Big-endian host: the word is read directly through a cast pointer. */
 118  125  #define LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
 119  126  
 120  127  #else   /* little endian -- will work on big endian, but slowly */
 121  128  
           /*
            * Assemble the 32-bit big-endian value one byte at a time; addr must
            * be indexable as bytes.
            *
            * NOTE(review): the callers visible in this file pass a uint8_t
            * pointer, so (addr)[0] is promoted to int before "<< 24" and a
            * first byte of 0x80 or more shifts into the sign bit.  Presumably
            * harmless on the supported compilers, but a (uint32_t) cast, like
            * the (uint64_t) casts in LOAD_BIG_64, would make it well defined --
            * confirm.
            */
 122  129  #define LOAD_BIG_32(addr)       \
 123  130          (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
 124  131  #endif
 125  132  
 126  133  
           /*
            * 64-bit counterpart of LOAD_BIG_32: a direct load through a cast
            * pointer on a big-endian host, otherwise the value is assembled from
            * eight bytes, each widened to uint64_t before it is shifted.
            */
 127  134  #if     defined(_BIG_ENDIAN)

↓ open down ↓

48 lines elided

↑ open up ↑

 128  135  
 129  136  #define LOAD_BIG_64(addr)       (*(uint64_t *)(addr))
 130  137  
 131  138  #else   /* little endian -- will work on big endian, but slowly */
 132  139  
 133  140  #define LOAD_BIG_64(addr)       \
 134  141          (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |    \
 135  142              ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
 136  143              ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
 137  144              ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
 138      -
 139  145  #endif
 140  146  
 141  147  
      148 +#if     !defined(__amd64)
 142  149  /*
            * SHA256 Transform
            *
            * Runs the 64 SHA256 rounds over the single block at blk and adds the
            * resulting working variables into ctx->state.s32[0..7].
            *
            * ctx: context whose state.s32[] is read and updated; its
            *      buf_un.buf32 is overwritten when blk is not 4-byte aligned.
            * blk: input block; sixteen big-endian 32-bit words (64 bytes) are
            *      read from it.
            *
            * Compiled only when __amd64 is not defined; on amd64 SHA256Transform
            * is a macro that calls SHA256TransformBlocks instead.
            */
 143  150  
 144  151  static void
 145  152  SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
 146  153  {
 147      -
                   /* Working variables a..h start from the current chaining value. */
 148  154          uint32_t a = ctx->state.s32[0];
 149  155          uint32_t b = ctx->state.s32[1];
 150  156          uint32_t c = ctx->state.s32[2];
 151  157          uint32_t d = ctx->state.s32[3];
 152  158          uint32_t e = ctx->state.s32[4];
 153  159          uint32_t f = ctx->state.s32[5];
 154  160          uint32_t g = ctx->state.s32[6];
 155  161          uint32_t h = ctx->state.s32[7];
 156  162  
                   /*
                    * w0..w15 hold a sliding 16-word window of the message schedule;
                    * T1 and T2 are the temporaries assigned by SHA256ROUND.
                    */
 157  163          uint32_t w0, w1, w2, w3, w4, w5, w6, w7;

 158  164          uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
 159  165          uint32_t T1, T2;
 160  166  
                   /*
                    * On sparc the 64 round constants are also laid out as a local
                    * table.  NOTE(review): presumably SHA256_CONST(i) in
                    * <sys/sha2_consts.h> indexes this table on sparc -- the macro
                    * is not visible here, confirm.
                    */
 161  167  #if     defined(__sparc)
 162  168          static const uint32_t sha256_consts[] = {
 163  169                  SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
 164  170                  SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
 165  171                  SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
 166  172                  SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
 167  173                  SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
 168  174                  SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
 169  175                  SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
 170  176                  SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
 171  177                  SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
 172  178                  SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
 173  179                  SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
 174  180                  SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
 175  181                  SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,

↓ open down ↓

18 lines elided

↑ open up ↑

 176  182                  SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
 177  183                  SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
 178  184                  SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
 179  185                  SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
 180  186                  SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
 181  187                  SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
 182  188                  SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
 183  189                  SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
 184  190                  SHA256_CONST_63
 185  191          };
 186      -#endif
      192 +#endif  /* __sparc */
 187  193  
                   /*
                    * Unaligned input is first copied into ctx->buf_un.buf32 (replacing
                    * that buffer's contents) and the word loads below then read from
                    * the copy.  The check is made on every platform, including those
                    * where LOAD_BIG_32 reads byte by byte.
                    */
 188  194          if ((uintptr_t)blk & 0x3) {             /* not 4-byte aligned? */
 189  195                  bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
 190  196                  blk = (uint8_t *)ctx->buf_un.buf32;
 191  197          }
 192  198  
                   /*
                    * Rounds 0-15: the schedule words are the sixteen big-endian words
                    * of the block.  Each round shifts the a..h argument list by one
                    * place instead of moving values between variables; after eight
                    * rounds the order is back to (a, ..., h).
                    */
 193  199          /* LINTED E_BAD_PTR_CAST_ALIGN */
 194  200          w0 =  LOAD_BIG_32(blk + 4 * 0);
 195  201          SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
 196  202          /* LINTED E_BAD_PTR_CAST_ALIGN */

 197  203          w1 =  LOAD_BIG_32(blk + 4 * 1);
 198  204          SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
 199  205          /* LINTED E_BAD_PTR_CAST_ALIGN */
 200  206          w2 =  LOAD_BIG_32(blk + 4 * 2);
 201  207          SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
 202  208          /* LINTED E_BAD_PTR_CAST_ALIGN */
 203  209          w3 =  LOAD_BIG_32(blk + 4 * 3);
 204  210          SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
 205  211          /* LINTED E_BAD_PTR_CAST_ALIGN */
 206  212          w4 =  LOAD_BIG_32(blk + 4 * 4);
 207  213          SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
 208  214          /* LINTED E_BAD_PTR_CAST_ALIGN */
 209  215          w5 =  LOAD_BIG_32(blk + 4 * 5);
 210  216          SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
 211  217          /* LINTED E_BAD_PTR_CAST_ALIGN */
 212  218          w6 =  LOAD_BIG_32(blk + 4 * 6);
 213  219          SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
 214  220          /* LINTED E_BAD_PTR_CAST_ALIGN */
 215  221          w7 =  LOAD_BIG_32(blk + 4 * 7);
 216  222          SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
 217  223          /* LINTED E_BAD_PTR_CAST_ALIGN */
 218  224          w8 =  LOAD_BIG_32(blk + 4 * 8);
 219  225          SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
 220  226          /* LINTED E_BAD_PTR_CAST_ALIGN */
 221  227          w9 =  LOAD_BIG_32(blk + 4 * 9);
 222  228          SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
 223  229          /* LINTED E_BAD_PTR_CAST_ALIGN */
 224  230          w10 =  LOAD_BIG_32(blk + 4 * 10);
 225  231          SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
 226  232          /* LINTED E_BAD_PTR_CAST_ALIGN */
 227  233          w11 =  LOAD_BIG_32(blk + 4 * 11);
 228  234          SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
 229  235          /* LINTED E_BAD_PTR_CAST_ALIGN */
 230  236          w12 =  LOAD_BIG_32(blk + 4 * 12);
 231  237          SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
 232  238          /* LINTED E_BAD_PTR_CAST_ALIGN */
 233  239          w13 =  LOAD_BIG_32(blk + 4 * 13);
 234  240          SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
 235  241          /* LINTED E_BAD_PTR_CAST_ALIGN */
 236  242          w14 =  LOAD_BIG_32(blk + 4 * 14);
 237  243          SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
 238  244          /* LINTED E_BAD_PTR_CAST_ALIGN */
 239  245          w15 =  LOAD_BIG_32(blk + 4 * 15);
 240  246          SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
 241  247  
                   /*
                    * Rounds 16-63: each schedule word is computed in place from the
                    * 16-word window.  For round t the slot w[t mod 16] becomes
                    * SIGMA1_256(w[t-2]) + w[t-7] + SIGMA0_256(w[t-15]) + w[t-16],
                    * with every index taken mod 16.
                    */
 242  248          w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
 243  249          SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
 244  250          w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
 245  251          SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
 246  252          w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
 247  253          SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
 248  254          w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
 249  255          SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
 250  256          w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
 251  257          SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
 252  258          w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
 253  259          SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
 254  260          w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
 255  261          SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
 256  262          w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
 257  263          SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
 258  264          w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
 259  265          SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
 260  266          w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
 261  267          SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
 262  268          w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
 263  269          SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
 264  270          w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
 265  271          SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
 266  272          w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
 267  273          SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
 268  274          w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
 269  275          SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
 270  276          w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
 271  277          SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
 272  278          w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
 273  279          SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
 274  280  
 275  281          w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
 276  282          SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
 277  283          w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
 278  284          SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
 279  285          w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
 280  286          SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
 281  287          w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
 282  288          SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
 283  289          w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
 284  290          SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
 285  291          w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
 286  292          SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
 287  293          w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
 288  294          SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
 289  295          w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
 290  296          SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
 291  297          w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
 292  298          SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
 293  299          w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
 294  300          SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
 295  301          w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
 296  302          SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
 297  303          w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
 298  304          SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
 299  305          w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
 300  306          SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
 301  307          w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
 302  308          SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
 303  309          w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
 304  310          SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
 305  311          w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
 306  312          SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
 307  313  
 308  314          w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
 309  315          SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
 310  316          w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
 311  317          SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
 312  318          w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
 313  319          SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
 314  320          w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
 315  321          SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
 316  322          w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
 317  323          SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
 318  324          w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
 319  325          SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
 320  326          w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
 321  327          SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
 322  328          w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
 323  329          SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
 324  330          w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
 325  331          SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
 326  332          w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
 327  333          SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
 328  334          w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
 329  335          SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
 330  336          w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
 331  337          SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
 332  338          w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
 333  339          SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
 334  340          w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
 335  341          SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
 336  342          w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
 337  343          SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
 338  344          w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
 339  345          SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
 340  346  
                   /* Add the working variables into the chaining value. */
 341  347          ctx->state.s32[0] += a;
 342  348          ctx->state.s32[1] += b;
 343  349          ctx->state.s32[2] += c;
 344  350          ctx->state.s32[3] += d;
 345  351          ctx->state.s32[4] += e;
 346  352          ctx->state.s32[5] += f;
 347  353          ctx->state.s32[6] += g;
 348  354          ctx->state.s32[7] += h;
 349  355  }
 350  356  
 351  357  
 352  358  /* SHA384 and SHA512 Transform */
 353  359  
 354  360  static void
 355  361  SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
 356  362  {
 357  363  
 358  364          uint64_t a = ctx->state.s64[0];
 359  365          uint64_t b = ctx->state.s64[1];
 360  366          uint64_t c = ctx->state.s64[2];
 361  367          uint64_t d = ctx->state.s64[3];
 362  368          uint64_t e = ctx->state.s64[4];
 363  369          uint64_t f = ctx->state.s64[5];
 364  370          uint64_t g = ctx->state.s64[6];
 365  371          uint64_t h = ctx->state.s64[7];
 366  372  
 367  373          uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
 368  374          uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
 369  375          uint64_t T1, T2;
 370  376  
 371  377  #if     defined(__sparc)
 372  378          static const uint64_t sha512_consts[] = {
 373  379                  SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
 374  380                  SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
 375  381                  SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
 376  382                  SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
 377  383                  SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
 378  384                  SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
 379  385                  SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
 380  386                  SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
 381  387                  SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
 382  388                  SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
 383  389                  SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
 384  390                  SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
 385  391                  SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
 386  392                  SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
 387  393                  SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
 388  394                  SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
 389  395                  SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
 390  396                  SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,

↓ open down ↓

194 lines elided

↑ open up ↑

 391  397                  SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
 392  398                  SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
 393  399                  SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
 394  400                  SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
 395  401                  SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
 396  402                  SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
 397  403                  SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
 398  404                  SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
 399  405                  SHA512_CONST_78, SHA512_CONST_79
 400  406          };
 401      -#endif
      407 +#endif  /* __sparc */
 402  408  
 403  409  
 404  410          if ((uintptr_t)blk & 0x7) {             /* not 8-byte aligned? */
 405  411                  bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
 406  412                  blk = (uint8_t *)ctx->buf_un.buf64;
 407  413          }
 408  414  
 409  415          /* LINTED E_BAD_PTR_CAST_ALIGN */
 410  416          w0 =  LOAD_BIG_64(blk + 8 * 0);
 411  417          SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);

 412  418          /* LINTED E_BAD_PTR_CAST_ALIGN */
 413  419          w1 =  LOAD_BIG_64(blk + 8 * 1);
 414  420          SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
 415  421          /* LINTED E_BAD_PTR_CAST_ALIGN */
 416  422          w2 =  LOAD_BIG_64(blk + 8 * 2);
 417  423          SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
 418  424          /* LINTED E_BAD_PTR_CAST_ALIGN */
 419  425          w3 =  LOAD_BIG_64(blk + 8 * 3);
 420  426          SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
 421  427          /* LINTED E_BAD_PTR_CAST_ALIGN */
 422  428          w4 =  LOAD_BIG_64(blk + 8 * 4);
 423  429          SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
 424  430          /* LINTED E_BAD_PTR_CAST_ALIGN */
 425  431          w5 =  LOAD_BIG_64(blk + 8 * 5);
 426  432          SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
 427  433          /* LINTED E_BAD_PTR_CAST_ALIGN */
 428  434          w6 =  LOAD_BIG_64(blk + 8 * 6);
 429  435          SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
 430  436          /* LINTED E_BAD_PTR_CAST_ALIGN */
 431  437          w7 =  LOAD_BIG_64(blk + 8 * 7);
 432  438          SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
 433  439          /* LINTED E_BAD_PTR_CAST_ALIGN */
 434  440          w8 =  LOAD_BIG_64(blk + 8 * 8);
 435  441          SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
 436  442          /* LINTED E_BAD_PTR_CAST_ALIGN */
 437  443          w9 =  LOAD_BIG_64(blk + 8 * 9);
 438  444          SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
 439  445          /* LINTED E_BAD_PTR_CAST_ALIGN */
 440  446          w10 =  LOAD_BIG_64(blk + 8 * 10);
 441  447          SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
 442  448          /* LINTED E_BAD_PTR_CAST_ALIGN */
 443  449          w11 =  LOAD_BIG_64(blk + 8 * 11);
 444  450          SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
 445  451          /* LINTED E_BAD_PTR_CAST_ALIGN */
 446  452          w12 =  LOAD_BIG_64(blk + 8 * 12);
 447  453          SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
 448  454          /* LINTED E_BAD_PTR_CAST_ALIGN */
 449  455          w13 =  LOAD_BIG_64(blk + 8 * 13);
 450  456          SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
 451  457          /* LINTED E_BAD_PTR_CAST_ALIGN */
 452  458          w14 =  LOAD_BIG_64(blk + 8 * 14);
 453  459          SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
 454  460          /* LINTED E_BAD_PTR_CAST_ALIGN */
 455  461          w15 =  LOAD_BIG_64(blk + 8 * 15);
 456  462          SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
 457  463  
 458  464          w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
 459  465          SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
 460  466          w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
 461  467          SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
 462  468          w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
 463  469          SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
 464  470          w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
 465  471          SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
 466  472          w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
 467  473          SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
 468  474          w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
 469  475          SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
 470  476          w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
 471  477          SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
 472  478          w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
 473  479          SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
 474  480          w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
 475  481          SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
 476  482          w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
 477  483          SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
 478  484          w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
 479  485          SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
 480  486          w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
 481  487          SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
 482  488          w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
 483  489          SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
 484  490          w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
 485  491          SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
 486  492          w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
 487  493          SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
 488  494          w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
 489  495          SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
 490  496  
 491  497          w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
 492  498          SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
 493  499          w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
 494  500          SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
 495  501          w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
 496  502          SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
 497  503          w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
 498  504          SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
 499  505          w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
 500  506          SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
 501  507          w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
 502  508          SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
 503  509          w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
 504  510          SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
 505  511          w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
 506  512          SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
 507  513          w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
 508  514          SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
 509  515          w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
 510  516          SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
 511  517          w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
 512  518          SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
 513  519          w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
 514  520          SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
 515  521          w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
 516  522          SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
 517  523          w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
 518  524          SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
 519  525          w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
 520  526          SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
 521  527          w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
 522  528          SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
 523  529  
 524  530          w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
 525  531          SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
 526  532          w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
 527  533          SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
 528  534          w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
 529  535          SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
 530  536          w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
 531  537          SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
 532  538          w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
 533  539          SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
 534  540          w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
 535  541          SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
 536  542          w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
 537  543          SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
 538  544          w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
 539  545          SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
 540  546          w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
 541  547          SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
 542  548          w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
 543  549          SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
 544  550          w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
 545  551          SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
 546  552          w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
 547  553          SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
 548  554          w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
 549  555          SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
 550  556          w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
 551  557          SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
 552  558          w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
 553  559          SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
 554  560          w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
 555  561          SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
 556  562  
 557  563          w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
 558  564          SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
 559  565          w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
 560  566          SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
 561  567          w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
 562  568          SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
 563  569          w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
 564  570          SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
 565  571          w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
 566  572          SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
 567  573          w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
 568  574          SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
 569  575          w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
 570  576          SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
 571  577          w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
 572  578          SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
 573  579          w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
 574  580          SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
 575  581          w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
 576  582          SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
 577  583          w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
 578  584          SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
 579  585          w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
 580  586          SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
 581  587          w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
 582  588          SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
 583  589          w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
 584  590          SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
 585  591          w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
 586  592          SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
 587  593          w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
 588  594          SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
 589  595

↓ open down ↓

178 lines elided

↑ open up ↑

 590  596          ctx->state.s64[0] += a;
 591  597          ctx->state.s64[1] += b;
 592  598          ctx->state.s64[2] += c;
 593  599          ctx->state.s64[3] += d;
 594  600          ctx->state.s64[4] += e;
 595  601          ctx->state.s64[5] += f;
 596  602          ctx->state.s64[6] += g;
 597  603          ctx->state.s64[7] += h;
 598  604  
 599  605  }
      606 +#endif  /* !__amd64 */
 600  607  
 601  608  
 602  609  /*
 603  610   * Encode()
 604  611   *
 605  612   * purpose: to convert a list of numbers from little endian to big endian
 606  613   *   input: uint8_t *   : place to store the converted big endian numbers
 607  614   *          uint32_t *  : place to get numbers to convert from
 608  615   *          size_t      : the length of the input in bytes
 609  616   *  output: void

 610  617   */
 611  618  
 612  619  static void
 613  620  Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
 614  621      size_t len)
 615  622  {
 616  623          size_t          i, j;
 617  624  
 618  625  #if     defined(__sparc)
 619  626          if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
 620  627                  for (i = 0, j = 0; j < len; i++, j += 4) {
 621  628                          /* LINTED: pointer alignment */
 622  629                          *((uint32_t *)(output + j)) = input[i];
 623  630                  }
 624  631          } else {
 625  632  #endif  /* little endian -- will work on big endian, but slowly */
 626  633                  for (i = 0, j = 0; j < len; i++, j += 4) {
 627  634                          output[j]       = (input[i] >> 24) & 0xff;
 628  635                          output[j + 1]   = (input[i] >> 16) & 0xff;
 629  636                          output[j + 2]   = (input[i] >>  8) & 0xff;
 630  637                          output[j + 3]   = input[i] & 0xff;
 631  638                  }
 632  639  #if     defined(__sparc)
 633  640          }
 634  641  #endif
 635  642  }
 636  643  
 637  644  static void
 638  645  Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
 639  646      size_t len)
 640  647  {
 641  648          size_t          i, j;
 642  649  
 643  650  #if     defined(__sparc)
 644  651          if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
 645  652                  for (i = 0, j = 0; j < len; i++, j += 8) {
 646  653                          /* LINTED: pointer alignment */
 647  654                          *((uint64_t *)(output + j)) = input[i];
 648  655                  }
 649  656          } else {
 650  657  #endif  /* little endian -- will work on big endian, but slowly */
 651  658                  for (i = 0, j = 0; j < len; i++, j += 8) {
 652  659  
 653  660                          output[j]       = (input[i] >> 56) & 0xff;
 654  661                          output[j + 1]   = (input[i] >> 48) & 0xff;
 655  662                          output[j + 2]   = (input[i] >> 40) & 0xff;
 656  663                          output[j + 3]   = (input[i] >> 32) & 0xff;
 657  664                          output[j + 4]   = (input[i] >> 24) & 0xff;
 658  665                          output[j + 5]   = (input[i] >> 16) & 0xff;
 659  666                          output[j + 6]   = (input[i] >>  8) & 0xff;
 660  667                          output[j + 7]   = input[i] & 0xff;
 661  668                  }
 662  669  #if     defined(__sparc)
 663  670          }
 664  671  #endif
 665  672  }
 666  673  
 667  674  
 668  675  void
 669  676  SHA2Init(uint64_t mech, SHA2_CTX *ctx)
 670  677  {
 671  678  
 672  679          switch (mech) {
 673  680          case SHA256_MECH_INFO_TYPE:
 674  681          case SHA256_HMAC_MECH_INFO_TYPE:
 675  682          case SHA256_HMAC_GEN_MECH_INFO_TYPE:
 676  683                  ctx->state.s32[0] = 0x6a09e667U;
 677  684                  ctx->state.s32[1] = 0xbb67ae85U;
 678  685                  ctx->state.s32[2] = 0x3c6ef372U;
 679  686                  ctx->state.s32[3] = 0xa54ff53aU;
 680  687                  ctx->state.s32[4] = 0x510e527fU;
 681  688                  ctx->state.s32[5] = 0x9b05688cU;
 682  689                  ctx->state.s32[6] = 0x1f83d9abU;
 683  690                  ctx->state.s32[7] = 0x5be0cd19U;
 684  691                  break;
 685  692          case SHA384_MECH_INFO_TYPE:
 686  693          case SHA384_HMAC_MECH_INFO_TYPE:
 687  694          case SHA384_HMAC_GEN_MECH_INFO_TYPE:
 688  695                  ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
 689  696                  ctx->state.s64[1] = 0x629a292a367cd507ULL;
 690  697                  ctx->state.s64[2] = 0x9159015a3070dd17ULL;
 691  698                  ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
 692  699                  ctx->state.s64[4] = 0x67332667ffc00b31ULL;
 693  700                  ctx->state.s64[5] = 0x8eb44a8768581511ULL;
 694  701                  ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
 695  702                  ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
 696  703                  break;
 697  704          case SHA512_MECH_INFO_TYPE:
 698  705          case SHA512_HMAC_MECH_INFO_TYPE:
 699  706          case SHA512_HMAC_GEN_MECH_INFO_TYPE:
 700  707                  ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
 701  708                  ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
 702  709                  ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
 703  710                  ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
 704  711                  ctx->state.s64[4] = 0x510e527fade682d1ULL;
 705  712                  ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
 706  713                  ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
 707  714                  ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
 708  715                  break;
 709  716  #ifdef _KERNEL
 710  717          default:
 711  718                  cmn_err(CE_PANIC, "sha2_init: "
 712  719                      "failed to find a supported algorithm: 0x%x",
 713  720                      (uint32_t)mech);
 714  721  
 715  722  #endif /* _KERNEL */
 716  723          }
 717  724  
 718  725          ctx->algotype = mech;
 719  726          ctx->count.c64[0] = ctx->count.c64[1] = 0;
 720  727  }
 721  728  
 722  729  #ifndef _KERNEL
 723  730  
 724  731  #pragma inline(SHA256Init, SHA384Init, SHA512Init)
           /*
            * Per-algorithm convenience entry points.  This section is only
            * compiled when _KERNEL is not defined.  Each one forwards to
            * SHA2Init() with the matching mechanism constant.
            */

           /* SHA256Init(): initialize ctx for a SHA-256 digest operation. */
 725  732  void
 726  733  SHA256Init(SHA256_CTX *ctx)
 727  734  {
 728  735          SHA2Init(SHA256, ctx);
 729  736  }
 730  737  
           /* SHA384Init(): initialize ctx for a SHA-384 digest operation. */
 731  738  void
 732  739  SHA384Init(SHA384_CTX *ctx)
 733  740  {
 734  741          SHA2Init(SHA384, ctx);
 735  742  }
 736  743  
           /* SHA512Init(): initialize ctx for a SHA-512 digest operation. */
 737  744  void
 738  745  SHA512Init(SHA512_CTX *ctx)
 739  746  {
 740  747          SHA2Init(SHA512, ctx);
 741  748  }

↓ open down ↓

132 lines elided

↑ open up ↑

 742  749  
 743  750  #endif /* !_KERNEL */
 744  751  
 745  752  /*
 746  753   * SHA2Update()
 747  754   *
 748  755   * purpose: continues an sha2 digest operation, using the message block
 749  756   *          to update the context.
 750  757   *   input: SHA2_CTX *  : the context to update
 751  758   *          void *      : the message block
 752      - *          size_t    : the length of the message block in bytes
      759 + *          size_t      : the length of the message block, in bytes
 753  760   *  output: void
 754  761   */
 755  762  
 756  763  void
 757  764  SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
 758  765  {
 759      -        uint32_t i, buf_index, buf_len, buf_limit;
 760      -        const uint8_t *input = inptr;
      766 +        uint32_t        i, buf_index, buf_len, buf_limit;
      767 +        const uint8_t   *input = inptr;
      768 +        uint32_t        algotype = ctx->algotype;
      769 +#if defined(__amd64)
      770 +        uint32_t        block_count;
      771 +#endif  /* !__amd64 */
 761  772  
      773 +
 762  774          /* check for noop */
 763  775          if (input_len == 0)
 764  776                  return;
 765  777  
 766      -        if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
      778 +        if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
 767  779                  buf_limit = 64;
 768  780  
 769  781                  /* compute number of bytes mod 64 */
 770  782                  buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
 771  783  
 772  784                  /* update number of bits */
 773  785                  if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
 774  786                          ctx->count.c32[0]++;
 775  787  
 776  788                  ctx->count.c32[0] += (input_len >> 29);

 777  789  
 778  790          } else {
 779  791                  buf_limit = 128;
 780  792  
 781  793                  /* compute number of bytes mod 128 */
 782  794                  buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
 783  795  
 784  796                  /* update number of bits */
 785  797                  if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
 786  798                          ctx->count.c64[0]++;
 787  799  
 788  800                  ctx->count.c64[0] += (input_len >> 29);
 789  801          }
 790  802  
 791  803          buf_len = buf_limit - buf_index;
 792  804  
 793  805          /* transform as many times as possible */
 794  806          i = 0;
 795  807          if (input_len >= buf_len) {
 796  808  
 797  809                  /*

↓ open down ↓

21 lines elided

↑ open up ↑

 798  810                   * general optimization:
 799  811                   *
 800  812                   * only do initial bcopy() and SHA2Transform() if
 801  813                   * buf_index != 0.  if buf_index == 0, we're just
 802  814                   * wasting our time doing the bcopy() since there
 803  815                   * wasn't any data left over from a previous call to
 804  816                   * SHA2Update().
 805  817                   */
 806  818                  if (buf_index) {
 807  819                          bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 808      -                        if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
      820 +                        if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
 809  821                                  SHA256Transform(ctx, ctx->buf_un.buf8);
 810  822                          else
 811  823                                  SHA512Transform(ctx, ctx->buf_un.buf8);
 812  824  
 813  825                          i = buf_len;
 814  826                  }
 815  827  
 816      -
 817      -                for (; i + buf_limit - 1 < input_len; i += buf_limit) {
 818      -                        if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
      828 +#if !defined(__amd64)
      829 +                if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
      830 +                        for (; i + buf_limit - 1 < input_len; i += buf_limit) {
 819  831                                  SHA256Transform(ctx, &input[i]);
 820      -                        else
      832 +                        }
      833 +                } else {
      834 +                        for (; i + buf_limit - 1 < input_len; i += buf_limit) {
 821  835                                  SHA512Transform(ctx, &input[i]);
      836 +                        }
 822  837                  }
 823  838  
      839 +#else
      840 +                if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
      841 +                        block_count = (input_len - i) >> 6;
      842 +                        if (block_count > 0) {
      843 +                                SHA256TransformBlocks(ctx, &input[i],
      844 +                                    block_count);
      845 +                                i += block_count << 6;
      846 +                        }
      847 +                } else {
      848 +                        block_count = (input_len - i) >> 7;
      849 +                        if (block_count > 0) {
      850 +                                SHA512TransformBlocks(ctx, &input[i],
      851 +                                    block_count);
      852 +                                i += block_count << 7;
      853 +                        }
      854 +                }
      855 +#endif  /* !__amd64 */
      856 +
 824  857                  /*
 825  858                   * general optimization:
 826  859                   *
 827  860                   * if i and input_len are the same, return now instead
 828  861                   * of calling bcopy(), since the bcopy() in this case
 829      -                 * will be an expensive nop.
      862 +                 * will be an expensive noop.
 830  863                   */
 831  864  
 832  865                  if (input_len == i)
 833  866                          return;
 834  867  
 835  868                  buf_index = 0;
 836  869          }
 837  870  
 838  871          /* buffer remaining input */
 839  872          bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 840  873  }
 841  874  
 842  875  
 843  876  /*
 844  877   * SHA2Final()
 845  878   *
 846  879   * purpose: ends an sha2 digest operation, finalizing the message digest and
 847  880   *          zeroing the context.
 848      - *   input: uchar_t *   : a buffer to store the digest in
      881 + *   input: uchar_t *   : a buffer to store the digest
 849  882   *                      : The function actually uses void* because many
 850  883   *                      : callers pass things other than uchar_t here.
 851  884   *          SHA2_CTX *  : the context to finalize, save, and zero
 852  885   *  output: void
 853  886   */
 854  887  
 855  888  void
 856  889  SHA2Final(void *digest, SHA2_CTX *ctx)
 857  890  {
 858  891          uint8_t         bitcount_be[sizeof (ctx->count.c32)];
 859  892          uint8_t         bitcount_be64[sizeof (ctx->count.c64)];
 860  893          uint32_t        index;
      894 +        uint32_t        algotype = ctx->algotype;
 861  895  
 862      -
 863      -        if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
      896 +        if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
 864  897                  index  = (ctx->count.c32[1] >> 3) & 0x3f;
 865  898                  Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
 866  899                  SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
 867  900                  SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
 868  901                  Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
 869  902  
 870  903          } else {
 871  904                  index  = (ctx->count.c64[1] >> 3) & 0x7f;
 872  905                  Encode64(bitcount_be64, ctx->count.c64,
 873  906                      sizeof (bitcount_be64));
 874  907                  SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
 875  908                  SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
 876      -                if (ctx->algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
      909 +                if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
 877  910                          ctx->state.s64[6] = ctx->state.s64[7] = 0;
 878  911                          Encode64(digest, ctx->state.s64,
 879  912                              sizeof (uint64_t) * 6);
 880  913                  } else
 881  914                          Encode64(digest, ctx->state.s64,
 882  915                              sizeof (ctx->state.s64));
 883  916          }
 884  917  
 885  918          /* zeroize sensitive information */
 886  919          bzero(ctx, sizeof (*ctx));
 887  920  }

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX