Print this page
6665607 Need a SHA256/SHA384/SHA512 implementation optimized for 64-bit x86

Split Close
Expand all
Collapse all
          --- old/usr/src/common/crypto/sha2/sha2.c
          +++ new/usr/src/common/crypto/sha2/sha2.c
   1    1  /*
   2      - * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
        2 + * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3    3   * Use is subject to license terms.
   4    4   */
   5    5  
   6      -#pragma ident   "@(#)sha2.c     1.7     07/04/10 SMI"
        6 +#pragma ident   "@(#)sha2.c     1.8     08/03/20 SMI"
   7    7  
   8      -
   9    8  /*
  10    9   * The basic framework for this code came from the reference
  11   10   * implementation for MD5.  That implementation is Copyright (C)
  12   11   * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  13   12   *
  14   13   * License to copy and use this software is granted provided that it
  15   14   * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  16   15   * Algorithm" in all material mentioning or referencing this software
  17   16   * or this function.
  18   17   *
↓ open down ↓ 17 lines elided ↑ open up ↑
  36   35   */
  37   36  
  38   37  #include <sys/types.h>
  39   38  #include <sys/param.h>
  40   39  #include <sys/systm.h>
  41   40  #include <sys/sysmacros.h>
  42   41  #define _SHA2_IMPL
  43   42  #include <sys/sha2.h>
  44   43  #include <sys/sha2_consts.h>
  45   44  
  46      -#ifndef _KERNEL
       45 +#ifdef _KERNEL
       46 +#include <sys/cmn_err.h>
  47   47  
       48 +#else
  48   49  #include <strings.h>
  49   50  #include <stdlib.h>
  50   51  #include <errno.h>
  51   52  
  52   53  #pragma weak SHA256Update = SHA2Update
  53   54  #pragma weak SHA384Update = SHA2Update
  54   55  #pragma weak SHA512Update = SHA2Update
  55   56  
  56   57  #pragma weak SHA256Final = SHA2Final
  57   58  #pragma weak SHA384Final = SHA2Final
  58   59  #pragma weak SHA512Final = SHA2Final
  59   60  
  60      -#endif  /* !_KERNEL */
       61 +#endif  /* _KERNEL */
  61   62  
  62      -#ifdef _KERNEL
  63      -#include <sys/cmn_err.h>
  64      -#endif /* _KERNEL */
  65      -
  66   63  static void Encode(uint8_t *, uint32_t *, size_t);
  67   64  static void Encode64(uint8_t *, uint64_t *, size_t);
       65 +
       66 +#if     defined(__amd64)
       67 +#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
       68 +#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
       69 +
       70 +void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
       71 +void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
       72 +
       73 +#else
  68   74  static void SHA256Transform(SHA2_CTX *, const uint8_t *);
  69   75  static void SHA512Transform(SHA2_CTX *, const uint8_t *);
       76 +#endif  /* __amd64 */
  70   77  
  71   78  static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
  72   79  
  73   80  /* Ch and Maj are the basic SHA2 functions. */
  74   81  #define Ch(b, c, d)     (((b) & (c)) ^ ((~b) & (d)))
  75   82  #define Maj(b, c, d)    (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
  76   83  
  77   84  /* Rotates x right n bits. */
  78   85  #define ROTR(x, n)      \
  79   86          (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
↓ open down ↓ 48 lines elided ↑ open up ↑
 128  135  
 129  136  #define LOAD_BIG_64(addr)       (*(uint64_t *)(addr))
 130  137  
 131  138  #else   /* little endian -- will work on big endian, but slowly */
 132  139  
 133  140  #define LOAD_BIG_64(addr)       \
 134  141          (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) |    \
 135  142              ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
 136  143              ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
 137  144              ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
 138      -
 139  145  #endif
 140  146  
 141  147  
      148 +#if     !defined(__amd64)
 142  149  /* SHA256 Transform */
 143  150  
 144  151  static void
 145  152  SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
 146  153  {
 147      -
 148  154          uint32_t a = ctx->state.s32[0];
 149  155          uint32_t b = ctx->state.s32[1];
 150  156          uint32_t c = ctx->state.s32[2];
 151  157          uint32_t d = ctx->state.s32[3];
 152  158          uint32_t e = ctx->state.s32[4];
 153  159          uint32_t f = ctx->state.s32[5];
 154  160          uint32_t g = ctx->state.s32[6];
 155  161          uint32_t h = ctx->state.s32[7];
 156  162  
 157  163          uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
↓ open down ↓ 18 lines elided ↑ open up ↑
 176  182                  SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
 177  183                  SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
 178  184                  SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
 179  185                  SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
 180  186                  SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
 181  187                  SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
 182  188                  SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
 183  189                  SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
 184  190                  SHA256_CONST_63
 185  191          };
 186      -#endif
      192 +#endif  /* __sparc */
 187  193  
 188  194          if ((uintptr_t)blk & 0x3) {             /* not 4-byte aligned? */
 189  195                  bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
 190  196                  blk = (uint8_t *)ctx->buf_un.buf32;
 191  197          }
 192  198  
 193  199          /* LINTED E_BAD_PTR_CAST_ALIGN */
 194  200          w0 =  LOAD_BIG_32(blk + 4 * 0);
 195  201          SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
 196  202          /* LINTED E_BAD_PTR_CAST_ALIGN */
↓ open down ↓ 194 lines elided ↑ open up ↑
 391  397                  SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
 392  398                  SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
 393  399                  SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
 394  400                  SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
 395  401                  SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
 396  402                  SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
 397  403                  SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
 398  404                  SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
 399  405                  SHA512_CONST_78, SHA512_CONST_79
 400  406          };
 401      -#endif
      407 +#endif  /* __sparc */
 402  408  
 403  409  
 404  410          if ((uintptr_t)blk & 0x7) {             /* not 8-byte aligned? */
 405  411                  bcopy(blk, ctx->buf_un.buf64,  sizeof (ctx->buf_un.buf64));
 406  412                  blk = (uint8_t *)ctx->buf_un.buf64;
 407  413          }
 408  414  
 409  415          /* LINTED E_BAD_PTR_CAST_ALIGN */
 410  416          w0 =  LOAD_BIG_64(blk + 8 * 0);
 411  417          SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
↓ open down ↓ 178 lines elided ↑ open up ↑
 590  596          ctx->state.s64[0] += a;
 591  597          ctx->state.s64[1] += b;
 592  598          ctx->state.s64[2] += c;
 593  599          ctx->state.s64[3] += d;
 594  600          ctx->state.s64[4] += e;
 595  601          ctx->state.s64[5] += f;
 596  602          ctx->state.s64[6] += g;
 597  603          ctx->state.s64[7] += h;
 598  604  
 599  605  }
      606 +#endif  /* !__amd64 */
 600  607  
 601  608  
 602  609  /*
 603  610   * Encode()
 604  611   *
 605  612   * purpose: to convert a list of numbers from little endian to big endian
 606  613   *   input: uint8_t *   : place to store the converted big endian numbers
 607  614   *          uint32_t *  : place to get numbers to convert from
 608  615   *          size_t      : the length of the input in bytes
 609  616   *  output: void
↓ open down ↓ 132 lines elided ↑ open up ↑
 742  749  
 743  750  #endif /* _KERNEL */
 744  751  
 745  752  /*
 746  753   * SHA2Update()
 747  754   *
 748  755   * purpose: continues an sha2 digest operation, using the message block
 749  756   *          to update the context.
 750  757   *   input: SHA2_CTX *  : the context to update
 751  758   *          void *      : the message block
 752      - *          size_t    : the length of the message block in bytes
      759 + *          size_t      : the length of the message block, in bytes
 753  760   *  output: void
 754  761   */
 755  762  
 756  763  void
 757  764  SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
 758  765  {
 759      -        uint32_t i, buf_index, buf_len, buf_limit;
 760      -        const uint8_t *input = inptr;
      766 +        uint32_t        i, buf_index, buf_len, buf_limit;
      767 +        const uint8_t   *input = inptr;
      768 +        uint32_t        algotype = ctx->algotype;
      769 +#if defined(__amd64)
      770 +        uint32_t        block_count;
      771 +#endif  /* __amd64 */
 761  772  
      773 +
 762  774          /* check for noop */
 763  775          if (input_len == 0)
 764  776                  return;
 765  777  
 766      -        if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
      778 +        if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
 767  779                  buf_limit = 64;
 768  780  
 769  781                  /* compute number of bytes mod 64 */
 770  782                  buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
 771  783  
 772  784                  /* update number of bits */
 773  785                  if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
 774  786                          ctx->count.c32[0]++;
 775  787  
 776  788                  ctx->count.c32[0] += (input_len >> 29);
↓ open down ↓ 21 lines elided ↑ open up ↑
 798  810                   * general optimization:
 799  811                   *
 800  812                   * only do initial bcopy() and SHA2Transform() if
 801  813                   * buf_index != 0.  if buf_index == 0, we're just
 802  814                   * wasting our time doing the bcopy() since there
 803  815                   * wasn't any data left over from a previous call to
 804  816                   * SHA2Update().
 805  817                   */
 806  818                  if (buf_index) {
 807  819                          bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 808      -                        if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
      820 +                        if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
 809  821                                  SHA256Transform(ctx, ctx->buf_un.buf8);
 810  822                          else
 811  823                                  SHA512Transform(ctx, ctx->buf_un.buf8);
 812  824  
 813  825                          i = buf_len;
 814  826                  }
 815  827  
 816      -
 817      -                for (; i + buf_limit - 1 < input_len; i += buf_limit) {
 818      -                        if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
      828 +#if !defined(__amd64)
      829 +                if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
      830 +                        for (; i + buf_limit - 1 < input_len; i += buf_limit) {
 819  831                                  SHA256Transform(ctx, &input[i]);
 820      -                        else
      832 +                        }
      833 +                } else {
      834 +                        for (; i + buf_limit - 1 < input_len; i += buf_limit) {
 821  835                                  SHA512Transform(ctx, &input[i]);
      836 +                        }
 822  837                  }
 823  838  
      839 +#else
      840 +                if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
      841 +                        block_count = (input_len - i) >> 6;
      842 +                        if (block_count > 0) {
      843 +                                SHA256TransformBlocks(ctx, &input[i],
      844 +                                    block_count);
      845 +                                i += block_count << 6;
      846 +                        }
      847 +                } else {
      848 +                        block_count = (input_len - i) >> 7;
      849 +                        if (block_count > 0) {
      850 +                                SHA512TransformBlocks(ctx, &input[i],
      851 +                                    block_count);
      852 +                                i += block_count << 7;
      853 +                        }
      854 +                }
      855 +#endif  /* !__amd64 */
      856 +
 824  857                  /*
 825  858                   * general optimization:
 826  859                   *
 827  860                   * if i and input_len are the same, return now instead
 828  861                   * of calling bcopy(), since the bcopy() in this case
 829      -                 * will be an expensive nop.
      862 +                 * will be an expensive noop.
 830  863                   */
 831  864  
 832  865                  if (input_len == i)
 833  866                          return;
 834  867  
 835  868                  buf_index = 0;
 836  869          }
 837  870  
 838  871          /* buffer remaining input */
 839  872          bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 840  873  }
 841  874  
 842  875  
 843  876  /*
 844  877   * SHA2Final()
 845  878   *
 846  879   * purpose: ends an sha2 digest operation, finalizing the message digest and
 847  880   *          zeroing the context.
 848      - *   input: uchar_t *   : a buffer to store the digest in
      881 + *   input: uchar_t *   : a buffer to store the digest
 849  882   *                      : The function actually uses void* because many
 850  883   *                      : callers pass things other than uchar_t here.
 851  884   *          SHA2_CTX *  : the context to finalize, save, and zero
 852  885   *  output: void
 853  886   */
 854  887  
 855  888  void
 856  889  SHA2Final(void *digest, SHA2_CTX *ctx)
 857  890  {
 858  891          uint8_t         bitcount_be[sizeof (ctx->count.c32)];
 859  892          uint8_t         bitcount_be64[sizeof (ctx->count.c64)];
 860  893          uint32_t        index;
      894 +        uint32_t        algotype = ctx->algotype;
 861  895  
 862      -
 863      -        if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
      896 +        if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
 864  897                  index  = (ctx->count.c32[1] >> 3) & 0x3f;
 865  898                  Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
 866  899                  SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
 867  900                  SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
 868  901                  Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
 869  902  
 870  903          } else {
 871  904                  index  = (ctx->count.c64[1] >> 3) & 0x7f;
 872  905                  Encode64(bitcount_be64, ctx->count.c64,
 873  906                      sizeof (bitcount_be64));
 874  907                  SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
 875  908                  SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
 876      -                if (ctx->algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
      909 +                if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
 877  910                          ctx->state.s64[6] = ctx->state.s64[7] = 0;
 878  911                          Encode64(digest, ctx->state.s64,
 879  912                              sizeof (uint64_t) * 6);
 880  913                  } else
 881  914                          Encode64(digest, ctx->state.s64,
 882  915                              sizeof (ctx->state.s64));
 883  916          }
 884  917  
 885  918          /* zeroize sensitive information */
 886  919          bzero(ctx, sizeof (*ctx));
 887  920  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX