Print this page
5072961 Need an optimized MD5 implementation for amd64

Split Close
Expand all
Collapse all
          --- old/usr/src/common/crypto/md5/md5.c
          +++ new/usr/src/common/crypto/md5/md5.c
   1    1  /*
   2      - * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
        2 + * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3    3   * Use is subject to license terms.
   4    4   */
   5    5  
   6    6  /*
   7    7   * Cleaned-up and optimized version of MD5, based on the reference
   8    8   * implementation provided in RFC 1321.  See RSA Copyright information
   9    9   * below.
  10   10   */
  11   11  
  12      -#pragma ident   "@(#)md5.c      1.27    07/04/10 SMI"
       12 +#pragma ident   "@(#)md5.c      1.28    08/01/02 SMI"
  13   13  
  14   14  /*
  15   15   * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
  16   16   */
  17   17  
  18   18  /*
  19   19   * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
  20   20   * rights reserved.
  21   21   *
  22   22   * License to copy and use this software is granted provided that it
↓ open down ↓ 21 lines elided ↑ open up ↑
  44   44  #include "md5_byteswap.h"
  45   45  #if     !defined(_KERNEL) || defined(_BOOT)
  46   46  #include <strings.h>
  47   47  #endif /* !_KERNEL || _BOOT */
  48   48  
  49   49  #ifdef _KERNEL
  50   50  #include <sys/systm.h>
  51   51  #endif /* _KERNEL */
  52   52  
  53   53  static void Encode(uint8_t *, const uint32_t *, size_t);
       54 +
       55 +#if !defined(__amd64)
  54   56  static void MD5Transform(uint32_t, uint32_t, uint32_t, uint32_t, MD5_CTX *,
  55   57      const uint8_t [64]);
       58 +#else
       59 +void md5_block_asm_host_order(MD5_CTX *ctx, const void *inpp,
       60 +    unsigned int input_length_in_blocks);
       61 +#endif /* !defined(__amd64) */
  56   62  
  57   63  static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
  58   64  
  59   65  /*
  60   66   * F, G, H and I are the basic MD5 functions.
  61   67   */
  62   68  #define F(b, c, d)      (((b) & (c)) | ((~b) & (d)))
  63   69  #define G(b, c, d)      (((b) & (d)) | ((c) & (~d)))
  64   70  #define H(b, c, d)      ((b) ^ (c) ^ (d))
  65   71  #define I(b, c, d)      ((c) ^ ((b) | (~d)))
↓ open down ↓ 170 lines elided ↑ open up ↑
 236  242   * that property of MD5.
 237  243   */
 238  244  
 239  245  void
 240  246  MD5Update(MD5_CTX *ctx, const void *inpp, unsigned int input_len)
 241  247  {
 242  248          uint32_t                i, buf_index, buf_len;
 243  249  #ifdef  sun4v
 244  250          uint32_t                old_asi;
 245  251  #endif  /* sun4v */
      252 +#if defined(__amd64)
      253 +        uint32_t                block_count;
      254 +#endif /* defined(__amd64) */
 246  255          const unsigned char     *input = (const unsigned char *)inpp;
 247  256  
 248  257          /* compute (number of bytes computed so far) mod 64 */
 249  258          buf_index = (ctx->count[0] >> 3) & 0x3F;
 250  259  
 251  260          /* update number of bits hashed into this MD5 computation so far */
 252  261          if ((ctx->count[0] += (input_len << 3)) < (input_len << 3))
 253      -            ctx->count[1]++;
      262 +                ctx->count[1]++;
 254  263          ctx->count[1] += (input_len >> 29);
 255  264  
 256  265          buf_len = 64 - buf_index;
 257  266  
 258  267          /* transform as many times as possible */
 259  268          i = 0;
 260  269          if (input_len >= buf_len) {
 261  270  
 262  271                  /*
 263  272                   * general optimization:
↓ open down ↓ 11 lines elided ↑ open up ↑
 275  284                   * in MD5Transform. Therefore, set once here.
 276  285                   * Should probably restore the old value afterwards...
 277  286                   */
 278  287                  old_asi = get_little();
 279  288                  set_little(0x88);
 280  289  #endif /* sun4v */
 281  290  
 282  291                  if (buf_index) {
 283  292                          bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 284  293  
      294 +#if !defined(__amd64)
 285  295                          MD5Transform(ctx->state[0], ctx->state[1],
 286  296                              ctx->state[2], ctx->state[3], ctx,
 287  297                              ctx->buf_un.buf8);
      298 +#else
      299 +                        md5_block_asm_host_order(ctx, ctx->buf_un.buf8, 1);
      300 +#endif /* !defined(__amd64) */
 288  301  
 289  302                          i = buf_len;
 290  303                  }
 291  304  
      305 +#if !defined(__amd64)
 292  306                  for (; i + 63 < input_len; i += 64)
 293  307                          MD5Transform(ctx->state[0], ctx->state[1],
 294  308                              ctx->state[2], ctx->state[3], ctx, &input[i]);
 295  309  
      310 +#else
      311 +                block_count = (input_len - i) >> 6;
      312 +                if (block_count > 0) {
      313 +                        md5_block_asm_host_order(ctx, &input[i], block_count);
      314 +                        i += block_count << 6;
      315 +                }
      316 +#endif /* !defined(__amd64) */
 296  317  
      318 +
 297  319  #ifdef sun4v
 298  320                  /*
 299  321                   * Restore old %ASI value
 300  322                   */
 301  323                  set_little(old_asi);
 302  324  #endif /* sun4v */
 303  325  
 304  326                  /*
 305  327                   * general optimization:
 306  328                   *
↓ open down ↓ 53 lines elided ↑ open up ↑
 360  382  {
 361  383          MD5_CTX context;
 362  384  
 363  385          MD5Init(&context);
 364  386          MD5Update(&context, input, inlen);
 365  387          MD5Final(output, &context);
 366  388  }
 367  389  
 368  390  #endif  /* !_KERNEL */
 369  391  
      392 +#if !defined(__amd64)
 370  393  /*
 371  394   * sparc register window optimization:
 372  395   *
 373  396   * `a', `b', `c', and `d' are passed into MD5Transform explicitly
 374  397   * since it increases the number of registers available to the
 375  398   * compiler.  under this scheme, these variables can be held in
 376  399   * %i0 - %i3, which leaves more local and out registers available.
 377  400   */
 378  401  
 379  402  /*
↓ open down ↓ 248 lines elided ↑ open up ↑
 628  651          ctx->state[3] += d;
 629  652  
 630  653          /*
 631  654           * zeroize sensitive information -- compiler will optimize
 632  655           * this out if everything is kept in registers
 633  656           */
 634  657  
 635  658          x_0 = x_1  = x_2  = x_3  = x_4  = x_5  = x_6  = x_7 = x_8 = 0;
 636  659          x_9 = x_10 = x_11 = x_12 = x_13 = x_14 = x_15 = 0;
 637  660  }
      661 +#endif /* !defined(__amd64) */
 638  662  
 639  663  /*
 640  664   * Encode()
 641  665   *
 642  666   * purpose: to convert a list of numbers from big endian to little endian
 643  667   *   input: uint8_t *   : place to store the converted little endian numbers
 644  668   *          uint32_t *  : place to get numbers to convert from
 645  669   *          size_t      : the length of the input in bytes
 646  670   *  output: void
 647  671   */
↓ open down ↓ 29 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX