Print this page
5072961 Need an optimized MD5 implementation for amd64
   1 /*
   2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5 
   6 /*
   7  * Cleaned-up and optimized version of MD5, based on the reference
   8  * implementation provided in RFC 1321.  See RSA Copyright information
   9  * below.
  10  */
  11 
  12 #pragma ident   "@(#)md5.c      1.27    07/04/10 SMI"
  13 
  14 /*
  15  * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
  16  */
  17 
  18 /*
  19  * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
  20  * rights reserved.
  21  *
  22  * License to copy and use this software is granted provided that it
  23  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  24  * Algorithm" in all material mentioning or referencing this software
  25  * or this function.
  26  *
  27  * License is also granted to make and use derivative works provided
  28  * that such works are identified as "derived from the RSA Data
  29  * Security, Inc. MD5 Message-Digest Algorithm" in all material
  30  * mentioning or referencing the derived work.
  31  *
  32  * RSA Data Security, Inc. makes no representations concerning either
  33  * the merchantability of this software or the suitability of this
  34  * software for any particular purpose. It is provided "as is"
  35  * without express or implied warranty of any kind.
  36  *
  37  * These notices must be retained in any copies of any part of this
  38  * documentation and/or software.
  39  */
  40 
  41 #include <sys/types.h>
  42 #include <sys/md5.h>
  43 #include <sys/md5_consts.h>       /* MD5_CONST() optimization */
  44 #include "md5_byteswap.h"
  45 #if     !defined(_KERNEL) || defined(_BOOT)
  46 #include <strings.h>
  47 #endif /* !_KERNEL || _BOOT */
  48 
  49 #ifdef _KERNEL
  50 #include <sys/systm.h>
  51 #endif /* _KERNEL */
  52 
  53 static void Encode(uint8_t *, const uint32_t *, size_t);


  54 static void MD5Transform(uint32_t, uint32_t, uint32_t, uint32_t, MD5_CTX *,
  55     const uint8_t [64]);




  56 
  57 static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
  58 
  59 /*
  60  * F, G, H and I are the basic MD5 functions.
  61  */
  /*
   * Each argument is parenthesized before `~' is applied, so a compound
   * argument such as F(x | y, c, d) is complemented as a whole rather
   * than only in its first operand.
   */
  #define F(b, c, d)      (((b) & (c)) | ((~(b)) & (d)))
  #define G(b, c, d)      (((b) & (d)) | ((c) & (~(d))))
  #define H(b, c, d)      ((b) ^ (c) ^ (d))
  #define I(b, c, d)      ((c) ^ ((b) | (~(d))))
  66 
  67 /*
  68  * ROTATE_LEFT rotates x left n bits.
  69  */
  /*
   * The right-shift count is reduced modulo the bit width of x, so that
   * n == 0 yields x itself instead of an undefined full-width shift.
   * n is expected to be smaller than the bit width of x.
   */
  #define ROTATE_LEFT(x, n)       \
          (((x) << (n)) | ((x) >> (((sizeof (x) << 3) - (n)) & \
          ((sizeof (x) << 3) - 1))))
  72 
  73 /*
  74  * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
  75  * Rotation is separate from addition to prevent recomputation.


 226  * MD5Update()
 227  *
 228  * purpose: continues an md5 digest operation, using the message block
 229  *          to update the context.
 230  *   input: MD5_CTX *   : the context to update
 231  *          uint8_t *   : the message block
 232  *          uint32_t    : the length of the message block in bytes
 233  *  output: void
 234  *
 235  * MD5 crunches in 64-byte blocks.  All numeric constants here are related to
 236  * that property of MD5.
 237  */
 238 
 239 void
 240 MD5Update(MD5_CTX *ctx, const void *inpp, unsigned int input_len)
 241 {
 242         uint32_t                i, buf_index, buf_len;
 243 #ifdef  sun4v
 244         uint32_t                old_asi;
 245 #endif  /* sun4v */



 246         const unsigned char     *input = (const unsigned char *)inpp;
 247 
 248         /* compute (number of bytes computed so far) mod 64 */
 249         buf_index = (ctx->count[0] >> 3) & 0x3F;
 250 
 251         /* update number of bits hashed into this MD5 computation so far */
 252         if ((ctx->count[0] += (input_len << 3)) < (input_len << 3))
 253             ctx->count[1]++;
 254         ctx->count[1] += (input_len >> 29);
 255 
 256         buf_len = 64 - buf_index;
 257 
 258         /* transform as many times as possible */
 259         i = 0;
 260         if (input_len >= buf_len) {
 261 
 262                 /*
 263                  * general optimization:
 264                  *
 265                  * only do initial bcopy() and MD5Transform() if
 266                  * buf_index != 0.  if buf_index == 0, we're just
 267                  * wasting our time doing the bcopy() since there
 268                  * wasn't any data left over from a previous call to
 269                  * MD5Update().
 270                  */
 271 
 272 #ifdef sun4v
 273                 /*
 274                  * For N1 use %asi register. However, costly to repeatedly set
 275                  * in MD5Transform. Therefore, set once here.
 276                  * The old %asi value is restored after the transforms.
 277                  */
 278                 old_asi = get_little();
 279                 set_little(0x88);
 280 #endif /* sun4v */
 281 
 282                 if (buf_index) {
 283                         bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 284 

 285                         MD5Transform(ctx->state[0], ctx->state[1],
 286                             ctx->state[2], ctx->state[3], ctx,
 287                             ctx->buf_un.buf8);



 288 
 289                         i = buf_len;
 290                 }
 291 

 292                 for (; i + 63 < input_len; i += 64)
 293                         MD5Transform(ctx->state[0], ctx->state[1],
 294                             ctx->state[2], ctx->state[3], ctx, &input[i]);
 295 







 296 

 297 #ifdef sun4v
 298                 /*
 299                  * Restore old %ASI value
 300                  */
 301                 set_little(old_asi);
 302 #endif /* sun4v */
 303 
 304                 /*
 305                  * general optimization:
 306                  *
 307                  * if i and input_len are the same, return now instead
 308                  * of calling bcopy(), since the bcopy() in this
 309                  * case will be an expensive nop.
 310                  */
 311 
 312                 if (input_len == i)
 313                         return;
 314 
 315                 buf_index = 0;
 316         }


 350         Encode(digest, ctx->state, sizeof (ctx->state));
 351 
 352         /* zeroize sensitive information */
 353         bzero(ctx, sizeof (*ctx));
 354 }
 355 
 356 #ifndef _KERNEL
 357 
 358 void
 359 md5_calc(unsigned char *output, unsigned char *input, unsigned int inlen)
 360 {
 361         MD5_CTX context;
 362 
 363         MD5Init(&context);
 364         MD5Update(&context, input, inlen);
 365         MD5Final(output, &context);
 366 }
 367 
 368 #endif  /* !_KERNEL */
 369 

 370 /*
 371  * sparc register window optimization:
 372  *
 373  * `a', `b', `c', and `d' are passed into MD5Transform explicitly
 374  * since it increases the number of registers available to the
 375  * compiler.  under this scheme, these variables can be held in
 376  * %i0 - %i3, which leaves more local and out registers available.
 377  */
 378 
 379 /*
 380  * MD5Transform()
 381  *
 382  * purpose: md5 transformation -- updates the digest based on `block'
 383  *   input: uint32_t    : bytes  1 -  4 of the digest
 384  *          uint32_t    : bytes  5 -  8 of the digest
 385  *          uint32_t    : bytes  9 - 12 of the digest
 386  *          uint32_t    : bytes 13 - 16 of the digest
 387  *          MD5_CTX *   : the context to update
 388  *          uint8_t [64]: the block to use to update the digest
 389  *  output: void


 618         II(c, d, a, b,  x_6, MD5_SHIFT_43, MD5_CONST_e(58)); /* 59 */
 619         II(b, c, d, a, x_13, MD5_SHIFT_44, MD5_CONST_o(59)); /* 60 */
 620         II(a, b, c, d,  x_4, MD5_SHIFT_41, MD5_CONST_e(60)); /* 61 */
 621         II(d, a, b, c, x_11, MD5_SHIFT_42, MD5_CONST_o(61)); /* 62 */
 622         II(c, d, a, b,  x_2, MD5_SHIFT_43, MD5_CONST_e(62)); /* 63 */
 623         II(b, c, d, a,  x_9, MD5_SHIFT_44, MD5_CONST_o(63)); /* 64 */
 624 
 625         ctx->state[0] += a;
 626         ctx->state[1] += b;
 627         ctx->state[2] += c;
 628         ctx->state[3] += d;
 629 
 630         /*
 631          * zeroize sensitive information -- compiler will optimize
 632          * this out if everything is kept in registers
 633          */
 634 
 635         x_0 = x_1  = x_2  = x_3  = x_4  = x_5  = x_6  = x_7 = x_8 = 0;
 636         x_9 = x_10 = x_11 = x_12 = x_13 = x_14 = x_15 = 0;
 637 }

 638 
 639 /*
 640  * Encode()
 641  *
 642  * purpose: to convert a list of numbers from host byte order to little endian
 643  *   input: uint8_t *   : place to store the converted little endian numbers
 644  *          uint32_t *  : place to get numbers to convert from
 645  *          size_t      : the length of the input in bytes
 646  *  output: void
 647  */
 648 
 649 static void
 650 Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
 651     size_t input_len)
 652 {
 653         size_t          i, j;
 654 
 655         for (i = 0, j = 0; j < input_len; i++, j += sizeof (uint32_t)) {
 656 
 657 #ifdef _LITTLE_ENDIAN
   1 /*
   2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5 
   6 /*
   7  * Cleaned-up and optimized version of MD5, based on the reference
   8  * implementation provided in RFC 1321.  See RSA Copyright information
   9  * below.
  10  */
  11 
  12 #pragma ident   "@(#)md5.c      1.28    08/01/02 SMI"
  13 
  14 /*
  15  * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
  16  */
  17 
  18 /*
  19  * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
  20  * rights reserved.
  21  *
  22  * License to copy and use this software is granted provided that it
  23  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  24  * Algorithm" in all material mentioning or referencing this software
  25  * or this function.
  26  *
  27  * License is also granted to make and use derivative works provided
  28  * that such works are identified as "derived from the RSA Data
  29  * Security, Inc. MD5 Message-Digest Algorithm" in all material
  30  * mentioning or referencing the derived work.
  31  *
  32  * RSA Data Security, Inc. makes no representations concerning either
  33  * the merchantability of this software or the suitability of this
  34  * software for any particular purpose. It is provided "as is"
  35  * without express or implied warranty of any kind.
  36  *
  37  * These notices must be retained in any copies of any part of this
  38  * documentation and/or software.
  39  */
  40 
  41 #include <sys/types.h>
  42 #include <sys/md5.h>
  43 #include <sys/md5_consts.h>       /* MD5_CONST() optimization */
  44 #include "md5_byteswap.h"
  45 #if     !defined(_KERNEL) || defined(_BOOT)
  46 #include <strings.h>
  47 #endif /* !_KERNEL || _BOOT */
  48 
  49 #ifdef _KERNEL
  50 #include <sys/systm.h>
  51 #endif /* _KERNEL */
  52 
  53 static void Encode(uint8_t *, const uint32_t *, size_t);
  54 
  55 #if !defined(__amd64)
  56 static void MD5Transform(uint32_t, uint32_t, uint32_t, uint32_t, MD5_CTX *,
  57     const uint8_t [64]);
  58 #else
  59 void md5_block_asm_host_order(MD5_CTX *ctx, const void *inpp,
  60     unsigned int input_length_in_blocks);
  61 #endif /* !defined(__amd64) */
  62 
  63 static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
  64 
  65 /*
  66  * F, G, H and I are the basic MD5 functions.
  67  */
  /*
   * Each argument is parenthesized before `~' is applied, so a compound
   * argument such as F(x | y, c, d) is complemented as a whole rather
   * than only in its first operand.
   */
  #define F(b, c, d)      (((b) & (c)) | ((~(b)) & (d)))
  #define G(b, c, d)      (((b) & (d)) | ((c) & (~(d))))
  #define H(b, c, d)      ((b) ^ (c) ^ (d))
  #define I(b, c, d)      ((c) ^ ((b) | (~(d))))
  72 
  73 /*
  74  * ROTATE_LEFT rotates x left n bits.
  75  */
  /*
   * The right-shift count is reduced modulo the bit width of x, so that
   * n == 0 yields x itself instead of an undefined full-width shift.
   * n is expected to be smaller than the bit width of x.
   */
  #define ROTATE_LEFT(x, n)       \
          (((x) << (n)) | ((x) >> (((sizeof (x) << 3) - (n)) & \
          ((sizeof (x) << 3) - 1))))
  78 
  79 /*
  80  * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
  81  * Rotation is separate from addition to prevent recomputation.


 232  * MD5Update()
 233  *
 234  * purpose: continues an md5 digest operation, using the message block
 235  *          to update the context.
 236  *   input: MD5_CTX *   : the context to update
 237  *          uint8_t *   : the message block
 238  *          uint32_t    : the length of the message block in bytes
 239  *  output: void
 240  *
 241  * MD5 crunches in 64-byte blocks.  All numeric constants here are related to
 242  * that property of MD5.
 243  */
 244 
 245 void
 246 MD5Update(MD5_CTX *ctx, const void *inpp, unsigned int input_len)
 247 {
 248         uint32_t                i, buf_index, buf_len;
 249 #ifdef  sun4v
 250         uint32_t                old_asi;
 251 #endif  /* sun4v */
 252 #if defined(__amd64)
 253         uint32_t                block_count;
 254 #endif /* defined(__amd64) */
 255         const unsigned char     *input = (const unsigned char *)inpp;
 256 
 257         /* compute (number of bytes computed so far) mod 64 */
 258         buf_index = (ctx->count[0] >> 3) & 0x3F;
 259 
 260         /* update number of bits hashed into this MD5 computation so far */
 261         if ((ctx->count[0] += (input_len << 3)) < (input_len << 3))
 262                 ctx->count[1]++;
 263         ctx->count[1] += (input_len >> 29);
 264 
 265         buf_len = 64 - buf_index;
 266 
 267         /* transform as many times as possible */
 268         i = 0;
 269         if (input_len >= buf_len) {
 270 
 271                 /*
 272                  * general optimization:
 273                  *
 274                  * only do initial bcopy() and MD5Transform() if
 275                  * buf_index != 0.  if buf_index == 0, we're just
 276                  * wasting our time doing the bcopy() since there
 277                  * wasn't any data left over from a previous call to
 278                  * MD5Update().
 279                  */
 280 
 281 #ifdef sun4v
 282                 /*
 283                  * For N1 use %asi register. However, costly to repeatedly set
 284                  * in MD5Transform. Therefore, set once here.
 285                  * The old %asi value is restored after the transforms.
 286                  */
 287                 old_asi = get_little();
 288                 set_little(0x88);
 289 #endif /* sun4v */
 290 
 291                 if (buf_index) {
 292                         bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 293 
 294 #if !defined(__amd64)
 295                         MD5Transform(ctx->state[0], ctx->state[1],
 296                             ctx->state[2], ctx->state[3], ctx,
 297                             ctx->buf_un.buf8);
 298 #else
 299                         md5_block_asm_host_order(ctx, ctx->buf_un.buf8, 1);
 300 #endif /* !defined(__amd64) */
 301 
 302                         i = buf_len;
 303                 }
 304 
 305 #if !defined(__amd64)
 306                 for (; i + 63 < input_len; i += 64)
 307                         MD5Transform(ctx->state[0], ctx->state[1],
 308                             ctx->state[2], ctx->state[3], ctx, &input[i]);
 309 
 310 #else
 311                 block_count = (input_len - i) >> 6;
 312                 if (block_count > 0) {
 313                         md5_block_asm_host_order(ctx, &input[i], block_count);
 314                         i += block_count << 6;
 315                 }
 316 #endif /* !defined(__amd64) */
 317 
 318 
 319 #ifdef sun4v
 320                 /*
 321                  * Restore old %ASI value
 322                  */
 323                 set_little(old_asi);
 324 #endif /* sun4v */
 325 
 326                 /*
 327                  * general optimization:
 328                  *
 329                  * if i and input_len are the same, return now instead
 330                  * of calling bcopy(), since the bcopy() in this
 331                  * case will be an expensive nop.
 332                  */
 333 
 334                 if (input_len == i)
 335                         return;
 336 
 337                 buf_index = 0;
 338         }


 372         Encode(digest, ctx->state, sizeof (ctx->state));
 373 
 374         /* zeroize sensitive information */
 375         bzero(ctx, sizeof (*ctx));
 376 }
 377 
 378 #ifndef _KERNEL
 379 
 380 void
 381 md5_calc(unsigned char *output, unsigned char *input, unsigned int inlen)
 382 {
 383         MD5_CTX context;
 384 
 385         MD5Init(&context);
 386         MD5Update(&context, input, inlen);
 387         MD5Final(output, &context);
 388 }
 389 
 390 #endif  /* !_KERNEL */
 391 
 392 #if !defined(__amd64)
 393 /*
 394  * sparc register window optimization:
 395  *
 396  * `a', `b', `c', and `d' are passed into MD5Transform explicitly
 397  * since it increases the number of registers available to the
 398  * compiler.  under this scheme, these variables can be held in
 399  * %i0 - %i3, which leaves more local and out registers available.
 400  */
 401 
 402 /*
 403  * MD5Transform()
 404  *
 405  * purpose: md5 transformation -- updates the digest based on `block'
 406  *   input: uint32_t    : bytes  1 -  4 of the digest
 407  *          uint32_t    : bytes  5 -  8 of the digest
 408  *          uint32_t    : bytes  9 - 12 of the digest
 409  *          uint32_t    : bytes 13 - 16 of the digest
 410  *          MD5_CTX *   : the context to update
 411  *          uint8_t [64]: the block to use to update the digest
 412  *  output: void


 641         II(c, d, a, b,  x_6, MD5_SHIFT_43, MD5_CONST_e(58)); /* 59 */
 642         II(b, c, d, a, x_13, MD5_SHIFT_44, MD5_CONST_o(59)); /* 60 */
 643         II(a, b, c, d,  x_4, MD5_SHIFT_41, MD5_CONST_e(60)); /* 61 */
 644         II(d, a, b, c, x_11, MD5_SHIFT_42, MD5_CONST_o(61)); /* 62 */
 645         II(c, d, a, b,  x_2, MD5_SHIFT_43, MD5_CONST_e(62)); /* 63 */
 646         II(b, c, d, a,  x_9, MD5_SHIFT_44, MD5_CONST_o(63)); /* 64 */
 647 
 648         ctx->state[0] += a;
 649         ctx->state[1] += b;
 650         ctx->state[2] += c;
 651         ctx->state[3] += d;
 652 
 653         /*
 654          * zeroize sensitive information -- compiler will optimize
 655          * this out if everything is kept in registers
 656          */
 657 
 658         x_0 = x_1  = x_2  = x_3  = x_4  = x_5  = x_6  = x_7 = x_8 = 0;
 659         x_9 = x_10 = x_11 = x_12 = x_13 = x_14 = x_15 = 0;
 660 }
 661 #endif /* !defined(__amd64) */
 662 
 663 /*
 664  * Encode()
 665  *
 666  * purpose: to convert a list of numbers from host byte order to little endian
 667  *   input: uint8_t *   : place to store the converted little endian numbers
 668  *          uint32_t *  : place to get numbers to convert from
 669  *          size_t      : the length of the input in bytes
 670  *  output: void
 671  */
 672 
 673 static void
 674 Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
 675     size_t input_len)
 676 {
 677         size_t          i, j;
 678 
 679         for (i = 0, j = 0; j < input_len; i++, j += sizeof (uint32_t)) {
 680 
 681 #ifdef _LITTLE_ENDIAN