/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Cleaned-up and optimized version of MD5, based on the reference
 * implementation provided in RFC 1321.  See RSA Copyright information
 * below.
 */

#pragma ident   "@(#)md5.c      1.28    08/01/02 SMI"

/*
 * MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
 */

/*
 * Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
 * rights reserved.
 *
 * License to copy and use this software is granted provided that it
 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 * Algorithm" in all material mentioning or referencing this software
 * or this function.
 *
 * License is also granted to make and use derivative works provided
 * that such works are identified as "derived from the RSA Data
 * Security, Inc. MD5 Message-Digest Algorithm" in all material
 * mentioning or referencing the derived work.
 *
 * RSA Data Security, Inc. makes no representations concerning either
 * the merchantability of this software or the suitability of this
 * software for any particular purpose. It is provided "as is"
 * without express or implied warranty of any kind.
 *
 * These notices must be retained in any copies of any part of this
 * documentation and/or software.
 */

#include <sys/types.h>
#include <sys/md5.h>
#include <sys/md5_consts.h>       /* MD5_CONST() optimization */
#include "md5_byteswap.h"
#if     !defined(_KERNEL) || defined(_BOOT)
#include <strings.h>
#endif /* !_KERNEL || _BOOT */

#ifdef _KERNEL
#include <sys/systm.h>
#endif /* _KERNEL */

static void Encode(uint8_t *, const uint32_t *, size_t);

#if !defined(__amd64)
static void MD5Transform(uint32_t, uint32_t, uint32_t, uint32_t, MD5_CTX *,
    const uint8_t [64]);
#else
void md5_block_asm_host_order(MD5_CTX *ctx, const void *inpp,
    unsigned int input_length_in_blocks);
#endif /* !defined(__amd64) */

static uint8_t PADDING[64] = { 0x80, /* all zeros */ };

/*
 * F, G, H and I are the basic MD5 functions.
 */
#define F(b, c, d)      (((b) & (c)) | ((~b) & (d)))
#define G(b, c, d)      (((b) & (d)) | ((c) & (~d)))
#define H(b, c, d)      ((b) ^ (c) ^ (d))
#define I(b, c, d)      ((c) ^ ((b) | (~d)))

/*
 * ROTATE_LEFT rotates x left n bits.
 */
#define ROTATE_LEFT(x, n)       \
        (((x) << (n)) | ((x) >> ((sizeof (x) << 3) - (n))))
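
/*
 * as a quick worked example (assuming a 32-bit x):
 * ROTATE_LEFT(0x80000001, 4) == (0x00000010 | 0x00000008) == 0x00000018.
 */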

/*
 * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
 * Rotation is separate from addition to prevent recomputation.
 */

#define FF(a, b, c, d, x, s, ac) { \
        (a) += F((b), (c), (d)) + (x) + ((unsigned long long)(ac)); \
        (a) = ROTATE_LEFT((a), (s)); \
        (a) += (b); \
        }

#define GG(a, b, c, d, x, s, ac) { \
        (a) += G((b), (c), (d)) + (x) + ((unsigned long long)(ac)); \
        (a) = ROTATE_LEFT((a), (s)); \
        (a) += (b); \
        }

#define HH(a, b, c, d, x, s, ac) { \
        (a) += H((b), (c), (d)) + (x) + ((unsigned long long)(ac)); \
        (a) = ROTATE_LEFT((a), (s)); \
        (a) += (b); \
        }

#define II(a, b, c, d, x, s, ac) { \
        (a) += I((b), (c), (d)) + (x) + ((unsigned long long)(ac)); \
        (a) = ROTATE_LEFT((a), (s)); \
        (a) += (b); \
        }
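
/*
 * illustrative expansion of the first round-1 step,
 * FF(a, b, c, d, x_0, MD5_SHIFT_11, MD5_CONST_e(0)), assuming the
 * RFC 1321 values MD5_SHIFT_11 == 7 and MD5_CONST_0 == 0xd76aa478:
 */
#if 0   /* sketch only -- not compiled */
        a += ((b & c) | (~b & d)) + x_0 + 0xd76aa478;   /* F() + X[0] + T[1] */
        a = (a << 7) | (a >> 25);                       /* ROTATE_LEFT(a, 7) */
        a += b;
#endif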

/*
 * Loading 32-bit constants on a RISC is expensive since it involves both a
 * `sethi' and an `or'.  thus, we instead have the compiler generate `ld's to
 * load the constants from an array called `md5_consts'.  however, on intel
 * (and other CISC processors), it is cheaper to load the constant
 * directly.  thus, the c code in MD5Transform() uses the macro MD5_CONST()
 * which either expands to a constant or an array reference, depending on the
 * architecture the code is being compiled for.
 *
 * Right now, i386 and amd64 are the CISC exceptions.
 * If we get another CISC ISA, we'll have to change the ifdef.
 */

#if defined(__i386) || defined(__amd64)

#define MD5_CONST(x)            (MD5_CONST_ ## x)
#define MD5_CONST_e(x)          MD5_CONST(x)
#define MD5_CONST_o(x)          MD5_CONST(x)

#else
/*
 * sparc/RISC optimization:
 *
 * while it is somewhat counter-intuitive, on sparc (and presumably other RISC
 * machines), it is more efficient to place all the constants used in this
 * function in an array and load the values out of the array than to manually
 * load the constants.  this is because setting a register to a 32-bit value
 * takes two ops in most cases: a `sethi' and an `or', but loading a 32-bit
 * value from memory only takes one `ld' (or `lduw' on v9).  while this
 * increases memory usage, the compiler can find enough other things to do
 * while waiting for the loads, so the pipeline does not stall.  additionally,
 * it is likely that many of these constants are cached so that later accesses
 * do not even go out to the bus.
 *
 * this array is declared `static' to keep the compiler from having to
 * bcopy() this array onto the stack frame of MD5Transform() each time it is
 * called -- which is unacceptably expensive.
 *
 * the `const' is to ensure that callers are good citizens and do not try to
 * munge the array.  since these routines are going to be called from inside
 * multithreaded kernelland, this is a good safety check. -- `constants' will
 * end up in .rodata.
 *
 * unfortunately, loading from an array in this manner hurts performance under
 * intel (and presumably other CISC machines).  so, there is a macro,
 * MD5_CONST(), used in MD5Transform(), that either expands to a reference to
 * this array, or to the actual constant, depending on what platform this code
 * is compiled for.
 */

#ifdef sun4v

/*
 * Going to load these consts in 8B chunks, so need to enforce 8B alignment
 */

/* CSTYLED */
#pragma align 64 (md5_consts)
#define _MD5_CHECK_ALIGNMENT

#endif /* sun4v */

static const uint32_t md5_consts[] = {
        MD5_CONST_0,    MD5_CONST_1,    MD5_CONST_2,    MD5_CONST_3,
        MD5_CONST_4,    MD5_CONST_5,    MD5_CONST_6,    MD5_CONST_7,
        MD5_CONST_8,    MD5_CONST_9,    MD5_CONST_10,   MD5_CONST_11,
        MD5_CONST_12,   MD5_CONST_13,   MD5_CONST_14,   MD5_CONST_15,
        MD5_CONST_16,   MD5_CONST_17,   MD5_CONST_18,   MD5_CONST_19,
        MD5_CONST_20,   MD5_CONST_21,   MD5_CONST_22,   MD5_CONST_23,
        MD5_CONST_24,   MD5_CONST_25,   MD5_CONST_26,   MD5_CONST_27,
        MD5_CONST_28,   MD5_CONST_29,   MD5_CONST_30,   MD5_CONST_31,
        MD5_CONST_32,   MD5_CONST_33,   MD5_CONST_34,   MD5_CONST_35,
        MD5_CONST_36,   MD5_CONST_37,   MD5_CONST_38,   MD5_CONST_39,
        MD5_CONST_40,   MD5_CONST_41,   MD5_CONST_42,   MD5_CONST_43,
        MD5_CONST_44,   MD5_CONST_45,   MD5_CONST_46,   MD5_CONST_47,
        MD5_CONST_48,   MD5_CONST_49,   MD5_CONST_50,   MD5_CONST_51,
        MD5_CONST_52,   MD5_CONST_53,   MD5_CONST_54,   MD5_CONST_55,
        MD5_CONST_56,   MD5_CONST_57,   MD5_CONST_58,   MD5_CONST_59,
        MD5_CONST_60,   MD5_CONST_61,   MD5_CONST_62,   MD5_CONST_63
};


#ifdef sun4v
/*
 * To reduce the number of loads, load consts in 64-bit
 * chunks and then split.
 *
 * No need to mask the upper 32 bits, as we are only interested in
 * the low 32 bits (this saves an & operation and means that the
 * optimization does not increase the icount).
 */
#define MD5_CONST_e(x)          (md5_consts64[(x) / 2] >> 32)
#define MD5_CONST_o(x)          (md5_consts64[(x) / 2])
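
/*
 * illustrative example (big-endian sparc): md5_consts64[0] overlays
 * { MD5_CONST_0, MD5_CONST_1 }, so MD5_CONST_e(0) yields MD5_CONST_0
 * from the upper half, while MD5_CONST_o(1) yields the whole 64-bit
 * word, whose low half (MD5_CONST_1) is all that survives the 32-bit
 * arithmetic in FF() and friends.
 */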

#else

#define MD5_CONST_e(x)          (md5_consts[x])
#define MD5_CONST_o(x)          (md5_consts[x])

#endif /* sun4v */

#endif /* !(defined(__i386) || defined(__amd64)) */

/*
 * MD5Init()
 *
 * purpose: initializes the md5 context and begins an md5 digest operation
 *   input: MD5_CTX *   : the context to initialize.
 *  output: void
 */

void
MD5Init(MD5_CTX *ctx)
{
        ctx->count[0] = ctx->count[1] = 0;

        /* load magic initialization constants */
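        /*
         * (per RFC 1321, these are 0x67452301, 0xefcdab89, 0x98badcfe,
         * and 0x10325476, respectively.)
         */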
        ctx->state[0] = MD5_INIT_CONST_1;
        ctx->state[1] = MD5_INIT_CONST_2;
        ctx->state[2] = MD5_INIT_CONST_3;
        ctx->state[3] = MD5_INIT_CONST_4;
}

/*
 * MD5Update()
 *
 * purpose: continues an md5 digest operation, using the message block
 *          to update the context.
 *   input: MD5_CTX *   : the context to update
 *          uint8_t *   : the message block
 *          uint32_t    : the length of the message block in bytes
 *  output: void
 *
 * MD5 crunches in 64-byte blocks.  All numeric constants here are related to
 * that property of MD5.
 */

void
MD5Update(MD5_CTX *ctx, const void *inpp, unsigned int input_len)
{
        uint32_t                i, buf_index, buf_len;
#ifdef  sun4v
        uint32_t                old_asi;
#endif  /* sun4v */
#if defined(__amd64)
        uint32_t                block_count;
#endif /* defined(__amd64) */
        const unsigned char     *input = (const unsigned char *)inpp;

        /* compute (number of bytes computed so far) mod 64 */
        buf_index = (ctx->count[0] >> 3) & 0x3F;

        /* update number of bits hashed into this MD5 computation so far */
        if ((ctx->count[0] += (input_len << 3)) < (input_len << 3))
                ctx->count[1]++;
        ctx->count[1] += (input_len >> 29);
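
        /*
         * the 64-bit bit count lives in two 32-bit words: the `if' above
         * detects unsigned wraparound of the low word (a carry), and
         * (input_len >> 29) adds the bits that (input_len << 3) shifted
         * out of 32-bit range into the high word.
         */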

        buf_len = 64 - buf_index;

        /* transform as many times as possible */
        i = 0;
        if (input_len >= buf_len) {

                /*
                 * general optimization:
                 *
                 * only do initial bcopy() and MD5Transform() if
                 * buf_index != 0.  if buf_index == 0, we're just
                 * wasting our time doing the bcopy() since there
                 * wasn't any data left over from a previous call to
                 * MD5Update().
                 */

#ifdef sun4v
                /*
                 * For N1 use %asi register. However, costly to repeatedly set
                 * in MD5Transform. Therefore, set once here.
                 * The old value is restored after the transforms below.
                 */
                old_asi = get_little();
                set_little(0x88);
#endif /* sun4v */

                if (buf_index) {
                        bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);

#if !defined(__amd64)
                        MD5Transform(ctx->state[0], ctx->state[1],
                            ctx->state[2], ctx->state[3], ctx,
                            ctx->buf_un.buf8);
#else
                        md5_block_asm_host_order(ctx, ctx->buf_un.buf8, 1);
#endif /* !defined(__amd64) */

                        i = buf_len;
                }

#if !defined(__amd64)
                for (; i + 63 < input_len; i += 64)
                        MD5Transform(ctx->state[0], ctx->state[1],
                            ctx->state[2], ctx->state[3], ctx, &input[i]);

#else
                block_count = (input_len - i) >> 6;
                if (block_count > 0) {
                        md5_block_asm_host_order(ctx, &input[i], block_count);
                        i += block_count << 6;
                }
#endif /* !defined(__amd64) */


#ifdef sun4v
                /*
                 * Restore old %ASI value
                 */
                set_little(old_asi);
#endif /* sun4v */

                /*
                 * general optimization:
                 *
                 * if i and input_len are the same, return now instead
                 * of calling bcopy(), since the bcopy() in this
                 * case will be an expensive nop.
                 */

                if (input_len == i)
                        return;

                buf_index = 0;
        }

        /* buffer remaining input */
        bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
}

/*
 * MD5Final()
 *
 * purpose: ends an md5 digest operation, finalizing the message digest and
 *          zeroing the context.
 *   input: uchar_t *   : a buffer to store the digest in
 *                      : The function actually uses void* because many
 *                      : callers pass things other than uchar_t here.
 *          MD5_CTX *   : the context to finalize, save, and zero
 *  output: void
 */

void
MD5Final(void *digest, MD5_CTX *ctx)
{
        uint8_t         bitcount_le[sizeof (ctx->count)];
        uint32_t        index = (ctx->count[0] >> 3) & 0x3f;

        /* store bit count, little endian */
        Encode(bitcount_le, ctx->count, sizeof (bitcount_le));

        /* pad out to 56 mod 64 */
        MD5Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
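
        /*
         * worked example: if index == 60, 120 - 60 == 60 pad bytes are
         * added, so (60 + 60) % 64 == 56 bytes occupy the final block
         * before the 8-byte bit count brings it to a full 64.
         */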

        /* append length (before padding) */
        MD5Update(ctx, bitcount_le, sizeof (bitcount_le));

        /* store state in digest */
        Encode(digest, ctx->state, sizeof (ctx->state));

        /* zeroize sensitive information */
        bzero(ctx, sizeof (*ctx));
}

#ifndef _KERNEL

void
md5_calc(unsigned char *output, unsigned char *input, unsigned int inlen)
{
        MD5_CTX context;

        MD5Init(&context);
        MD5Update(&context, input, inlen);
        MD5Final(output, &context);
}

#endif  /* !_KERNEL */
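
/*
 * illustrative usage sketch (not compiled; assumes <stdio.h> for printf,
 * and md5_example() is a hypothetical helper, not part of this file's
 * interface).  incremental MD5Update() calls produce the same digest as
 * a single md5_calc(); the RFC 1321 test vector for "abc" is
 * 900150983cd24fb0d6963f7d28e17f72.
 */
#if 0
static void
md5_example(void)
{
        MD5_CTX ctx;
        unsigned char digest[16];
        int i;

        MD5Init(&ctx);
        MD5Update(&ctx, "a", 1);
        MD5Update(&ctx, "bc", 2);
        MD5Final(digest, &ctx);

        for (i = 0; i < 16; i++)
                (void) printf("%02x", digest[i]);
        (void) printf("\n");
}
#endif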

#if !defined(__amd64)
/*
 * sparc register window optimization:
 *
 * `a', `b', `c', and `d' are passed into MD5Transform explicitly
 * since it increases the number of registers available to the
 * compiler.  under this scheme, these variables can be held in
 * %i0 - %i3, which leaves more local and out registers available.
 */

/*
 * MD5Transform()
 *
 * purpose: md5 transformation -- updates the digest based on `block'
 *   input: uint32_t    : bytes  1 -  4 of the digest
 *          uint32_t    : bytes  5 -  8 of the digest
 *          uint32_t    : bytes  9 - 12 of the digest
 *          uint32_t    : bytes 13 - 16 of the digest
 *          MD5_CTX *   : the context to update
 *          uint8_t [64]: the block to use to update the digest
 *  output: void
 */

static void
MD5Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
    MD5_CTX *ctx, const uint8_t block[64])
{
        /*
         * general optimization:
         *
         * use individual integers instead of using an array.  this is a
         * win, although the amount it wins by seems to vary quite a bit.
         */

        register uint32_t       x_0, x_1, x_2,  x_3,  x_4,  x_5,  x_6,  x_7;
        register uint32_t       x_8, x_9, x_10, x_11, x_12, x_13, x_14, x_15;
#ifdef sun4v
        unsigned long long      *md5_consts64;

        /* LINTED E_BAD_PTR_CAST_ALIGN */
        md5_consts64 = (unsigned long long *) md5_consts;
#endif  /* sun4v */

        /*
         * general optimization:
         *
         * the compiler (at least SC4.2/5.x) generates better code if
         * variable use is localized.  in this case, swapping the integers in
         * this order allows `x_0' to be swapped nearest to its first use in
         * FF(), and likewise for `x_1' and up.  note that the compiler
         * prefers this to doing each swap right before the FF() that
         * uses it.
         */

        /*
         * sparc v9/v8plus optimization:
         *
         * if `block' is already aligned on a 4-byte boundary, use the
         * optimized load_little_32() directly.  otherwise, bcopy()
         * into a buffer that *is* aligned on a 4-byte boundary and
         * then do the load_little_32() on that buffer.  benchmarks
         * have shown that using the bcopy() is better than loading
         * the bytes individually and doing the endian-swap by hand.
         *
         * even though it's quite tempting to just do:
         *
         * blk = bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
         *
         * and only have one set of LOAD_LITTLE_32()'s, the compiler (at least
         * SC4.2/5.x) *does not* like that, so please resist the urge.
         */

#ifdef _MD5_CHECK_ALIGNMENT
        if ((uintptr_t)block & 0x3) {               /* not 4-byte aligned? */
                bcopy(block, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));

#ifdef sun4v
                x_15 = LOAD_LITTLE_32_f(ctx->buf_un.buf32);
                x_14 = LOAD_LITTLE_32_e(ctx->buf_un.buf32);
                x_13 = LOAD_LITTLE_32_d(ctx->buf_un.buf32);
                x_12 = LOAD_LITTLE_32_c(ctx->buf_un.buf32);
                x_11 = LOAD_LITTLE_32_b(ctx->buf_un.buf32);
                x_10 = LOAD_LITTLE_32_a(ctx->buf_un.buf32);
                x_9  = LOAD_LITTLE_32_9(ctx->buf_un.buf32);
                x_8  = LOAD_LITTLE_32_8(ctx->buf_un.buf32);
                x_7  = LOAD_LITTLE_32_7(ctx->buf_un.buf32);
                x_6  = LOAD_LITTLE_32_6(ctx->buf_un.buf32);
                x_5  = LOAD_LITTLE_32_5(ctx->buf_un.buf32);
                x_4  = LOAD_LITTLE_32_4(ctx->buf_un.buf32);
                x_3  = LOAD_LITTLE_32_3(ctx->buf_un.buf32);
                x_2  = LOAD_LITTLE_32_2(ctx->buf_un.buf32);
                x_1  = LOAD_LITTLE_32_1(ctx->buf_un.buf32);
                x_0  = LOAD_LITTLE_32_0(ctx->buf_un.buf32);
#else
                x_15 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 15);
                x_14 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 14);
                x_13 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 13);
                x_12 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 12);
                x_11 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 11);
                x_10 = LOAD_LITTLE_32(ctx->buf_un.buf32 + 10);
                x_9  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  9);
                x_8  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  8);
                x_7  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  7);
                x_6  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  6);
                x_5  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  5);
                x_4  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  4);
                x_3  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  3);
                x_2  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  2);
                x_1  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  1);
                x_0  = LOAD_LITTLE_32(ctx->buf_un.buf32 +  0);
#endif /* sun4v */
        } else
#endif
        {

#ifdef sun4v
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_15 = LOAD_LITTLE_32_f(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_14 = LOAD_LITTLE_32_e(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_13 = LOAD_LITTLE_32_d(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_12 = LOAD_LITTLE_32_c(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_11 = LOAD_LITTLE_32_b(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_10 = LOAD_LITTLE_32_a(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_9  = LOAD_LITTLE_32_9(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_8  = LOAD_LITTLE_32_8(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_7  = LOAD_LITTLE_32_7(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_6  = LOAD_LITTLE_32_6(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_5  = LOAD_LITTLE_32_5(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_4  = LOAD_LITTLE_32_4(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_3  = LOAD_LITTLE_32_3(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_2  = LOAD_LITTLE_32_2(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_1  = LOAD_LITTLE_32_1(block);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_0  = LOAD_LITTLE_32_0(block);
#else
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_15 = LOAD_LITTLE_32(block + 60);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_14 = LOAD_LITTLE_32(block + 56);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_13 = LOAD_LITTLE_32(block + 52);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_12 = LOAD_LITTLE_32(block + 48);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_11 = LOAD_LITTLE_32(block + 44);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_10 = LOAD_LITTLE_32(block + 40);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_9  = LOAD_LITTLE_32(block + 36);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_8  = LOAD_LITTLE_32(block + 32);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_7  = LOAD_LITTLE_32(block + 28);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_6  = LOAD_LITTLE_32(block + 24);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_5  = LOAD_LITTLE_32(block + 20);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_4  = LOAD_LITTLE_32(block + 16);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_3  = LOAD_LITTLE_32(block + 12);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_2  = LOAD_LITTLE_32(block +  8);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_1  = LOAD_LITTLE_32(block +  4);
                /* LINTED E_BAD_PTR_CAST_ALIGN */
                x_0  = LOAD_LITTLE_32(block +  0);
#endif /* sun4v */
        }

        /* round 1 */
        FF(a, b, c, d,  x_0, MD5_SHIFT_11, MD5_CONST_e(0));  /* 1 */
        FF(d, a, b, c,  x_1, MD5_SHIFT_12, MD5_CONST_o(1));  /* 2 */
        FF(c, d, a, b,  x_2, MD5_SHIFT_13, MD5_CONST_e(2));  /* 3 */
        FF(b, c, d, a,  x_3, MD5_SHIFT_14, MD5_CONST_o(3));  /* 4 */
        FF(a, b, c, d,  x_4, MD5_SHIFT_11, MD5_CONST_e(4));  /* 5 */
        FF(d, a, b, c,  x_5, MD5_SHIFT_12, MD5_CONST_o(5));  /* 6 */
        FF(c, d, a, b,  x_6, MD5_SHIFT_13, MD5_CONST_e(6));  /* 7 */
        FF(b, c, d, a,  x_7, MD5_SHIFT_14, MD5_CONST_o(7));  /* 8 */
        FF(a, b, c, d,  x_8, MD5_SHIFT_11, MD5_CONST_e(8));  /* 9 */
        FF(d, a, b, c,  x_9, MD5_SHIFT_12, MD5_CONST_o(9));  /* 10 */
        FF(c, d, a, b, x_10, MD5_SHIFT_13, MD5_CONST_e(10)); /* 11 */
        FF(b, c, d, a, x_11, MD5_SHIFT_14, MD5_CONST_o(11)); /* 12 */
        FF(a, b, c, d, x_12, MD5_SHIFT_11, MD5_CONST_e(12)); /* 13 */
        FF(d, a, b, c, x_13, MD5_SHIFT_12, MD5_CONST_o(13)); /* 14 */
        FF(c, d, a, b, x_14, MD5_SHIFT_13, MD5_CONST_e(14)); /* 15 */
        FF(b, c, d, a, x_15, MD5_SHIFT_14, MD5_CONST_o(15)); /* 16 */

        /* round 2 */
        GG(a, b, c, d,  x_1, MD5_SHIFT_21, MD5_CONST_e(16)); /* 17 */
        GG(d, a, b, c,  x_6, MD5_SHIFT_22, MD5_CONST_o(17)); /* 18 */
        GG(c, d, a, b, x_11, MD5_SHIFT_23, MD5_CONST_e(18)); /* 19 */
        GG(b, c, d, a,  x_0, MD5_SHIFT_24, MD5_CONST_o(19)); /* 20 */
        GG(a, b, c, d,  x_5, MD5_SHIFT_21, MD5_CONST_e(20)); /* 21 */
        GG(d, a, b, c, x_10, MD5_SHIFT_22, MD5_CONST_o(21)); /* 22 */
        GG(c, d, a, b, x_15, MD5_SHIFT_23, MD5_CONST_e(22)); /* 23 */
        GG(b, c, d, a,  x_4, MD5_SHIFT_24, MD5_CONST_o(23)); /* 24 */
        GG(a, b, c, d,  x_9, MD5_SHIFT_21, MD5_CONST_e(24)); /* 25 */
        GG(d, a, b, c, x_14, MD5_SHIFT_22, MD5_CONST_o(25)); /* 26 */
        GG(c, d, a, b,  x_3, MD5_SHIFT_23, MD5_CONST_e(26)); /* 27 */
        GG(b, c, d, a,  x_8, MD5_SHIFT_24, MD5_CONST_o(27)); /* 28 */
        GG(a, b, c, d, x_13, MD5_SHIFT_21, MD5_CONST_e(28)); /* 29 */
        GG(d, a, b, c,  x_2, MD5_SHIFT_22, MD5_CONST_o(29)); /* 30 */
        GG(c, d, a, b,  x_7, MD5_SHIFT_23, MD5_CONST_e(30)); /* 31 */
        GG(b, c, d, a, x_12, MD5_SHIFT_24, MD5_CONST_o(31)); /* 32 */

        /* round 3 */
        HH(a, b, c, d,  x_5, MD5_SHIFT_31, MD5_CONST_e(32)); /* 33 */
        HH(d, a, b, c,  x_8, MD5_SHIFT_32, MD5_CONST_o(33)); /* 34 */
        HH(c, d, a, b, x_11, MD5_SHIFT_33, MD5_CONST_e(34)); /* 35 */
        HH(b, c, d, a, x_14, MD5_SHIFT_34, MD5_CONST_o(35)); /* 36 */
        HH(a, b, c, d,  x_1, MD5_SHIFT_31, MD5_CONST_e(36)); /* 37 */
        HH(d, a, b, c,  x_4, MD5_SHIFT_32, MD5_CONST_o(37)); /* 38 */
        HH(c, d, a, b,  x_7, MD5_SHIFT_33, MD5_CONST_e(38)); /* 39 */
        HH(b, c, d, a, x_10, MD5_SHIFT_34, MD5_CONST_o(39)); /* 40 */
        HH(a, b, c, d, x_13, MD5_SHIFT_31, MD5_CONST_e(40)); /* 41 */
        HH(d, a, b, c,  x_0, MD5_SHIFT_32, MD5_CONST_o(41)); /* 42 */
        HH(c, d, a, b,  x_3, MD5_SHIFT_33, MD5_CONST_e(42)); /* 43 */
        HH(b, c, d, a,  x_6, MD5_SHIFT_34, MD5_CONST_o(43)); /* 44 */
        HH(a, b, c, d,  x_9, MD5_SHIFT_31, MD5_CONST_e(44)); /* 45 */
        HH(d, a, b, c, x_12, MD5_SHIFT_32, MD5_CONST_o(45)); /* 46 */
        HH(c, d, a, b, x_15, MD5_SHIFT_33, MD5_CONST_e(46)); /* 47 */
        HH(b, c, d, a,  x_2, MD5_SHIFT_34, MD5_CONST_o(47)); /* 48 */

        /* round 4 */
        II(a, b, c, d,  x_0, MD5_SHIFT_41, MD5_CONST_e(48)); /* 49 */
        II(d, a, b, c,  x_7, MD5_SHIFT_42, MD5_CONST_o(49)); /* 50 */
        II(c, d, a, b, x_14, MD5_SHIFT_43, MD5_CONST_e(50)); /* 51 */
        II(b, c, d, a,  x_5, MD5_SHIFT_44, MD5_CONST_o(51)); /* 52 */
        II(a, b, c, d, x_12, MD5_SHIFT_41, MD5_CONST_e(52)); /* 53 */
        II(d, a, b, c,  x_3, MD5_SHIFT_42, MD5_CONST_o(53)); /* 54 */
        II(c, d, a, b, x_10, MD5_SHIFT_43, MD5_CONST_e(54)); /* 55 */
        II(b, c, d, a,  x_1, MD5_SHIFT_44, MD5_CONST_o(55)); /* 56 */
        II(a, b, c, d,  x_8, MD5_SHIFT_41, MD5_CONST_e(56)); /* 57 */
        II(d, a, b, c, x_15, MD5_SHIFT_42, MD5_CONST_o(57)); /* 58 */
        II(c, d, a, b,  x_6, MD5_SHIFT_43, MD5_CONST_e(58)); /* 59 */
        II(b, c, d, a, x_13, MD5_SHIFT_44, MD5_CONST_o(59)); /* 60 */
        II(a, b, c, d,  x_4, MD5_SHIFT_41, MD5_CONST_e(60)); /* 61 */
        II(d, a, b, c, x_11, MD5_SHIFT_42, MD5_CONST_o(61)); /* 62 */
        II(c, d, a, b,  x_2, MD5_SHIFT_43, MD5_CONST_e(62)); /* 63 */
        II(b, c, d, a,  x_9, MD5_SHIFT_44, MD5_CONST_o(63)); /* 64 */

        ctx->state[0] += a;
        ctx->state[1] += b;
        ctx->state[2] += c;
        ctx->state[3] += d;

        /*
         * zeroize sensitive information -- compiler will optimize
         * this out if everything is kept in registers
         */

        x_0 = x_1  = x_2  = x_3  = x_4  = x_5  = x_6  = x_7 = x_8 = 0;
        x_9 = x_10 = x_11 = x_12 = x_13 = x_14 = x_15 = 0;
}
#endif /* !defined(__amd64) */

/*
 * Encode()
 *
 * purpose: to convert a list of 32-bit words from host byte order to
 *          little endian (a straight copy on little endian machines)
 *   input: uint8_t *   : place to store the converted little endian numbers
 *          uint32_t *  : place to get numbers to convert from
 *          size_t      : the length of the input in bytes
 *  output: void
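 *
 * example: the 32-bit word 0x11223344 is emitted as the byte sequence
 * 0x44 0x33 0x22 0x11.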
 */

static void
Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
    size_t input_len)
{
        size_t          i, j;

        for (i = 0, j = 0; j < input_len; i++, j += sizeof (uint32_t)) {

#ifdef _LITTLE_ENDIAN

#ifdef _MD5_CHECK_ALIGNMENT
                if ((uintptr_t)output & 0x3)        /* Not 4-byte aligned */
                        bcopy(input + i, output + j, 4);
                else
                        /* LINTED E_BAD_PTR_CAST_ALIGN */
                        *(uint32_t *)(output + j) = input[i];
#else
                /*LINTED E_BAD_PTR_CAST_ALIGN*/
                *(uint32_t *)(output + j) = input[i];
#endif /* _MD5_CHECK_ALIGNMENT */

#else   /* big endian -- will work on little endian, but slowly */

                output[j] = input[i] & 0xff;
                output[j + 1] = (input[i] >> 8)  & 0xff;
                output[j + 2] = (input[i] >> 16) & 0xff;
                output[j + 3] = (input[i] >> 24) & 0xff;
#endif
        }
}