nv85_sha1 Sdiff usr/src/common/crypto/sha1

Print this page

6662791 Need a SHA1 implementation optimized for 64-bit x86

   1 /*
   2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5 
   6 #pragma ident   "@(#)sha1.c     1.26    07/04/10 SMI"
   7 
   8 /*
   9  * The basic framework for this code came from the reference
  10  * implementation for MD5.  That implementation is Copyright (C)
  11  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  12  *
  13  * License to copy and use this software is granted provided that it
  14  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  15  * Algorithm" in all material mentioning or referencing this software
  16  * or this function.
  17  *
  18  * License is also granted to make and use derivative works provided
  19  * that such works are identified as "derived from the RSA Data
  20  * Security, Inc. MD5 Message-Digest Algorithm" in all material
  21  * mentioning or referencing the derived work.
  22  *
  23  * RSA Data Security, Inc. makes no representations concerning either
  24  * the merchantability of this software or the suitability of this
  25  * software for any particular purpose. It is provided "as is"
  26  * without express or implied warranty of any kind.

  42 #include <sys/sha1_consts.h>
  43 
  44 #ifndef _KERNEL
  45 #include <strings.h>
  46 #include <stdlib.h>
  47 #include <errno.h>
  48 #include <sys/systeminfo.h>
  49 #endif  /* !_KERNEL */
  50 
  51 static void Encode(uint8_t *, const uint32_t *, size_t);
  52 
  53 #if     defined(__sparc)
  54 
  55 #define SHA1_TRANSFORM(ctx, in) \
  56         SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
  57                 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
  58 
  59 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
  60     SHA1_CTX *, const uint8_t *);
  61 








  62 #else
  63 
  64 #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
  65 
  66 static void SHA1Transform(SHA1_CTX *, const uint8_t *);
  67 
  68 #endif
  69 
  70 
  71 static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
  72 
  73 /*
  74  * F, G, and H are the basic SHA1 functions.
  75  */
  76 #define F(b, c, d)      (((b) & (c)) | ((~b) & (d)))
  77 #define G(b, c, d)      ((b) ^ (c) ^ (d))
  78 #define H(b, c, d)      (((b) & (c)) | (((b)|(c)) & (d)))
  79 
  80 /*
  81  * ROTATE_LEFT rotates x left n bits.

 261                  *       uint32_t *, // Pointer to ith block of message data
 262                  *       uint32_t *, // Pointer to SHA state i.e ctx->state
 263                  *       uint64_t *, // Pointer to various VIS constants
 264                  * )
 265                  *
 266                  * Note: the message data must be 4-byte aligned.
 267                  *
 268                  * Function requires VIS 1.0 support.
 269                  *
 270                  * Handling is provided to deal with arbitrary byte alignment
 271                  * of the input data but the performance gains are reduced
 272                  * for alignments other than 4-bytes.
 273                  */
 274                 if (usevis) {
 275                         if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
 276                                 /*
 277                                  * Main processing loop - input misaligned
 278                                  */
 279                                 for (; i + 63 < input_len; i += 64) {
 280                                     bcopy(&input[i], input64, 64);
 281                                     SHA1TransformVIS(X0, (uint32_t *)input64,

 282                                         &ctx->state[0], VIS);
 283                                 }
 284                         } else {
 285                                 /*
 286                                  * Main processing loop - input 4-byte aligned
 287                                  */
 288                                 for (; i + 63 < input_len; i += 64) {
 289                                         SHA1TransformVIS(X0,
 290                                             /* LINTED E_BAD_PTR_CAST_ALIGN */
 291                                             (uint32_t *)&input[i],
 292                                             &ctx->state[0], VIS);
 293                                 }
 294 
 295                         }
 296 #ifdef _KERNEL
 297                         sha1_restorefp(fpu);
 298 #endif /* _KERNEL */
 299                 } else {
 300                         for (; i + 63 < input_len; i += 64) {
 301                             SHA1_TRANSFORM(ctx, &input[i]);

 310                  * will be an expensive nop.
 311                  */
 312 
 313                 if (input_len == i)
 314                         return;
 315 
 316                 buf_index = 0;
 317         }
 318 
 319         /* buffer remaining input */
 320         bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 321 }
 322 
 323 #else /* VIS_SHA1 */
 324 
 325 void
 326 SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
 327 {
 328         uint32_t i, buf_index, buf_len; /* NOTE(review): 32-bit, input_len is size_t; verify for inputs >= 4 GiB */
 329         const uint8_t *input = inptr;



 330 
 331         /* nothing to absorb: return before touching ctx */
 332         if (input_len == 0)
 333                 return;
 334 
 335         /* bytes already held in ctx->buf_un: (bit count / 8) mod 64 */
 336         buf_index = (ctx->count[1] >> 3) & 0x3F;
 337 
 338         /* add input_len * 8 to the low bit-count word; carry into count[0] on wrap */
 339         if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
 340                 ctx->count[0]++;
 341 
 342         ctx->count[0] += (input_len >> 29);     /* i.e. (input_len * 8) >> 32 */
 343 
 344         buf_len = 64 - buf_index;       /* room left in the 64-byte buffer */
 345 
 346         /* process every complete 64-byte block that is now available */
 347         i = 0;
 348         if (input_len >= buf_len) {
 349 
 350                 /*
 351                  * general optimization:
 352                  *
 353                  * only do initial bcopy() and SHA1Transform() if
 354                  * buf_index != 0.  if buf_index == 0, we're just
 355                  * wasting our time doing the bcopy() since there
 356                  * wasn't any data left over from a previous call to
 357                  * SHA1Update().
 358                  */
 359 
 360                 if (buf_index) {
 361                         bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 362                         SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
 363                         i = buf_len;    /* that many input bytes are consumed */
 364                 }
 365 

 366                 for (; i + 63 < input_len; i += 64)     /* full blocks straight from input */
 367                         SHA1_TRANSFORM(ctx, &input[i]);







 368 
 369                 /*
 370                  * general optimization:
 371                  *
 372                  * if i and input_len are the same, return now instead
 373                  * of calling bcopy(), since the bcopy() in this case
 374                  * will be an expensive nop.
 375                  */
 376 
 377                 if (input_len == i)
 378                         return;
 379 
 380                 buf_index = 0;  /* buffer was flushed; the tail goes at its start */
 381         }
 382 
 383         /* stash the trailing partial block (fewer than 64 bytes) for a later call */
 384         bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 385 }
 386 
 387 #endif /* VIS_SHA1 */
 388 
 389 /*
 390  * SHA1Final()
 391  *
 392  * purpose: ends an sha1 digest operation, finalizing the message digest and
 393  *          zeroing the context.
 394  *   input: uchar_t *   : a buffer to store the digest in
 395  *                      : The function actually uses void* because many
 396  *                      : callers pass things other than uchar_t here.
 397  *          SHA1_CTX *  : the context to finalize, save, and zero
 398  *  output: void
 399  */
 400 
 401 void
 402 SHA1Final(void *digest, SHA1_CTX *ctx)
 403 {
 404         uint8_t         bitcount_be[sizeof (ctx->count)];
 405         uint32_t        index = (ctx->count[1] >> 3) & 0x3f;    /* bytes in the partial block */
 406 
 407         /* save the bit count, big endian, before the padding update changes ctx->count */
 408         Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
 409 
 410         /* pad with PADDING (0x80 then zeros) until the length is 56 mod 64 */
 411         SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
 412 
 413         /* append the saved pre-padding bit count */
 414         SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
 415 
 416         /* write the state words into digest via Encode() */
 417         Encode(digest, ctx->state, sizeof (ctx->state));
 418 
 419         /* zeroize sensitive information; NOTE(review): confirm the compiler cannot elide this bzero() */
 420         bzero(ctx, sizeof (*ctx));
 421 }
 422 



 423 typedef uint32_t sha1word;
 424 
 425 /*
 426  * sparc optimization:
 427  *
 428  * on the sparc, we can load big endian 32-bit data easily.  note that
 429  * special care must be taken to ensure the address is 32-bit aligned.
 430  * in the interest of speed, we don't check to make sure, since
 431  * careful programming can guarantee this for us.
 432  */
 433 
 434 #if     defined(_BIG_ENDIAN)
 435 
 436 #define LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
 437 
 438 #else   /* !defined(_BIG_ENDIAN) */
 439 
 440 #if     defined(HAVE_BSWAP)
 441 
 442 #define LOAD_BIG_32(addr) bswap(*((uint32_t *)(addr)))

 643 
 644         /*
 645          * general optimization:
 646          *
 647          * even though this approach is described in the standard as
 648          * being slower algorithmically, it is 30-40% faster than the
 649          * "faster" version under SPARC, because this version has more
 650          * of the constraints specified at compile-time and uses fewer
 651          * variables (and therefore has better register utilization)
 652          * than its "speedier" brother.  (i've tried both, trust me)
 653          *
 654          * for either method given in the spec, there is an "assignment"
 655          * phase where the following takes place:
 656          *
 657          *      tmp = (main_computation);
 658          *      e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
 659          *
 660          * we can make the algorithm go faster by not doing this work,
 661          * but just pretending that `d' is now `e', etc. this works
 662          * really well and obviates the need for a temporary variable.
 663          * however, we still explicitly perform the rotate action,
 664          * since it is cheaper on SPARC to do it once than to have to
 665          * do it over and over again.
 666          */
 667 
 668         /* round 1 */
 669         e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
 670         b = ROTATE_LEFT(b, 30);
 671 
 672         d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
 673         a = ROTATE_LEFT(a, 30);
 674 
 675         c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
 676         e = ROTATE_LEFT(e, 30);
 677 
 678         b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
 679         d = ROTATE_LEFT(d, 30);
 680 
 681         a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
 682         c = ROTATE_LEFT(c, 30);
 683

 965         c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
 966         e = ROTATE_LEFT(e, 30);
 967 
 968         W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 78 */
 969         b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
 970         d = ROTATE_LEFT(d, 30);
 971 
 972         W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 79 */
 973 
 974         ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
 975             SHA1_CONST(3);
 976         ctx->state[1] += b;
 977         ctx->state[2] += ROTATE_LEFT(c, 30);
 978         ctx->state[3] += d;
 979         ctx->state[4] += e;
 980 
 981         /* zeroize sensitive information */
 982         W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
 983         W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
 984 }

 985 

 986 /*
 987  * Encode()
 988  *
 989  * purpose: to convert a list of numbers from little endian to big endian
 990  *   input: uint8_t *   : place to store the converted big endian numbers
 991  *          uint32_t *  : place to get numbers to convert from
 992  *          size_t      : the length of the input in bytes
 993  *  output: void
 994  */
 995 
 996 static void
 997 Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
 998     size_t len)
 999 {
1000         size_t          i, j;
1001 
1002 #if     defined(__sparc)
1003         if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
1004                 for (i = 0, j = 0; j < len; i++, j += 4) {
1005                         /* LINTED: pointer alignment */

   1 /*
   2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
   3  * Use is subject to license terms.
   4  */
   5 
   6 #pragma ident   "@(#)sha1.c     1.27    08/03/02 SMI"
   7 
   8 /*
   9  * The basic framework for this code came from the reference
  10  * implementation for MD5.  That implementation is Copyright (C)
  11  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  12  *
  13  * License to copy and use this software is granted provided that it
  14  * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  15  * Algorithm" in all material mentioning or referencing this software
  16  * or this function.
  17  *
  18  * License is also granted to make and use derivative works provided
  19  * that such works are identified as "derived from the RSA Data
  20  * Security, Inc. MD5 Message-Digest Algorithm" in all material
  21  * mentioning or referencing the derived work.
  22  *
  23  * RSA Data Security, Inc. makes no representations concerning either
  24  * the merchantability of this software or the suitability of this
  25  * software for any particular purpose. It is provided "as is"
  26  * without express or implied warranty of any kind.

  42 #include <sys/sha1_consts.h>
  43 
  44 #ifndef _KERNEL
  45 #include <strings.h>
  46 #include <stdlib.h>
  47 #include <errno.h>
  48 #include <sys/systeminfo.h>
  49 #endif  /* !_KERNEL */
  50 
  51 static void Encode(uint8_t *, const uint32_t *, size_t);
  52 
  53 #if     defined(__sparc)
  54 
  55 #define SHA1_TRANSFORM(ctx, in) \
  56         SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
  57                 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
  58 
  59 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
  60     SHA1_CTX *, const uint8_t *);
  61 
  62 #elif   defined(__amd64)
  63 
  64 #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
  65 #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
  66                 (in), (num))
  67 
  68 void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
  69 
  70 #else
  71 
  72 #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
  73 
  74 static void SHA1Transform(SHA1_CTX *, const uint8_t *);
  75 
  76 #endif
  77 
  78 
  79 static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
  80 
  81 /*
  82  * F, G, and H are the basic SHA1 functions.
  83  */
  84 #define F(b, c, d)      (((b) & (c)) | ((~b) & (d)))
  85 #define G(b, c, d)      ((b) ^ (c) ^ (d))
  86 #define H(b, c, d)      (((b) & (c)) | (((b)|(c)) & (d)))
  87 
  88 /*
  89  * ROTATE_LEFT rotates x left n bits.

 269                  *       uint32_t *, // Pointer to ith block of message data
 270                  *       uint32_t *, // Pointer to SHA state i.e ctx->state
 271                  *       uint64_t *, // Pointer to various VIS constants
 272                  * )
 273                  *
 274                  * Note: the message data must be 4-byte aligned.
 275                  *
 276                  * Function requires VIS 1.0 support.
 277                  *
 278                  * Handling is provided to deal with arbitrary byte alignment
 279                  * of the input data but the performance gains are reduced
 280                  * for alignments other than 4-bytes.
 281                  */
 282                 if (usevis) {
 283                         if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
 284                                 /*
 285                                  * Main processing loop - input misaligned
 286                                  */
 287                                 for (; i + 63 < input_len; i += 64) {
 288                                         bcopy(&input[i], input64, 64);
 289                                         SHA1TransformVIS(X0,
 290                                             (uint32_t *)input64,
 291                                             &ctx->state[0], VIS);
 292                                 }
 293                         } else {
 294                                 /*
 295                                  * Main processing loop - input 4-byte aligned
 296                                  */
 297                                 for (; i + 63 < input_len; i += 64) {
 298                                         SHA1TransformVIS(X0,
 299                                         /* LINTED E_BAD_PTR_CAST_ALIGN */
 300                                             (uint32_t *)&input[i],
 301                                             &ctx->state[0], VIS);
 302                                 }
 303 
 304                         }
 305 #ifdef _KERNEL
 306                         sha1_restorefp(fpu);
 307 #endif /* _KERNEL */
 308                 } else {
 309                         for (; i + 63 < input_len; i += 64) {
 310                                 SHA1_TRANSFORM(ctx, &input[i]);

 319                  * will be an expensive nop.
 320                  */
 321 
 322                 if (input_len == i)
 323                         return;
 324 
 325                 buf_index = 0;
 326         }
 327 
 328         /* buffer remaining input */
 329         bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 330 }
 331 
 332 #else /* VIS_SHA1 */
 333 
 334 void
 335 SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
 336 {
 337         uint32_t i, buf_index, buf_len; /* NOTE(review): 32-bit, input_len is size_t; verify for inputs >= 4 GiB */
 338         const uint8_t *input = inptr;
 339 #if defined(__amd64)
 340         uint32_t        block_count;    /* whole 64-byte blocks passed to the multi-block transform */
 341 #endif  /* __amd64 */
 342 
 343         /* nothing to absorb: return before touching ctx */
 344         if (input_len == 0)
 345                 return;
 346 
 347         /* bytes already held in ctx->buf_un: (bit count / 8) mod 64 */
 348         buf_index = (ctx->count[1] >> 3) & 0x3F;
 349 
 350         /* add input_len * 8 to the low bit-count word; carry into count[0] on wrap */
 351         if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
 352                 ctx->count[0]++;
 353 
 354         ctx->count[0] += (input_len >> 29);     /* i.e. (input_len * 8) >> 32 */
 355 
 356         buf_len = 64 - buf_index;       /* room left in the 64-byte buffer */
 357 
 358         /* process every complete 64-byte block that is now available */
 359         i = 0;
 360         if (input_len >= buf_len) {
 361 
 362                 /*
 363                  * general optimization:
 364                  *
 365                  * only do initial bcopy() and SHA1Transform() if
 366                  * buf_index != 0.  if buf_index == 0, we're just
 367                  * wasting our time doing the bcopy() since there
 368                  * wasn't any data left over from a previous call to
 369                  * SHA1Update().
 370                  */
 371 
 372                 if (buf_index) {
 373                         bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 374                         SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
 375                         i = buf_len;    /* that many input bytes are consumed */
 376                 }
 377 
 378 #if !defined(__amd64)
 379                 for (; i + 63 < input_len; i += 64)     /* full blocks straight from input */
 380                         SHA1_TRANSFORM(ctx, &input[i]);
 381 #else   /* __amd64: pass all remaining full blocks in a single call */
 382                 block_count = (input_len - i) >> 6;     /* remaining bytes / 64 */
 383                 if (block_count > 0) {
 384                         SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
 385                         i += block_count << 6;  /* advance past the bytes just hashed */
 386                 }
 387 #endif  /* !__amd64 */
 388 
 389                 /*
 390                  * general optimization:
 391                  *
 392                  * if i and input_len are the same, return now instead
 393                  * of calling bcopy(), since the bcopy() in this case
 394                  * will be an expensive nop.
 395                  */
 396 
 397                 if (input_len == i)
 398                         return;
 399 
 400                 buf_index = 0;  /* buffer was flushed; the tail goes at its start */
 401         }
 402 
 403         /* stash the trailing partial block (fewer than 64 bytes) for a later call */
 404         bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 405 }
 406 
 407 #endif /* VIS_SHA1 */
 408 
 409 /*
 410  * SHA1Final()
 411  *
 412  * purpose: ends an sha1 digest operation, finalizing the message digest and
 413  *          zeroing the context.
 414  *   input: uchar_t *   : A buffer to store the digest.
 415  *                      : The function actually uses void* because many
 416  *                      : callers pass things other than uchar_t here.
 417  *          SHA1_CTX *  : the context to finalize, save, and zero
 418  *  output: void
 419  */
 420 
 421 void
 422 SHA1Final(void *digest, SHA1_CTX *ctx)
 423 {
 424         uint8_t         bitcount_be[sizeof (ctx->count)];
 425         uint32_t        index = (ctx->count[1] >> 3) & 0x3f;    /* bytes in the partial block */
 426 
 427         /* save the bit count, big endian, before the padding update changes ctx->count */
 428         Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
 429 
 430         /* pad with PADDING (0x80 then zeros) until the length is 56 mod 64 */
 431         SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
 432 
 433         /* append the saved pre-padding bit count */
 434         SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
 435 
 436         /* write the state words into digest via Encode() */
 437         Encode(digest, ctx->state, sizeof (ctx->state));
 438 
 439         /* zeroize sensitive information; NOTE(review): confirm the compiler cannot elide this bzero() */
 440         bzero(ctx, sizeof (*ctx));
 441 }
 442 
 443 
 444 #if !defined(__amd64)
 445 
 446 typedef uint32_t sha1word;
 447 
 448 /*
 449  * sparc optimization:
 450  *
 451  * on the sparc, we can load big endian 32-bit data easily.  note that
 452  * special care must be taken to ensure the address is 32-bit aligned.
 453  * in the interest of speed, we don't check to make sure, since
 454  * careful programming can guarantee this for us.
 455  */
 456 
 457 #if     defined(_BIG_ENDIAN)
 458 
 459 #define LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
 460 
 461 #else   /* !defined(_BIG_ENDIAN) */
 462 
 463 #if     defined(HAVE_BSWAP)
 464 
 465 #define LOAD_BIG_32(addr) bswap(*((uint32_t *)(addr)))

 666 
 667         /*
 668          * general optimization:
 669          *
 670          * even though this approach is described in the standard as
 671          * being slower algorithmically, it is 30-40% faster than the
 672          * "faster" version under SPARC, because this version has more
 673          * of the constraints specified at compile-time and uses fewer
 674          * variables (and therefore has better register utilization)
 675          * than its "speedier" brother.  (i've tried both, trust me)
 676          *
 677          * for either method given in the spec, there is an "assignment"
 678          * phase where the following takes place:
 679          *
 680          *      tmp = (main_computation);
 681          *      e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
 682          *
 683          * we can make the algorithm go faster by not doing this work,
 684          * but just pretending that `d' is now `e', etc. this works
 685          * really well and obviates the need for a temporary variable.
 686          * however, we still explicitly perform the rotate action,
 687          * since it is cheaper on SPARC to do it once than to have to
 688          * do it over and over again.
 689          */
 690 
 691         /* round 1 */
 692         e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
 693         b = ROTATE_LEFT(b, 30);
 694 
 695         d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
 696         a = ROTATE_LEFT(a, 30);
 697 
 698         c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
 699         e = ROTATE_LEFT(e, 30);
 700 
 701         b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
 702         d = ROTATE_LEFT(d, 30);
 703 
 704         a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
 705         c = ROTATE_LEFT(c, 30);
 706

 988         c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
 989         e = ROTATE_LEFT(e, 30);
 990 
 991         W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 78 */
 992         b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
 993         d = ROTATE_LEFT(d, 30);
 994 
 995         W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 79 */
 996 
 997         ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
 998             SHA1_CONST(3);
 999         ctx->state[1] += b;
1000         ctx->state[2] += ROTATE_LEFT(c, 30);
1001         ctx->state[3] += d;
1002         ctx->state[4] += e;
1003 
1004         /* zeroize sensitive information */
1005         W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
1006         W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
1007 }
1008 #endif  /* !__amd64 */
1009 
1010 
1011 /*
1012  * Encode()
1013  *
1014  * purpose: to convert a list of numbers from little endian to big endian
1015  *   input: uint8_t *   : place to store the converted big endian numbers
1016  *          uint32_t *  : place to get numbers to convert from
1017  *          size_t      : the length of the input in bytes
1018  *  output: void
1019  */
1020 
1021 static void
1022 Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
1023     size_t len)
1024 {
1025         size_t          i, j;
1026 
1027 #if     defined(__sparc)
1028         if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
1029                 for (i = 0, j = 0; j < len; i++, j += 4) {
1030                         /* LINTED: pointer alignment */