Print this page
6662791 Need a SHA1 implementation optimized for 64-bit x86

@@ -1,11 +1,11 @@
 /*
- * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
-#pragma ident   "@(#)sha1.c     1.26    07/04/10 SMI"
+#pragma ident   "@(#)sha1.c     1.27    08/03/02 SMI"
 
 /*
  * The basic framework for this code came from the reference
  * implementation for MD5.  That implementation is Copyright (C)
  * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.

@@ -57,10 +57,18 @@
                 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
 
 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
     SHA1_CTX *, const uint8_t *);
 
+#elif   defined(__amd64)
+/*
+ * amd64: both transform macros forward to sha1_block_data_order(), which
+ * takes the context, the input pointer and a block count.  SHA1_TRANSFORM
+ * always passes a count of 1; SHA1_TRANSFORM_BLOCKS passes the caller's
+ * count.  The routine is only declared here; it is defined elsewhere (not
+ * visible in this diff).
+ */
+#define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
+#define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
+                (in), (num))
+
+void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
+
 #else
 
 #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
 
 static void SHA1Transform(SHA1_CTX *, const uint8_t *);

@@ -276,11 +284,12 @@
                                 /*
                                  * Main processing loop - input misaligned
                                  */
                                 for (; i + 63 < input_len; i += 64) {
                                     bcopy(&input[i], input64, 64);
-                                    SHA1TransformVIS(X0, (uint32_t *)input64,
+                                        SHA1TransformVIS(X0,
+                                            (uint32_t *)input64,
                                         &ctx->state[0], VIS);
                                 }
                         } else {
                                 /*
                                  * Main processing loop - input 8-byte aligned

@@ -325,10 +334,13 @@
 void
 SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
 {
         uint32_t i, buf_index, buf_len;
         const uint8_t *input = inptr;
+#if defined(__amd64)
+        uint32_t        block_count;    /* whole 64-byte blocks in input */
+#endif  /* __amd64 */
 
         /* check for noop */
         if (input_len == 0)
                 return;
 

@@ -361,12 +373,20 @@
                         bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
                         SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
                         i = buf_len;
                 }
 
+#if !defined(__amd64)
                 for (; i + 63 < input_len; i += 64) /* one block per call */
                         SHA1_TRANSFORM(ctx, &input[i]);
+#else   /*
+         * __amd64: hand all remaining whole 64-byte blocks to one call.
+         * NOTE(review): block_count and i are uint32_t but input_len is
+         * size_t, so the count is narrowed on assignment and the advance
+         * (block_count << 6) is 32-bit arithmetic -- confirm that inputs
+         * of 4 GiB or more cannot reach this path.
+         */
+                block_count = (input_len - i) >> 6;
+                if (block_count > 0) {
+                        SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
+                        i += block_count << 6;  /* bytes just consumed */
+                }
+#endif  /* !__amd64 */
 
                 /*
                  * general optimization:
                  *
                  * if i and input_len are the same, return now instead

@@ -389,11 +409,11 @@
 /*
  * SHA1Final()
  *
  * purpose: ends an sha1 digest operation, finalizing the message digest and
  *          zeroing the context.
- *   input: uchar_t *   : a buffer to store the digest in
+ *   input: uchar_t *   : A buffer to store the digest.
  *                      : The function actually uses void* because many
  *                      : callers pass things other than uchar_t here.
  *          SHA1_CTX *  : the context to finalize, save, and zero
  *  output: void
  */

@@ -418,10 +438,13 @@
 
         /* zeroize sensitive information */
         bzero(ctx, sizeof (*ctx));
 }
 
+
+#if !defined(__amd64)
+
 typedef uint32_t sha1word;
 
 /*
  * sparc optimization:
  *

@@ -658,11 +681,11 @@
          *      e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
          *
          * we can make the algorithm go faster by not doing this work,
          * but just pretending that `d' is now `e', etc. this works
          * really well and obviates the need for a temporary variable.
-         * however, we still explictly perform the rotate action,
+         * however, we still explicitly perform the rotate action,
          * since it is cheaper on SPARC to do it once than to have to
          * do it over and over again.
          */
 
         /* round 1 */

@@ -980,11 +1003,13 @@
 
         /* zeroize sensitive information */
         W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
         W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
 }
+#endif  /* !__amd64 */
 
+
 /*
  * Encode()
  *
  * purpose: to convert a list of numbers from little endian to big endian
  *   input: uint8_t *   : place to store the converted big endian numbers