Print this page
6662791 Need a SHA1 implementation optimized for 64-bit x86
*** 1,11 ****
/*
! * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
! #pragma ident "@(#)sha1.c 1.26 07/04/10 SMI"
/*
* The basic framework for this code came from the reference
* implementation for MD5. That implementation is Copyright (C)
* 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
--- 1,11 ----
/*
! * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
! #pragma ident "@(#)sha1.c 1.27 08/03/02 SMI"
/*
* The basic framework for this code came from the reference
* implementation for MD5. That implementation is Copyright (C)
* 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
*** 57,66 ****
--- 57,74 ----
(ctx)->state[3], (ctx)->state[4], (ctx), (in))
static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
SHA1_CTX *, const uint8_t *);
+ #elif defined(__amd64)
+
+ #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
+ #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
+ (in), (num))
+
+ void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
+
#else
#define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
static void SHA1Transform(SHA1_CTX *, const uint8_t *);
*** 276,286 ****
/*
* Main processing loop - input misaligned
*/
for (; i + 63 < input_len; i += 64) {
bcopy(&input[i], input64, 64);
! SHA1TransformVIS(X0, (uint32_t *)input64,
&ctx->state[0], VIS);
}
} else {
/*
* Main processing loop - input 8-byte aligned
--- 284,295 ----
/*
* Main processing loop - input misaligned
*/
for (; i + 63 < input_len; i += 64) {
bcopy(&input[i], input64, 64);
! SHA1TransformVIS(X0,
! (uint32_t *)input64,
&ctx->state[0], VIS);
}
} else {
/*
* Main processing loop - input 8-byte aligned
*** 325,334 ****
--- 334,346 ----
void
SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
{
uint32_t i, buf_index, buf_len;
const uint8_t *input = inptr;
+ #if defined(__amd64)
+ uint32_t block_count;
+ #endif /* __amd64 */
/* check for noop */
if (input_len == 0)
return;
*** 361,372 ****
--- 373,392 ----
bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
i = buf_len;
}
+ #if !defined(__amd64)
for (; i + 63 < input_len; i += 64)
SHA1_TRANSFORM(ctx, &input[i]);
+ #else
+ block_count = (input_len - i) >> 6;
+ if (block_count > 0) {
+ SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
+ i += block_count << 6;
+ }
+ #endif /* !__amd64 */
/*
* general optimization:
*
* if i and input_len are the same, return now instead
*** 389,399 ****
/*
* SHA1Final()
*
* purpose: ends an sha1 digest operation, finalizing the message digest and
* zeroing the context.
! * input: uchar_t * : a buffer to store the digest in
* : The function actually uses void* because many
* : callers pass things other than uchar_t here.
* SHA1_CTX * : the context to finalize, save, and zero
* output: void
*/
--- 409,419 ----
/*
* SHA1Final()
*
* purpose: ends an sha1 digest operation, finalizing the message digest and
* zeroing the context.
! * input: uchar_t * : A buffer to store the digest.
* : The function actually uses void* because many
* : callers pass things other than uchar_t here.
* SHA1_CTX * : the context to finalize, save, and zero
* output: void
*/
*** 418,427 ****
--- 438,450 ----
/* zeroize sensitive information */
bzero(ctx, sizeof (*ctx));
}
+
+ #if !defined(__amd64)
+
typedef uint32_t sha1word;
/*
* sparc optimization:
*
*** 658,668 ****
* e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
*
* we can make the algorithm go faster by not doing this work,
* but just pretending that `d' is now `e', etc. this works
* really well and obviates the need for a temporary variable.
! * however, we still explictly perform the rotate action,
* since it is cheaper on SPARC to do it once than to have to
* do it over and over again.
*/
/* round 1 */
--- 681,691 ----
* e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
*
* we can make the algorithm go faster by not doing this work,
* but just pretending that `d' is now `e', etc. this works
* really well and obviates the need for a temporary variable.
! * however, we still explicitly perform the rotate action,
* since it is cheaper on SPARC to do it once than to have to
* do it over and over again.
*/
/* round 1 */
*** 980,990 ****
--- 1003,1015 ----
/* zeroize sensitive information */
W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
}
+ #endif /* !__amd64 */
+
/*
* Encode()
*
* purpose: to convert a list of numbers from little endian to big endian
* input: uint8_t * : place to store the converted big endian numbers