Print this page
6662791 Need a SHA1 implementation optimized for 64-bit x86

*** 1,11 **** /* ! * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ ! #pragma ident "@(#)sha1.c 1.26 07/04/10 SMI" /* * The basic framework for this code came from the reference * implementation for MD5. That implementation is Copyright (C) * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved. --- 1,11 ---- /* ! * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ ! #pragma ident "@(#)sha1.c 1.27 08/03/02 SMI" /* * The basic framework for this code came from the reference * implementation for MD5. That implementation is Copyright (C) * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
*** 57,66 **** --- 57,74 ---- (ctx)->state[3], (ctx)->state[4], (ctx), (in)) static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, SHA1_CTX *, const uint8_t *); + #elif defined(__amd64) + + #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1) + #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \ + (in), (num)) + + void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks); + #else #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in)) static void SHA1Transform(SHA1_CTX *, const uint8_t *);
*** 276,286 **** /* * Main processing loop - input misaligned */ for (; i + 63 < input_len; i += 64) { bcopy(&input[i], input64, 64); ! SHA1TransformVIS(X0, (uint32_t *)input64, &ctx->state[0], VIS); } } else { /* * Main processing loop - input 8-byte aligned --- 284,295 ---- /* * Main processing loop - input misaligned */ for (; i + 63 < input_len; i += 64) { bcopy(&input[i], input64, 64); ! SHA1TransformVIS(X0, ! (uint32_t *)input64, &ctx->state[0], VIS); } } else { /* * Main processing loop - input 8-byte aligned
*** 325,334 **** --- 334,346 ---- void SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len) { uint32_t i, buf_index, buf_len; const uint8_t *input = inptr; + #if defined(__amd64) + uint32_t block_count; + #endif /* __amd64 */ /* check for noop */ if (input_len == 0) return;
*** 361,372 **** --- 373,392 ---- bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len); SHA1_TRANSFORM(ctx, ctx->buf_un.buf8); i = buf_len; } + #if !defined(__amd64) for (; i + 63 < input_len; i += 64) SHA1_TRANSFORM(ctx, &input[i]); + #else + block_count = (input_len - i) >> 6; + if (block_count > 0) { + SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count); + i += block_count << 6; + } + #endif /* !__amd64 */ /* * general optimization: * * if i and input_len are the same, return now instead
*** 389,399 **** /* * SHA1Final() * * purpose: ends an sha1 digest operation, finalizing the message digest and * zeroing the context. ! * input: uchar_t * : a buffer to store the digest in * : The function actually uses void* because many * : callers pass things other than uchar_t here. * SHA1_CTX * : the context to finalize, save, and zero * output: void */ --- 409,419 ---- /* * SHA1Final() * * purpose: ends an sha1 digest operation, finalizing the message digest and * zeroing the context. ! * input: uchar_t * : A buffer to store the digest. * : The function actually uses void* because many * : callers pass things other than uchar_t here. * SHA1_CTX * : the context to finalize, save, and zero * output: void */
*** 418,427 **** --- 438,450 ---- /* zeroize sensitive information */ bzero(ctx, sizeof (*ctx)); } + + #if !defined(__amd64) + typedef uint32_t sha1word; /* * sparc optimization: *
*** 658,668 **** * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp; * * we can make the algorithm go faster by not doing this work, * but just pretending that `d' is now `e', etc. this works * really well and obviates the need for a temporary variable. ! * however, we still explictly perform the rotate action, * since it is cheaper on SPARC to do it once than to have to * do it over and over again. */ /* round 1 */ --- 681,691 ---- * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp; * * we can make the algorithm go faster by not doing this work, * but just pretending that `d' is now `e', etc. this works * really well and obviates the need for a temporary variable. ! * however, we still explicitly perform the rotate action, * since it is cheaper on SPARC to do it once than to have to * do it over and over again. */ /* round 1 */
*** 980,990 **** --- 1003,1015 ---- /* zeroize sensitive information */ W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0; W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0; } + #endif /* !__amd64 */ + /* * Encode() * * purpose: to convert a list of numbers from little endian to big endian * input: uint8_t * : place to store the converted big endian numbers