5007142 Add ntohll and htonll to sys/byteorder.h
6717509 Need to use bswap/bswapq for byte swap of 64-bit integer on x32/x64
PSARC 2008/474
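This delta moves sha1.c off its hand-rolled GNU C inline bswap and onto the htonl()/ntohl() family in <sys/byteorder.h>, which the PSARC case above extends with 64-bit ntohll()/htonll(). As background for the review, here is a minimal, purely illustrative sketch of what a portable htonll() fallback could look like on a little-endian build; the committed header is expected to lean on the bswap/bswapq instructions instead, and the helper name below is hypothetical.

#include <stdint.h>

/*
 * Illustrative sketch only -- not the actual <sys/byteorder.h> code.
 * On a little-endian machine htonll() must reverse all eight bytes;
 * an optimized header would map this onto bswap/bswapq.
 */
static inline uint64_t
example_htonll(uint64_t v)		/* hypothetical name */
{
	return (((v & 0x00000000000000ffULL) << 56) |
	    ((v & 0x000000000000ff00ULL) << 40) |
	    ((v & 0x0000000000ff0000ULL) << 24) |
	    ((v & 0x00000000ff000000ULL) << 8) |
	    ((v & 0x000000ff00000000ULL) >> 8) |
	    ((v & 0x0000ff0000000000ULL) >> 24) |
	    ((v & 0x00ff000000000000ULL) >> 40) |
	    ((v & 0xff00000000000000ULL) >> 56));
}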
--- old/usr/src/common/crypto/sha1/sha1.c
+++ new/usr/src/common/crypto/sha1/sha1.c
1 1 /*
2 2 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
3 3 * Use is subject to license terms.
4 4 */
5 5
6 -#pragma ident "%Z%%M% %I% %E% SMI"
7 -
8 6 /*
9 7 * The basic framework for this code came from the reference
10 8 * implementation for MD5. That implementation is Copyright (C)
11 9 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
12 10 *
13 11 * License to copy and use this software is granted provided that it
14 12 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
15 13 * Algorithm" in all material mentioning or referencing this software
16 14 * or this function.
17 15 *
18 16 * License is also granted to make and use derivative works provided
19 17 * that such works are identified as "derived from the RSA Data
20 18 * Security, Inc. MD5 Message-Digest Algorithm" in all material
21 19 * mentioning or referencing the derived work.
22 20 *
23 21 * RSA Data Security, Inc. makes no representations concerning either
24 22 * the merchantability of this software or the suitability of this
25 23 * software for any particular purpose. It is provided "as is"
26 24 * without express or implied warranty of any kind.
27 25 *
28 26 * These notices must be retained in any copies of any part of this
29 27 * documentation and/or software.
30 28 *
31 29 * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
32 - * standard, available at http://www.itl.nist.gov/div897/pubs/fip180-1.htm
30 + * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
33 31 * Not as fast as one would like -- further optimizations are encouraged
34 32 * and appreciated.
35 33 */
36 34
37 35 #include <sys/types.h>
38 36 #include <sys/param.h>
39 37 #include <sys/systm.h>
40 38 #include <sys/sysmacros.h>
41 39 #include <sys/sha1.h>
42 40 #include <sys/sha1_consts.h>
43 41
44 42 #ifndef _KERNEL
45 43 #include <strings.h>
46 44 #include <stdlib.h>
47 45 #include <errno.h>
48 46 #include <sys/systeminfo.h>
49 47 #endif /* !_KERNEL */
50 48
49 +#ifdef _LITTLE_ENDIAN
50 +#include <sys/byteorder.h>
51 +#define HAVE_HTONL
52 +#endif
53 +
51 54 static void Encode(uint8_t *, const uint32_t *, size_t);
52 55
53 56 #if defined(__sparc)
54 57
55 58 #define SHA1_TRANSFORM(ctx, in) \
56 59 SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
57 60 (ctx)->state[3], (ctx)->state[4], (ctx), (in))
58 61
59 62 static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
60 63 SHA1_CTX *, const uint8_t *);
61 64
62 65 #elif defined(__amd64)
63 66
64 67 #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
65 68 #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
66 69 (in), (num))
67 70
68 71 void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
69 72
70 73 #else
71 74
72 75 #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
73 76
74 77 static void SHA1Transform(SHA1_CTX *, const uint8_t *);
75 78
76 79 #endif
77 80
78 81
79 82 static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
80 83
81 84 /*
82 85 * F, G, and H are the basic SHA1 functions.
83 86 */
84 87 #define F(b, c, d) (((b) & (c)) | ((~b) & (d)))
85 88 #define G(b, c, d) ((b) ^ (c) ^ (d))
86 89 #define H(b, c, d) (((b) & (c)) | (((b)|(c)) & (d)))
87 90
88 91 /*
89 92 * ROTATE_LEFT rotates x left n bits.
90 93 */
91 94
92 95 #if defined(__GNUC__) && defined(_LP64)
93 96 static __inline__ uint64_t
94 97 ROTATE_LEFT(uint64_t value, uint32_t n)
95 98 {
96 99 uint32_t t32;
97 100
98 101 t32 = (uint32_t)value;
99 102 return ((t32 << n) | (t32 >> (32 - n)));
100 103 }
101 104
102 105 #else
103 106
104 107 #define ROTATE_LEFT(x, n) \
105 108 (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
106 109
107 110 #endif
108 111
109 -#if defined(__GNUC__) && (defined(__i386) || defined(__amd64))
110 112
111 -#define HAVE_BSWAP
112 -
113 -extern __inline__ uint32_t bswap(uint32_t value)
114 -{
115 - __asm__("bswap %0" : "+r" (value));
116 - return (value);
117 -}
118 -
119 -#endif
120 -
121 113 /*
122 114 * SHA1Init()
123 115 *
124 116 * purpose: initializes the sha1 context and begins an sha1 digest operation
125 117 * input: SHA1_CTX * : the context to initialize.
126 118 * output: void
127 119 */
128 120
129 121 void
130 122 SHA1Init(SHA1_CTX *ctx)
131 123 {
132 124 ctx->count[0] = ctx->count[1] = 0;
133 125
134 126 /*
135 127 * load magic initialization constants. Tell lint
136 128 * that these constants are unsigned by using U.
137 129 */
138 130
139 131 ctx->state[0] = 0x67452301U;
140 132 ctx->state[1] = 0xefcdab89U;
141 133 ctx->state[2] = 0x98badcfeU;
142 134 ctx->state[3] = 0x10325476U;
143 135 ctx->state[4] = 0xc3d2e1f0U;
144 136 }
145 137
146 138 #ifdef VIS_SHA1
147 139 #ifdef _KERNEL
148 140
149 141 #include <sys/regset.h>
150 142 #include <sys/vis.h>
151 143 #include <sys/fpu/fpusystm.h>
152 144
153 145 /* the alignment for block stores to save fp registers */
154 146 #define VIS_ALIGN (64)
155 147
156 148 extern int sha1_savefp(kfpu_t *, int);
157 149 extern void sha1_restorefp(kfpu_t *);
158 150
159 151 uint32_t vis_sha1_svfp_threshold = 128;
160 152
161 153 #endif /* _KERNEL */
162 154
163 155 /*
164 156 * VIS SHA-1 consts.
165 157 */
166 158 static uint64_t VIS[] = {
167 159 0x8000000080000000ULL,
168 160 0x0002000200020002ULL,
169 161 0x5a8279996ed9eba1ULL,
170 162 0x8f1bbcdcca62c1d6ULL,
171 163 0x012389ab456789abULL};
172 164
173 165 extern void SHA1TransformVIS(uint64_t *, uint32_t *, uint32_t *, uint64_t *);
174 166
175 167
176 168 /*
177 169 * SHA1Update()
178 170 *
179 171 * purpose: continues an sha1 digest operation, using the message block
180 172 * to update the context.
181 173 * input: SHA1_CTX * : the context to update
182 174 * void * : the message block
183 175 * size_t : the length of the message block in bytes
184 176 * output: void
185 177 */
186 178
187 179 void
188 180 SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
189 181 {
190 182 uint32_t i, buf_index, buf_len;
191 183 uint64_t X0[40], input64[8];
192 184 const uint8_t *input = inptr;
193 185 #ifdef _KERNEL
194 186 int usevis = 0;
195 187 #else
196 188 int usevis = 1;
197 189 #endif /* _KERNEL */
198 190
199 191 /* check for noop */
200 192 if (input_len == 0)
201 193 return;
202 194
203 195 /* compute number of bytes mod 64 */
204 196 buf_index = (ctx->count[1] >> 3) & 0x3F;
205 197
206 198 /* update number of bits */
207 199 if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
208 200 ctx->count[0]++;
209 201
210 202 ctx->count[0] += (input_len >> 29);
211 203
212 204 buf_len = 64 - buf_index;
213 205
214 206 /* transform as many times as possible */
215 207 i = 0;
216 208 if (input_len >= buf_len) {
217 209 #ifdef _KERNEL
218 210 kfpu_t *fpu;
219 211 if (fpu_exists) {
220 212 uint8_t fpua[sizeof (kfpu_t) + GSR_SIZE + VIS_ALIGN];
221 213 uint32_t len = (input_len + buf_index) & ~0x3f;
222 214 int svfp_ok;
223 215
224 216 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, 64);
225 217 svfp_ok = ((len >= vis_sha1_svfp_threshold) ? 1 : 0);
226 218 usevis = fpu_exists && sha1_savefp(fpu, svfp_ok);
227 219 } else {
228 220 usevis = 0;
229 221 }
230 222 #endif /* _KERNEL */
231 223
232 224 /*
233 225 * general optimization:
234 226 *
235 227 * only do initial bcopy() and SHA1Transform() if
236 228 * buf_index != 0. if buf_index == 0, we're just
237 229 * wasting our time doing the bcopy() since there
238 230 * wasn't any data left over from a previous call to
239 231 * SHA1Update().
240 232 */
241 233
242 234 if (buf_index) {
243 235 bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
244 236 if (usevis) {
245 237 SHA1TransformVIS(X0,
246 238 ctx->buf_un.buf32,
247 239 &ctx->state[0], VIS);
248 240 } else {
249 241 SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
250 242 }
251 243 i = buf_len;
252 244 }
253 245
254 246 /*
255 247 * VIS SHA-1: uses the VIS 1.0 instructions to accelerate
256 248 * SHA-1 processing. This is achieved by "offloading" the
257 249 * computation of the message schedule (MS) to the VIS units.
258 250 * This allows the VIS computation of the message schedule
259 251 * to be performed in parallel with the standard integer
260 252 * processing of the remainder of the SHA-1 computation.
261 253 * This improves performance by up to around 1.37X, compared to
262 254 * an optimized integer-only implementation.
263 255 *
264 256 * The VIS implementation of SHA1Transform has a different API
265 257 * to the standard integer version:
266 258 *
267 259 * void SHA1TransformVIS(
268 260 * uint64_t *, // Pointer to MS for ith block
269 261 * uint32_t *, // Pointer to ith block of message data
270 262 * uint32_t *, // Pointer to SHA state i.e. ctx->state
271 263 * uint64_t *, // Pointer to various VIS constants
272 264 * )
273 265 *
274 266 * Note: the message data must be 4-byte aligned.
275 267 *
276 268 * Function requires VIS 1.0 support.
277 269 *
278 270 * Handling is provided to deal with arbitrary byte alignment
279 271 * of the input data but the performance gains are reduced
280 272 * for alignments other than 4-bytes.
281 273 */
282 274 if (usevis) {
283 275 if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
284 276 /*
285 277 * Main processing loop - input misaligned
286 278 */
287 279 for (; i + 63 < input_len; i += 64) {
288 280 bcopy(&input[i], input64, 64);
289 281 SHA1TransformVIS(X0,
290 282 (uint32_t *)input64,
291 283 &ctx->state[0], VIS);
292 284 }
293 285 } else {
294 286 /*
295 287 * Main processing loop - input 4-byte aligned
296 288 */
297 289 for (; i + 63 < input_len; i += 64) {
298 290 SHA1TransformVIS(X0,
299 - /* LINTED E_BAD_PTR_CAST_ALIGN */
300 - (uint32_t *)&input[i],
291 + /* LINTED E_BAD_PTR_CAST_ALIGN */
292 + (uint32_t *)&input[i], /* CSTYLED */
301 293 &ctx->state[0], VIS);
302 294 }
303 295
304 296 }
305 297 #ifdef _KERNEL
306 298 sha1_restorefp(fpu);
307 299 #endif /* _KERNEL */
308 300 } else {
309 301 for (; i + 63 < input_len; i += 64) {
310 302 SHA1_TRANSFORM(ctx, &input[i]);
311 303 }
312 304 }
313 305
314 306 /*
315 307 * general optimization:
316 308 *
317 309 * if i and input_len are the same, return now instead
318 310 * of calling bcopy(), since the bcopy() in this case
319 311 * will be an expensive nop.
320 312 */
321 313
322 314 if (input_len == i)
323 315 return;
324 316
325 317 buf_index = 0;
326 318 }
327 319
328 320 /* buffer remaining input */
329 321 bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
330 322 }
331 323
332 324 #else /* VIS_SHA1 */
333 325
334 326 void
335 327 SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
336 328 {
337 329 uint32_t i, buf_index, buf_len;
338 330 const uint8_t *input = inptr;
339 331 #if defined(__amd64)
340 332 uint32_t block_count;
341 333 #endif /* __amd64 */
342 334
343 335 /* check for noop */
344 336 if (input_len == 0)
345 337 return;
346 338
347 339 /* compute number of bytes mod 64 */
348 340 buf_index = (ctx->count[1] >> 3) & 0x3F;
349 341
350 342 /* update number of bits */
351 343 if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
352 344 ctx->count[0]++;
353 345
354 346 ctx->count[0] += (input_len >> 29);
355 347
356 348 buf_len = 64 - buf_index;
357 349
358 350 /* transform as many times as possible */
359 351 i = 0;
360 352 if (input_len >= buf_len) {
361 353
362 354 /*
363 355 * general optimization:
364 356 *
365 357 * only do initial bcopy() and SHA1Transform() if
366 358 * buf_index != 0. if buf_index == 0, we're just
367 359 * wasting our time doing the bcopy() since there
368 360 * wasn't any data left over from a previous call to
369 361 * SHA1Update().
370 362 */
371 363
372 364 if (buf_index) {
373 365 bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
374 366 SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
375 367 i = buf_len;
376 368 }
377 369
378 370 #if !defined(__amd64)
379 371 for (; i + 63 < input_len; i += 64)
380 372 SHA1_TRANSFORM(ctx, &input[i]);
381 373 #else
382 374 block_count = (input_len - i) >> 6;
383 375 if (block_count > 0) {
384 376 SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
385 377 i += block_count << 6;
386 378 }
387 379 #endif /* !__amd64 */
388 380
389 381 /*
390 382 * general optimization:
391 383 *
392 384 * if i and input_len are the same, return now instead
393 385 * of calling bcopy(), since the bcopy() in this case
394 386 * will be an expensive nop.
395 387 */
396 388
397 389 if (input_len == i)
398 390 return;
399 391
400 392 buf_index = 0;
401 393 }
402 394
403 395 /* buffer remaining input */
404 396 bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
405 397 }
406 398
407 399 #endif /* VIS_SHA1 */
408 400
409 401 /*
410 402 * SHA1Final()
411 403 *
412 404 * purpose: ends an sha1 digest operation, finalizing the message digest and
413 405 * zeroing the context.
414 406 * input: uchar_t * : A buffer to store the digest.
415 407 * : The function actually uses void* because many
416 408 * : callers pass things other than uchar_t here.
417 409 * SHA1_CTX * : the context to finalize, save, and zero
418 410 * output: void
419 411 */
420 412
421 413 void
422 414 SHA1Final(void *digest, SHA1_CTX *ctx)
423 415 {
424 416 uint8_t bitcount_be[sizeof (ctx->count)];
425 417 uint32_t index = (ctx->count[1] >> 3) & 0x3f;
426 418
427 419 /* store bit count, big endian */
428 420 Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
429 421
430 422 /* pad out to 56 mod 64 */
431 423 SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
432 424
433 425 /* append length (before padding) */
434 426 SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
435 427
436 428 /* store state in digest */
437 429 Encode(digest, ctx->state, sizeof (ctx->state));
438 430
439 431 /* zeroize sensitive information */
440 432 bzero(ctx, sizeof (*ctx));
441 433 }
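The "pad out to 56 mod 64" expression in SHA1Final() is easy to misread, so a small self-contained check may help; the helper below is hypothetical and simply restates the expression used above, with example byte counts.

#include <assert.h>
#include <stddef.h>

/*
 * Hypothetical helper, for illustration only: the number of PADDING
 * bytes SHA1Final() feeds to SHA1Update() when `index' bytes (mod 64)
 * are already buffered.  Message plus padding must leave exactly
 * 8 bytes for the big-endian bit count in the last 64-byte block.
 */
static size_t
sha1_pad_len(size_t index)
{
	return ((size_t)(((index < 56) ? 56 : 120) - index));
}

int
main(void)
{
	assert(sha1_pad_len(10) == 46);	/* 10 + 46 + 8 == 64  (one block)  */
	assert(sha1_pad_len(60) == 60);	/* 60 + 60 + 8 == 128 (two blocks) */
	return (0);
}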
442 434
443 435
444 436 #if !defined(__amd64)
445 437
446 438 typedef uint32_t sha1word;
447 439
448 440 /*
449 441 * sparc optimization:
450 442 *
451 443 * on the sparc, we can load big endian 32-bit data easily. note that
452 444 * special care must be taken to ensure the address is 32-bit aligned.
453 445 * in the interest of speed, we don't check to make sure, since
454 446 * careful programming can guarantee this for us.
455 447 */
456 448
457 449 #if defined(_BIG_ENDIAN)
458 -
459 450 #define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
460 451
461 -#else /* !defined(_BIG_ENDIAN) */
452 +#elif defined(HAVE_HTONL)
453 +#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
462 454
463 -#if defined(HAVE_BSWAP)
464 -
465 -#define LOAD_BIG_32(addr) bswap(*((uint32_t *)(addr)))
466 -
467 -#else /* !defined(HAVE_BSWAP) */
468 -
455 +#else
469 456 /* little endian -- will work on big endian, but slowly */
470 457 #define LOAD_BIG_32(addr) \
471 458 (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
459 +#endif /* _BIG_ENDIAN */
472 460
473 -#endif /* !defined(HAVE_BSWAP) */
474 -
475 -#endif /* !defined(_BIG_ENDIAN) */
476 -
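With HAVE_BSWAP gone, the little-endian LOAD_BIG_32() now relies on htonl() to do the 32-bit byte swap (which this changeset expects to compile down to a single bswap on x86 via <sys/byteorder.h>). A small standalone sanity check, using the portable htonl() from <arpa/inet.h> purely for illustration, shows that the htonl() form and the shift/or fallback agree on an aligned word:

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>	/* htonl(); this file's build gets it from <sys/byteorder.h> */

int
main(void)
{
	const uint8_t buf[4] = { 0x12, 0x34, 0x56, 0x78 };
	uint32_t word, via_htonl, via_shifts;

	(void) memcpy(&word, buf, sizeof (word));	/* aligned 32-bit load */
	via_htonl = htonl(word);			/* LOAD_BIG_32() fast path */
	via_shifts = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) |
	    ((uint32_t)buf[2] << 8) | buf[3];		/* portable fallback */
	assert(via_htonl == 0x12345678 && via_htonl == via_shifts);
	return (0);
}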
477 461 /*
478 462 * SHA1Transform()
479 463 */
480 464 #if defined(W_ARRAY)
481 465 #define W(n) w[n]
482 466 #else /* !defined(W_ARRAY) */
483 467 #define W(n) w_ ## n
484 468 #endif /* !defined(W_ARRAY) */
485 469
486 470
487 471 #if defined(__sparc)
488 472
489 473 /*
490 474 * sparc register window optimization:
491 475 *
492 476 * `a', `b', `c', `d', and `e' are passed into SHA1Transform
493 477 * explicitly since it increases the number of registers available to
494 478 * the compiler. under this scheme, these variables can be held in
495 479 * %i0 - %i4, which leaves more local and out registers available.
496 480 *
497 481 * purpose: sha1 transformation -- updates the digest based on `block'
498 482 * input: uint32_t : bytes 1 - 4 of the digest
499 483 * uint32_t : bytes 5 - 8 of the digest
500 484 * uint32_t : bytes 9 - 12 of the digest
501 485 * uint32_t : bytes 13 - 16 of the digest
502 486 * uint32_t : bytes 17 - 20 of the digest
503 487 * SHA1_CTX * : the context to update
504 488 * uint8_t [64]: the block to use to update the digest
505 489 * output: void
506 490 */
507 491
508 492 void
509 493 SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
510 494 SHA1_CTX *ctx, const uint8_t blk[64])
511 495 {
512 496 /*
513 497 * sparc optimization:
514 498 *
515 499 * while it is somewhat counter-intuitive, on sparc, it is
516 500 * more efficient to place all the constants used in this
517 501 * function in an array and load the values out of the array
518 502 * than to manually load the constants. this is because
519 503 * setting a register to a 32-bit value takes two ops in most
520 504 * cases: a `sethi' and an `or', but loading a 32-bit value
521 505 * from memory only takes one `ld' (or `lduw' on v9). while
522 506 * this increases memory usage, the compiler can find enough
523 507 * other things to do while waiting so that the pipeline does
524 508 * not stall. additionally, it is likely that many of these
525 509 * constants are cached so that later accesses do not even go
526 510 * out to the bus.
527 511 *
528 512 * this array is declared `static' to keep the compiler from
529 513 * having to bcopy() this array onto the stack frame of
530 514 * SHA1Transform() each time it is called -- which is
531 515 * unacceptably expensive.
532 516 *
533 517 * the `const' is to ensure that callers are good citizens and
534 518 * do not try to munge the array. since these routines are
535 519 * going to be called from inside multithreaded kernelland,
536 520 * this is a good safety check. -- `sha1_consts' will end up in
537 521 * .rodata.
538 522 *
539 523 * unfortunately, loading from an array in this manner hurts
540 - * performance under intel. so, there is a macro,
524 + * performance under Intel. So, there is a macro,
541 525 * SHA1_CONST(), used in SHA1Transform(), that either expands to
542 526 * a reference to this array, or to the actual constant,
543 527 * depending on what platform this code is compiled for.
544 528 */
545 529
546 530 static const uint32_t sha1_consts[] = {
547 - SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3,
531 + SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
548 532 };
549 533
550 534 /*
551 535 * general optimization:
552 536 *
553 537 * use individual integers instead of using an array. this is a
554 538 * win, although the amount it wins by seems to vary quite a bit.
555 539 */
556 540
557 541 uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
558 542 uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
559 543
560 544 /*
561 545 * sparc optimization:
562 546 *
563 547 * if `block' is already aligned on a 4-byte boundary, use
564 548 * LOAD_BIG_32() directly. otherwise, bcopy() into a
565 549 * buffer that *is* aligned on a 4-byte boundary and then do
566 550 * the LOAD_BIG_32() on that buffer. benchmarks have shown
567 551 * that using the bcopy() is better than loading the bytes
568 552 * individually and doing the endian-swap by hand.
569 553 *
570 554 * even though it's quite tempting to simply do:
571 555 *
572 556 * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
573 557 *
574 558 * and only have one set of LOAD_BIG_32()'s, the compiler
575 559 * *does not* like that, so please resist the urge.
576 560 */
577 561
578 562 if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
579 563 bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
580 564 w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
581 565 w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
582 566 w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
583 567 w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
584 568 w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
585 569 w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
586 570 w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9);
587 571 w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8);
588 572 w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7);
589 573 w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6);
590 574 w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5);
591 575 w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4);
592 576 w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3);
593 577 w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2);
594 578 w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1);
595 579 w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0);
596 580 } else {
597 581 /*LINTED*/
598 582 w_15 = LOAD_BIG_32(blk + 60);
599 583 /*LINTED*/
600 584 w_14 = LOAD_BIG_32(blk + 56);
601 585 /*LINTED*/
602 586 w_13 = LOAD_BIG_32(blk + 52);
603 587 /*LINTED*/
604 588 w_12 = LOAD_BIG_32(blk + 48);
605 589 /*LINTED*/
606 590 w_11 = LOAD_BIG_32(blk + 44);
607 591 /*LINTED*/
608 592 w_10 = LOAD_BIG_32(blk + 40);
609 593 /*LINTED*/
610 594 w_9 = LOAD_BIG_32(blk + 36);
611 595 /*LINTED*/
612 596 w_8 = LOAD_BIG_32(blk + 32);
613 597 /*LINTED*/
614 598 w_7 = LOAD_BIG_32(blk + 28);
615 599 /*LINTED*/
616 600 w_6 = LOAD_BIG_32(blk + 24);
617 601 /*LINTED*/
618 602 w_5 = LOAD_BIG_32(blk + 20);
619 603 /*LINTED*/
620 604 w_4 = LOAD_BIG_32(blk + 16);
621 605 /*LINTED*/
622 606 w_3 = LOAD_BIG_32(blk + 12);
623 607 /*LINTED*/
624 608 w_2 = LOAD_BIG_32(blk + 8);
625 609 /*LINTED*/
626 610 w_1 = LOAD_BIG_32(blk + 4);
627 611 /*LINTED*/
628 612 w_0 = LOAD_BIG_32(blk + 0);
629 613 }
630 614 #else /* !defined(__sparc) */
631 615
632 -void
616 +void /* CSTYLED */
633 617 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
634 618 {
619 + /* CSTYLED */
635 620 sha1word a = ctx->state[0];
636 621 sha1word b = ctx->state[1];
637 622 sha1word c = ctx->state[2];
638 623 sha1word d = ctx->state[3];
639 624 sha1word e = ctx->state[4];
640 625
641 626 #if defined(W_ARRAY)
642 627 sha1word w[16];
643 628 #else /* !defined(W_ARRAY) */
644 629 sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
645 630 sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
646 631 #endif /* !defined(W_ARRAY) */
647 632
648 633 W(0) = LOAD_BIG_32(blk + 0);
649 634 W(1) = LOAD_BIG_32(blk + 4);
650 635 W(2) = LOAD_BIG_32(blk + 8);
651 636 W(3) = LOAD_BIG_32(blk + 12);
652 637 W(4) = LOAD_BIG_32(blk + 16);
653 638 W(5) = LOAD_BIG_32(blk + 20);
654 639 W(6) = LOAD_BIG_32(blk + 24);
655 640 W(7) = LOAD_BIG_32(blk + 28);
656 641 W(8) = LOAD_BIG_32(blk + 32);
657 642 W(9) = LOAD_BIG_32(blk + 36);
658 643 W(10) = LOAD_BIG_32(blk + 40);
659 644 W(11) = LOAD_BIG_32(blk + 44);
660 645 W(12) = LOAD_BIG_32(blk + 48);
661 646 W(13) = LOAD_BIG_32(blk + 52);
662 647 W(14) = LOAD_BIG_32(blk + 56);
663 648 W(15) = LOAD_BIG_32(blk + 60);
664 649
665 650 #endif /* !defined(__sparc) */
666 651
667 652 /*
668 653 * general optimization:
669 654 *
670 655 * even though this approach is described in the standard as
671 656 * being slower algorithmically, it is 30-40% faster than the
672 657 * "faster" version under SPARC, because this version has more
673 658 * of the constraints specified at compile-time and uses fewer
674 659 * variables (and therefore has better register utilization)
675 660 * than its "speedier" brother. (i've tried both, trust me)
676 661 *
677 662 * for either method given in the spec, there is an "assignment"
678 663 * phase where the following takes place:
679 664 *
680 665 * tmp = (main_computation);
681 666 * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
682 667 *
683 668 * we can make the algorithm go faster by not doing this work,
684 669 * but just pretending that `d' is now `e', etc. this works
685 670 * really well and obviates the need for a temporary variable.
686 671 * however, we still explicitly perform the rotate action,
687 672 * since it is cheaper on SPARC to do it once than to have to
688 673 * do it over and over again.
689 674 */
690 675
691 676 /* round 1 */
692 677 e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
693 678 b = ROTATE_LEFT(b, 30);
694 679
695 680 d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
696 681 a = ROTATE_LEFT(a, 30);
697 682
698 683 c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
699 684 e = ROTATE_LEFT(e, 30);
700 685
701 686 b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
702 687 d = ROTATE_LEFT(d, 30);
703 688
704 689 a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
705 690 c = ROTATE_LEFT(c, 30);
706 691
707 692 e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
708 693 b = ROTATE_LEFT(b, 30);
709 694
710 695 d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
711 696 a = ROTATE_LEFT(a, 30);
712 697
713 698 c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
714 699 e = ROTATE_LEFT(e, 30);
715 700
716 701 b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
717 702 d = ROTATE_LEFT(d, 30);
718 703
719 704 a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
720 705 c = ROTATE_LEFT(c, 30);
721 706
722 707 e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
723 708 b = ROTATE_LEFT(b, 30);
724 709
725 710 d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
726 711 a = ROTATE_LEFT(a, 30);
727 712
728 713 c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
729 714 e = ROTATE_LEFT(e, 30);
730 715
731 716 b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
732 717 d = ROTATE_LEFT(d, 30);
733 718
734 719 a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
735 720 c = ROTATE_LEFT(c, 30);
736 721
737 722 e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
738 723 b = ROTATE_LEFT(b, 30);
739 724
740 725 W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 16 */
741 726 d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
742 727 a = ROTATE_LEFT(a, 30);
743 728
744 729 W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 17 */
745 730 c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
746 731 e = ROTATE_LEFT(e, 30);
747 732
748 733 W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 18 */
749 734 b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
750 735 d = ROTATE_LEFT(d, 30);
751 736
752 737 W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 19 */
753 738 a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
754 739 c = ROTATE_LEFT(c, 30);
755 740
756 741 /* round 2 */
757 742 W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 20 */
758 743 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
759 744 b = ROTATE_LEFT(b, 30);
760 745
761 746 W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 21 */
762 747 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
763 748 a = ROTATE_LEFT(a, 30);
764 749
765 750 W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 22 */
766 751 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
767 752 e = ROTATE_LEFT(e, 30);
768 753
769 754 W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 23 */
770 755 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
771 756 d = ROTATE_LEFT(d, 30);
772 757
773 758 W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 24 */
774 759 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
775 760 c = ROTATE_LEFT(c, 30);
776 761
777 762 W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 25 */
778 763 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
779 764 b = ROTATE_LEFT(b, 30);
780 765
781 766 W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 26 */
782 767 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
783 768 a = ROTATE_LEFT(a, 30);
784 769
785 770 W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 27 */
786 771 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
787 772 e = ROTATE_LEFT(e, 30);
788 773
789 774 W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 28 */
790 775 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
791 776 d = ROTATE_LEFT(d, 30);
792 777
793 778 W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */
794 779 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
795 780 c = ROTATE_LEFT(c, 30);
796 781
797 782 W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 30 */
798 783 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
799 784 b = ROTATE_LEFT(b, 30);
800 785
801 786 W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 31 */
802 787 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
803 788 a = ROTATE_LEFT(a, 30);
804 789
805 790 W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 32 */
806 791 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
807 792 e = ROTATE_LEFT(e, 30);
808 793
809 794 W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 33 */
810 795 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
811 796 d = ROTATE_LEFT(d, 30);
812 797
813 798 W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 34 */
814 799 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
815 800 c = ROTATE_LEFT(c, 30);
816 801
817 802 W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 35 */
818 803 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
819 804 b = ROTATE_LEFT(b, 30);
820 805
821 806 W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 36 */
822 807 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
823 808 a = ROTATE_LEFT(a, 30);
824 809
825 810 W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 37 */
826 811 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
827 812 e = ROTATE_LEFT(e, 30);
828 813
829 814 W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 38 */
830 815 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
831 816 d = ROTATE_LEFT(d, 30);
832 817
833 818 W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 39 */
834 819 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
835 820 c = ROTATE_LEFT(c, 30);
836 821
837 822 /* round 3 */
838 823 W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 40 */
839 824 e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
840 825 b = ROTATE_LEFT(b, 30);
841 826
842 827 W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 41 */
843 828 d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
844 829 a = ROTATE_LEFT(a, 30);
845 830
846 831 W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 42 */
847 832 c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
848 833 e = ROTATE_LEFT(e, 30);
849 834
850 835 W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 43 */
851 836 b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
852 837 d = ROTATE_LEFT(d, 30);
853 838
854 839 W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 44 */
855 840 a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
856 841 c = ROTATE_LEFT(c, 30);
857 842
858 843 W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */
859 844 e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
860 845 b = ROTATE_LEFT(b, 30);
861 846
862 847 W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 46 */
863 848 d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
864 849 a = ROTATE_LEFT(a, 30);
865 850
866 851 W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 47 */
867 852 c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
868 853 e = ROTATE_LEFT(e, 30);
869 854
870 855 W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 48 */
871 856 b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
872 857 d = ROTATE_LEFT(d, 30);
873 858
874 859 W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 49 */
875 860 a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
876 861 c = ROTATE_LEFT(c, 30);
877 862
878 863 W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 50 */
879 864 e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
880 865 b = ROTATE_LEFT(b, 30);
881 866
882 867 W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 51 */
883 868 d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
884 869 a = ROTATE_LEFT(a, 30);
885 870
886 871 W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 52 */
887 872 c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
888 873 e = ROTATE_LEFT(e, 30);
889 874
890 875 W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 53 */
891 876 b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
892 877 d = ROTATE_LEFT(d, 30);
893 878
894 879 W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 54 */
895 880 a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
896 881 c = ROTATE_LEFT(c, 30);
897 882
898 883 W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 55 */
899 884 e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
900 885 b = ROTATE_LEFT(b, 30);
901 886
902 887 W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 56 */
903 888 d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
904 889 a = ROTATE_LEFT(a, 30);
905 890
906 891 W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 57 */
907 892 c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
908 893 e = ROTATE_LEFT(e, 30);
909 894
910 895 W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 58 */
911 896 b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
912 897 d = ROTATE_LEFT(d, 30);
913 898
914 899 W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 59 */
915 900 a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
916 901 c = ROTATE_LEFT(c, 30);
917 902
918 903 /* round 4 */
919 904 W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 60 */
920 905 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
921 906 b = ROTATE_LEFT(b, 30);
922 907
923 908 W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */
924 909 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
925 910 a = ROTATE_LEFT(a, 30);
926 911
927 912 W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 62 */
928 913 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
929 914 e = ROTATE_LEFT(e, 30);
930 915
931 916 W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 63 */
932 917 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
933 918 d = ROTATE_LEFT(d, 30);
934 919
935 920 W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 64 */
936 921 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
937 922 c = ROTATE_LEFT(c, 30);
938 923
939 924 W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 65 */
940 925 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
941 926 b = ROTATE_LEFT(b, 30);
942 927
943 928 W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 66 */
944 929 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
945 930 a = ROTATE_LEFT(a, 30);
946 931
947 932 W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 67 */
948 933 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
949 934 e = ROTATE_LEFT(e, 30);
950 935
951 936 W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 68 */
952 937 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
953 938 d = ROTATE_LEFT(d, 30);
954 939
955 940 W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 69 */
956 941 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
957 942 c = ROTATE_LEFT(c, 30);
958 943
959 944 W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 70 */
960 945 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
961 946 b = ROTATE_LEFT(b, 30);
962 947
963 948 W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 71 */
964 949 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
965 950 a = ROTATE_LEFT(a, 30);
966 951
967 952 W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 72 */
968 953 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
969 954 e = ROTATE_LEFT(e, 30);
970 955
971 956 W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 73 */
972 957 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
973 958 d = ROTATE_LEFT(d, 30);
974 959
975 960 W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 74 */
976 961 a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
977 962 c = ROTATE_LEFT(c, 30);
978 963
979 964 W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 75 */
980 965 e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
981 966 b = ROTATE_LEFT(b, 30);
982 967
983 968 W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 76 */
984 969 d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
985 970 a = ROTATE_LEFT(a, 30);
986 971
987 972 W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */
988 973 c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
989 974 e = ROTATE_LEFT(e, 30);
990 975
991 976 W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 78 */
992 977 b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
993 978 d = ROTATE_LEFT(d, 30);
994 979
995 980 W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 79 */
996 981
997 982 ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
998 983 SHA1_CONST(3);
999 984 ctx->state[1] += b;
1000 985 ctx->state[2] += ROTATE_LEFT(c, 30);
1001 986 ctx->state[3] += d;
1002 987 ctx->state[4] += e;
1003 988
1004 989 /* zeroize sensitive information */
1005 990 W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
1006 991 W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
1007 992 }
1008 993 #endif /* !__amd64 */
1009 994
1010 995
1011 996 /*
1012 997 * Encode()
1013 998 *
1014 999 * purpose: to convert a list of numbers from little endian to big endian
1015 1000 * input: uint8_t * : place to store the converted big endian numbers
1016 1001 * uint32_t * : place to get numbers to convert from
1017 1002 * size_t : the length of the input in bytes
1018 1003 * output: void
1019 1004 */
1020 1005
1021 1006 static void
1022 1007 Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
1023 1008 size_t len)
1024 1009 {
1025 1010 size_t i, j;
1026 1011
1027 1012 #if defined(__sparc)
1028 1013 if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
1029 1014 for (i = 0, j = 0; j < len; i++, j += 4) {
1030 1015 /* LINTED: pointer alignment */
1031 1016 *((uint32_t *)(output + j)) = input[i];
1032 1017 }
1033 1018 } else {
1034 1019 #endif /* little endian -- will work on big endian, but slowly */
1035 1020 for (i = 0, j = 0; j < len; i++, j += 4) {
1036 1021 output[j] = (input[i] >> 24) & 0xff;
1037 1022 output[j + 1] = (input[i] >> 16) & 0xff;
1038 1023 output[j + 2] = (input[i] >> 8) & 0xff;
1039 1024 output[j + 3] = input[i] & 0xff;
1040 1025 }
1041 1026 #if defined(__sparc)
1042 1027 }
1043 1028 #endif
1044 1029 }
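For completeness, the three entry points implemented in this file are normally driven as sketched below. This is an illustrative userland example, assuming the <sys/sha1.h> header and whatever library provides SHA1Init/SHA1Update/SHA1Final on the target system; the input string and the explicit 20-byte digest size are examples only.

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/sha1.h>	/* SHA1_CTX, SHA1Init(), SHA1Update(), SHA1Final() */

int
main(void)
{
	SHA1_CTX ctx;
	uint8_t digest[20];		/* SHA-1 yields a 160-bit digest */
	const char *msg = "abc";	/* example input only */
	int i;

	SHA1Init(&ctx);
	SHA1Update(&ctx, msg, strlen(msg));	/* may be called repeatedly */
	SHA1Final(digest, &ctx);		/* also zeroizes the context */

	for (i = 0; i < 20; i++)
		(void) printf("%02x", digest[i]);
	(void) printf("\n");
	return (0);
}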