6717509 Need to use bswap/bswapq for byte swap of 64-bit integer on x32/x64 (fix lint)
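
For context on the fix: on little-endian x86/x64 a big-endian word can be loaded with a plain memory read followed by a single bswap (32-bit) or bswapq (64-bit) instruction, and implementing LOAD_BIG_32() that way means casting the uint8_t block pointer to a wider integer pointer, which lint flags at every call site unless the cast goes through (void *). The sketch below only illustrates that idea; the function names and the use of GCC's __builtin_bswap intrinsics are assumptions, not the actual illumos macro definitions.

        #include <stdint.h>

        /*
         * Hypothetical helpers, not the real byteorder macros: load a
         * big-endian word on a little-endian machine with one bswap
         * (or bswapq) instruction.  Taking const void * hides the
         * uint8_t * -> uint32_t * conversion that lint would otherwise
         * warn about; the caller must pass a suitably aligned pointer.
         */
        static inline uint32_t
        load_big_32(const void *p)
        {
                return (__builtin_bswap32(*(const uint32_t *)p));
        }

        static inline uint64_t
        load_big_64(const void *p)
        {
                return (__builtin_bswap64(*(const uint64_t *)p));
        }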
------- old version -------

 613         }
 614 #else   /* !defined(__sparc) */
 615 
 616 void /* CSTYLED */
 617 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
 618 {
 619         /* CSTYLED */
 620         sha1word a = ctx->state[0];
 621         sha1word b = ctx->state[1];
 622         sha1word c = ctx->state[2];
 623         sha1word d = ctx->state[3];
 624         sha1word e = ctx->state[4];
 625 
 626 #if     defined(W_ARRAY)
 627         sha1word        w[16];
 628 #else   /* !defined(W_ARRAY) */
 629         sha1word        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 630         sha1word        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 631 #endif  /* !defined(W_ARRAY) */
 632 
 633         W(0)  = LOAD_BIG_32(blk +  0);
 634         W(1)  = LOAD_BIG_32(blk +  4);
 635         W(2)  = LOAD_BIG_32(blk +  8);
 636         W(3)  = LOAD_BIG_32(blk + 12);
 637         W(4)  = LOAD_BIG_32(blk + 16);
 638         W(5)  = LOAD_BIG_32(blk + 20);
 639         W(6)  = LOAD_BIG_32(blk + 24);
 640         W(7)  = LOAD_BIG_32(blk + 28);
 641         W(8)  = LOAD_BIG_32(blk + 32);
 642         W(9)  = LOAD_BIG_32(blk + 36);
 643         W(10) = LOAD_BIG_32(blk + 40);
 644         W(11) = LOAD_BIG_32(blk + 44);
 645         W(12) = LOAD_BIG_32(blk + 48);
 646         W(13) = LOAD_BIG_32(blk + 52);
 647         W(14) = LOAD_BIG_32(blk + 56);
 648         W(15) = LOAD_BIG_32(blk + 60);
 649 
 650 #endif  /* !defined(__sparc) */
 651 
 652         /*
 653          * general optimization:
 654          *
 655          * even though this approach is described in the standard as
 656          * being slower algorithmically, it is 30-40% faster than the
 657          * "faster" version under SPARC, because this version has more
 658          * of the constraints specified at compile-time and uses fewer
 659          * variables (and therefore has better register utilization)
 660          * than its "speedier" brother.  (i've tried both, trust me)
 661          *
 662          * for either method given in the spec, there is an "assignment"
 663          * phase where the following takes place:
 664          *
 665          *      tmp = (main_computation);
 666          *      e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
 667          *
 668          * we can make the algorithm go faster by not doing this work,


------- new version -------

 613         }
 614 #else   /* !defined(__sparc) */
 615 
 616 void /* CSTYLED */
 617 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
 618 {
 619         /* CSTYLED */
 620         sha1word a = ctx->state[0];
 621         sha1word b = ctx->state[1];
 622         sha1word c = ctx->state[2];
 623         sha1word d = ctx->state[3];
 624         sha1word e = ctx->state[4];
 625 
 626 #if     defined(W_ARRAY)
 627         sha1word        w[16];
 628 #else   /* !defined(W_ARRAY) */
 629         sha1word        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 630         sha1word        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 631 #endif  /* !defined(W_ARRAY) */
 632 
 633         W(0)  = LOAD_BIG_32((void *)(blk +  0));
 634         W(1)  = LOAD_BIG_32((void *)(blk +  4));
 635         W(2)  = LOAD_BIG_32((void *)(blk +  8));
 636         W(3)  = LOAD_BIG_32((void *)(blk + 12));
 637         W(4)  = LOAD_BIG_32((void *)(blk + 16));
 638         W(5)  = LOAD_BIG_32((void *)(blk + 20));
 639         W(6)  = LOAD_BIG_32((void *)(blk + 24));
 640         W(7)  = LOAD_BIG_32((void *)(blk + 28));
 641         W(8)  = LOAD_BIG_32((void *)(blk + 32));
 642         W(9)  = LOAD_BIG_32((void *)(blk + 36));
 643         W(10) = LOAD_BIG_32((void *)(blk + 40));
 644         W(11) = LOAD_BIG_32((void *)(blk + 44));
 645         W(12) = LOAD_BIG_32((void *)(blk + 48));
 646         W(13) = LOAD_BIG_32((void *)(blk + 52));
 647         W(14) = LOAD_BIG_32((void *)(blk + 56));
 648         W(15) = LOAD_BIG_32((void *)(blk + 60));
 649 
 650 #endif  /* !defined(__sparc) */
 651 
 652         /*
 653          * general optimization:
 654          *
 655          * even though this approach is described in the standard as
 656          * being slower algorithmically, it is 30-40% faster than the
 657          * "faster" version under SPARC, because this version has more
 658          * of the constraints specified at compile-time and uses fewer
 659          * variables (and therefore has better register utilization)
 660          * than its "speedier" brother.  (i've tried both, trust me)
 661          *
 662          * for either method given in the spec, there is an "assignment"
 663          * phase where the following takes place:
 664          *
 665          *      tmp = (main_computation);
 666          *      e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
 667          *
 668          * we can make the algorithm go faster by not doing this work,
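
To make the comment's point concrete, here is a minimal sketch of two unrolled rounds using the role-rotation trick it describes; ROTATE_LEFT, F, and SHA1_CONST are assumed helper names rather than quotations from this file:

        /* round i: the new working value lands directly in e's slot */
        e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0);
        b = ROTATE_LEFT(b, 30);

        /*
         * round i+1: read the variables with every role shifted by
         * one (e plays a, a plays b, ...), so the "e = d; d = c; ..."
         * copying never has to happen.
         */
        d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0);
        a = ROTATE_LEFT(a, 30);

Only one explicit rotate per round remains; everything else is resolved at compile time by the argument order of each unrolled round.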