613 }
614 #else /* !defined(__sparc) */
615
616 void /* CSTYLED */
617 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
618 {
619 /* CSTYLED */
620 sha1word a = ctx->state[0];
621 sha1word b = ctx->state[1];
622 sha1word c = ctx->state[2];
623 sha1word d = ctx->state[3];
624 sha1word e = ctx->state[4];
625
626 #if defined(W_ARRAY)
627 sha1word w[16];
628 #else /* !defined(W_ARRAY) */
629 sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
630 sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
631 #endif /* !defined(W_ARRAY) */
632
633 W(0) = LOAD_BIG_32(blk + 0);
634 W(1) = LOAD_BIG_32(blk + 4);
635 W(2) = LOAD_BIG_32(blk + 8);
636 W(3) = LOAD_BIG_32(blk + 12);
637 W(4) = LOAD_BIG_32(blk + 16);
638 W(5) = LOAD_BIG_32(blk + 20);
639 W(6) = LOAD_BIG_32(blk + 24);
640 W(7) = LOAD_BIG_32(blk + 28);
641 W(8) = LOAD_BIG_32(blk + 32);
642 W(9) = LOAD_BIG_32(blk + 36);
643 W(10) = LOAD_BIG_32(blk + 40);
644 W(11) = LOAD_BIG_32(blk + 44);
645 W(12) = LOAD_BIG_32(blk + 48);
646 W(13) = LOAD_BIG_32(blk + 52);
647 W(14) = LOAD_BIG_32(blk + 56);
648 W(15) = LOAD_BIG_32(blk + 60);
649
650 #endif /* !defined(__sparc) */
651
652 /*
653 * general optimization:
654 *
655 * even though this approach is described in the standard as
656 * being slower algorithmically, it is 30-40% faster than the
657 * "faster" version under SPARC, because this version has more
658 * of the constraints specified at compile-time and uses fewer
659 * variables (and therefore has better register utilization)
660 * than its "speedier" brother. (I've tried both, trust me)
661 *
662 * for either method given in the spec, there is an "assignment"
663 * phase where the following takes place:
664 *
665 * tmp = (main_computation);
666 * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
667 *
668 * we can make the algorithm go faster by not doing this work,
|
613 }
614 #else /* !defined(__sparc) */
615
616 void /* CSTYLED */
617 SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
618 {
619 /* CSTYLED */
620 sha1word a = ctx->state[0];
621 sha1word b = ctx->state[1];
622 sha1word c = ctx->state[2];
623 sha1word d = ctx->state[3];
624 sha1word e = ctx->state[4];
625
626 #if defined(W_ARRAY)
627 sha1word w[16];
628 #else /* !defined(W_ARRAY) */
629 sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
630 sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
631 #endif /* !defined(W_ARRAY) */
632
633 W(0) = LOAD_BIG_32((void *)(blk + 0));
634 W(1) = LOAD_BIG_32((void *)(blk + 4));
635 W(2) = LOAD_BIG_32((void *)(blk + 8));
636 W(3) = LOAD_BIG_32((void *)(blk + 12));
637 W(4) = LOAD_BIG_32((void *)(blk + 16));
638 W(5) = LOAD_BIG_32((void *)(blk + 20));
639 W(6) = LOAD_BIG_32((void *)(blk + 24));
640 W(7) = LOAD_BIG_32((void *)(blk + 28));
641 W(8) = LOAD_BIG_32((void *)(blk + 32));
642 W(9) = LOAD_BIG_32((void *)(blk + 36));
643 W(10) = LOAD_BIG_32((void *)(blk + 40));
644 W(11) = LOAD_BIG_32((void *)(blk + 44));
645 W(12) = LOAD_BIG_32((void *)(blk + 48));
646 W(13) = LOAD_BIG_32((void *)(blk + 52));
647 W(14) = LOAD_BIG_32((void *)(blk + 56));
648 W(15) = LOAD_BIG_32((void *)(blk + 60));
649
650 #endif /* !defined(__sparc) */
651
652 /*
653 * general optimization:
654 *
655 * even though this approach is described in the standard as
656 * being slower algorithmically, it is 30-40% faster than the
657 * "faster" version under SPARC, because this version has more
658 * of the constraints specified at compile-time and uses fewer
659 * variables (and therefore has better register utilization)
660 * than its "speedier" brother. (I've tried both, trust me)
661 *
662 * for either method given in the spec, there is an "assignment"
663 * phase where the following takes place:
664 *
665 * tmp = (main_computation);
666 * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
667 *
668 * we can make the algorithm go faster by not doing this work,
|