Print this page
6665607 Need a SHA256/SHA384/SHA512 implementation optimized for 64-bit x86
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/common/crypto/sha2/sha2.c
+++ new/usr/src/common/crypto/sha2/sha2.c
1 1 /*
2 - * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
2 + * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
3 3 * Use is subject to license terms.
4 4 */
5 5
6 -#pragma ident "@(#)sha2.c 1.7 07/04/10 SMI"
6 +#pragma ident "@(#)sha2.c 1.8 08/03/20 SMI"
7 7
8 -
9 8 /*
10 9 * The basic framework for this code came from the reference
11 10 * implementation for MD5. That implementation is Copyright (C)
12 11 * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
13 12 *
14 13 * License to copy and use this software is granted provided that it
15 14 * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
16 15 * Algorithm" in all material mentioning or referencing this software
17 16 * or this function.
18 17 *
19 18 * License is also granted to make and use derivative works provided
20 19 * that such works are identified as "derived from the RSA Data
21 20 * Security, Inc. MD5 Message-Digest Algorithm" in all material
22 21 * mentioning or referencing the derived work.
23 22 *
24 23 * RSA Data Security, Inc. makes no representations concerning either
25 24 * the merchantability of this software or the suitability of this
26 25 * software for any particular purpose. It is provided "as is"
27 26 * without express or implied warranty of any kind.
28 27 *
29 28 * These notices must be retained in any copies of any part of this
30 29 * documentation and/or software.
31 30 *
32 31 * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
33 32 * standard, available at http://www.itl.nist.gov/div897/pubs/fip180-2.htm
34 33 * Not as fast as one would like -- further optimizations are encouraged
35 34 * and appreciated.
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
36 35 */
37 36
38 37 #include <sys/types.h>
39 38 #include <sys/param.h>
40 39 #include <sys/systm.h>
41 40 #include <sys/sysmacros.h>
42 41 #define _SHA2_IMPL
43 42 #include <sys/sha2.h>
44 43 #include <sys/sha2_consts.h>
45 44
46 -#ifndef _KERNEL
45 +#ifdef _KERNEL
46 +#include <sys/cmn_err.h>
47 47
48 +#else
48 49 #include <strings.h>
49 50 #include <stdlib.h>
50 51 #include <errno.h>
51 52
52 53 #pragma weak SHA256Update = SHA2Update
53 54 #pragma weak SHA384Update = SHA2Update
54 55 #pragma weak SHA512Update = SHA2Update
55 56
56 57 #pragma weak SHA256Final = SHA2Final
57 58 #pragma weak SHA384Final = SHA2Final
58 59 #pragma weak SHA512Final = SHA2Final
59 60
60 -#endif /* !_KERNEL */
61 +#endif /* _KERNEL */
61 62
62 -#ifdef _KERNEL
63 -#include <sys/cmn_err.h>
64 -#endif /* _KERNEL */
65 -
66 63 static void Encode(uint8_t *, uint32_t *, size_t);
67 64 static void Encode64(uint8_t *, uint64_t *, size_t);
65 +
66 +#if defined(__amd64)
67 +#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
68 +#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
69 +
70 +void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
71 +void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
72 +
73 +#else
68 74 static void SHA256Transform(SHA2_CTX *, const uint8_t *);
69 75 static void SHA512Transform(SHA2_CTX *, const uint8_t *);
76 +#endif /* __amd64 */
70 77
71 78 static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
72 79
73 80 /* Ch and Maj are the basic SHA2 functions. */
74 81 #define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d)))
75 82 #define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
76 83
77 84 /* Rotates x right n bits. */
78 85 #define ROTR(x, n) \
79 86 (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
80 87
81 88 /* Shift x right n bits */
82 89 #define SHR(x, n) ((x) >> (n))
83 90
84 91 /* SHA256 Functions */
85 92 #define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
86 93 #define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
87 94 #define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
88 95 #define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
89 96
90 97 #define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
91 98 T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \
92 99 d += T1; \
93 100 T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \
94 101 h = T1 + T2
95 102
96 103 /* SHA384/512 Functions */
97 104 #define BIGSIGMA0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
98 105 #define BIGSIGMA1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
99 106 #define SIGMA0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
100 107 #define SIGMA1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
101 108 #define SHA512ROUND(a, b, c, d, e, f, g, h, i, w) \
102 109 T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w; \
103 110 d += T1; \
104 111 T2 = BIGSIGMA0(a) + Maj(a, b, c); \
105 112 h = T1 + T2
106 113
107 114 /*
108 115 * sparc optimization:
109 116 *
110 117 * on the sparc, we can load big endian 32-bit data easily. note that
111 118 * special care must be taken to ensure the address is 32-bit aligned.
112 119 * in the interest of speed, we don't check to make sure, since
113 120 * careful programming can guarantee this for us.
114 121 */
115 122
116 123 #if defined(_BIG_ENDIAN)
117 124
118 125 #define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
119 126
120 127 #else /* little endian -- will work on big endian, but slowly */
121 128
122 129 #define LOAD_BIG_32(addr) \
123 130 (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
124 131 #endif
125 132
126 133
127 134 #if defined(_BIG_ENDIAN)
↓ open down ↓ |
48 lines elided |
↑ open up ↑ |
128 135
129 136 #define LOAD_BIG_64(addr) (*(uint64_t *)(addr))
130 137
131 138 #else /* little endian -- will work on big endian, but slowly */
132 139
133 140 #define LOAD_BIG_64(addr) \
134 141 (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \
135 142 ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
136 143 ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
137 144 ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
138 -
139 145 #endif
140 146
141 147
148 +#if !defined(__amd64)
142 149 /* SHA256 Transform */
143 150
144 151 static void
145 152 SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
146 153 {
147 -
148 154 uint32_t a = ctx->state.s32[0];
149 155 uint32_t b = ctx->state.s32[1];
150 156 uint32_t c = ctx->state.s32[2];
151 157 uint32_t d = ctx->state.s32[3];
152 158 uint32_t e = ctx->state.s32[4];
153 159 uint32_t f = ctx->state.s32[5];
154 160 uint32_t g = ctx->state.s32[6];
155 161 uint32_t h = ctx->state.s32[7];
156 162
157 163 uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
158 164 uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
159 165 uint32_t T1, T2;
160 166
161 167 #if defined(__sparc)
162 168 static const uint32_t sha256_consts[] = {
163 169 SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
164 170 SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
165 171 SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
166 172 SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
167 173 SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
168 174 SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
169 175 SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
170 176 SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
171 177 SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
172 178 SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
173 179 SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
174 180 SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
175 181 SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
176 182 SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
177 183 SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
178 184 SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
179 185 SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
180 186 SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
181 187 SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
182 188 SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
183 189 SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
184 190 SHA256_CONST_63
185 191 };
186 -#endif
192 +#endif /* __sparc */
187 193
188 194 if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
189 195 bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
190 196 blk = (uint8_t *)ctx->buf_un.buf32;
191 197 }
192 198
193 199 /* LINTED E_BAD_PTR_CAST_ALIGN */
194 200 w0 = LOAD_BIG_32(blk + 4 * 0);
195 201 SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
196 202 /* LINTED E_BAD_PTR_CAST_ALIGN */
197 203 w1 = LOAD_BIG_32(blk + 4 * 1);
198 204 SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
199 205 /* LINTED E_BAD_PTR_CAST_ALIGN */
200 206 w2 = LOAD_BIG_32(blk + 4 * 2);
201 207 SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
202 208 /* LINTED E_BAD_PTR_CAST_ALIGN */
203 209 w3 = LOAD_BIG_32(blk + 4 * 3);
204 210 SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
205 211 /* LINTED E_BAD_PTR_CAST_ALIGN */
206 212 w4 = LOAD_BIG_32(blk + 4 * 4);
207 213 SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
208 214 /* LINTED E_BAD_PTR_CAST_ALIGN */
209 215 w5 = LOAD_BIG_32(blk + 4 * 5);
210 216 SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
211 217 /* LINTED E_BAD_PTR_CAST_ALIGN */
212 218 w6 = LOAD_BIG_32(blk + 4 * 6);
213 219 SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
214 220 /* LINTED E_BAD_PTR_CAST_ALIGN */
215 221 w7 = LOAD_BIG_32(blk + 4 * 7);
216 222 SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
217 223 /* LINTED E_BAD_PTR_CAST_ALIGN */
218 224 w8 = LOAD_BIG_32(blk + 4 * 8);
219 225 SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
220 226 /* LINTED E_BAD_PTR_CAST_ALIGN */
221 227 w9 = LOAD_BIG_32(blk + 4 * 9);
222 228 SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
223 229 /* LINTED E_BAD_PTR_CAST_ALIGN */
224 230 w10 = LOAD_BIG_32(blk + 4 * 10);
225 231 SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
226 232 /* LINTED E_BAD_PTR_CAST_ALIGN */
227 233 w11 = LOAD_BIG_32(blk + 4 * 11);
228 234 SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
229 235 /* LINTED E_BAD_PTR_CAST_ALIGN */
230 236 w12 = LOAD_BIG_32(blk + 4 * 12);
231 237 SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
232 238 /* LINTED E_BAD_PTR_CAST_ALIGN */
233 239 w13 = LOAD_BIG_32(blk + 4 * 13);
234 240 SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
235 241 /* LINTED E_BAD_PTR_CAST_ALIGN */
236 242 w14 = LOAD_BIG_32(blk + 4 * 14);
237 243 SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
238 244 /* LINTED E_BAD_PTR_CAST_ALIGN */
239 245 w15 = LOAD_BIG_32(blk + 4 * 15);
240 246 SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
241 247
242 248 w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
243 249 SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
244 250 w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
245 251 SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
246 252 w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
247 253 SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
248 254 w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
249 255 SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
250 256 w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
251 257 SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
252 258 w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
253 259 SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
254 260 w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
255 261 SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
256 262 w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
257 263 SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
258 264 w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
259 265 SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
260 266 w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
261 267 SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
262 268 w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
263 269 SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
264 270 w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
265 271 SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
266 272 w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
267 273 SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
268 274 w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
269 275 SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
270 276 w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
271 277 SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
272 278 w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
273 279 SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
274 280
275 281 w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
276 282 SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
277 283 w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
278 284 SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
279 285 w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
280 286 SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
281 287 w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
282 288 SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
283 289 w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
284 290 SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
285 291 w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
286 292 SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
287 293 w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
288 294 SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
289 295 w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
290 296 SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
291 297 w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
292 298 SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
293 299 w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
294 300 SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
295 301 w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
296 302 SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
297 303 w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
298 304 SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
299 305 w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
300 306 SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
301 307 w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
302 308 SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
303 309 w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
304 310 SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
305 311 w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
306 312 SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
307 313
308 314 w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
309 315 SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
310 316 w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
311 317 SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
312 318 w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
313 319 SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
314 320 w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
315 321 SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
316 322 w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
317 323 SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
318 324 w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
319 325 SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
320 326 w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
321 327 SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
322 328 w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
323 329 SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
324 330 w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
325 331 SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
326 332 w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
327 333 SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
328 334 w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
329 335 SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
330 336 w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
331 337 SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
332 338 w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
333 339 SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
334 340 w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
335 341 SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
336 342 w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
337 343 SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
338 344 w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
339 345 SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
340 346
341 347 ctx->state.s32[0] += a;
342 348 ctx->state.s32[1] += b;
343 349 ctx->state.s32[2] += c;
344 350 ctx->state.s32[3] += d;
345 351 ctx->state.s32[4] += e;
346 352 ctx->state.s32[5] += f;
347 353 ctx->state.s32[6] += g;
348 354 ctx->state.s32[7] += h;
349 355 }
350 356
351 357
352 358 /* SHA384 and SHA512 Transform */
353 359
354 360 static void
355 361 SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
356 362 {
357 363
358 364 uint64_t a = ctx->state.s64[0];
359 365 uint64_t b = ctx->state.s64[1];
360 366 uint64_t c = ctx->state.s64[2];
361 367 uint64_t d = ctx->state.s64[3];
362 368 uint64_t e = ctx->state.s64[4];
363 369 uint64_t f = ctx->state.s64[5];
364 370 uint64_t g = ctx->state.s64[6];
365 371 uint64_t h = ctx->state.s64[7];
366 372
367 373 uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
368 374 uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
369 375 uint64_t T1, T2;
370 376
371 377 #if defined(__sparc)
372 378 static const uint64_t sha512_consts[] = {
373 379 SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
374 380 SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
375 381 SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
376 382 SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
377 383 SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
378 384 SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
379 385 SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
380 386 SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
381 387 SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
382 388 SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
383 389 SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
384 390 SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
385 391 SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
386 392 SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
387 393 SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
388 394 SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
389 395 SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
390 396 SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
↓ open down ↓ |
194 lines elided |
↑ open up ↑ |
391 397 SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
392 398 SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
393 399 SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
394 400 SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
395 401 SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
396 402 SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
397 403 SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
398 404 SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
399 405 SHA512_CONST_78, SHA512_CONST_79
400 406 };
401 -#endif
407 +#endif /* __sparc */
402 408
403 409
404 410 if ((uintptr_t)blk & 0x7) { /* not 8-byte aligned? */
405 411 bcopy(blk, ctx->buf_un.buf64, sizeof (ctx->buf_un.buf64));
406 412 blk = (uint8_t *)ctx->buf_un.buf64;
407 413 }
408 414
409 415 /* LINTED E_BAD_PTR_CAST_ALIGN */
410 416 w0 = LOAD_BIG_64(blk + 8 * 0);
411 417 SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
412 418 /* LINTED E_BAD_PTR_CAST_ALIGN */
413 419 w1 = LOAD_BIG_64(blk + 8 * 1);
414 420 SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
415 421 /* LINTED E_BAD_PTR_CAST_ALIGN */
416 422 w2 = LOAD_BIG_64(blk + 8 * 2);
417 423 SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
418 424 /* LINTED E_BAD_PTR_CAST_ALIGN */
419 425 w3 = LOAD_BIG_64(blk + 8 * 3);
420 426 SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
421 427 /* LINTED E_BAD_PTR_CAST_ALIGN */
422 428 w4 = LOAD_BIG_64(blk + 8 * 4);
423 429 SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
424 430 /* LINTED E_BAD_PTR_CAST_ALIGN */
425 431 w5 = LOAD_BIG_64(blk + 8 * 5);
426 432 SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
427 433 /* LINTED E_BAD_PTR_CAST_ALIGN */
428 434 w6 = LOAD_BIG_64(blk + 8 * 6);
429 435 SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
430 436 /* LINTED E_BAD_PTR_CAST_ALIGN */
431 437 w7 = LOAD_BIG_64(blk + 8 * 7);
432 438 SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
433 439 /* LINTED E_BAD_PTR_CAST_ALIGN */
434 440 w8 = LOAD_BIG_64(blk + 8 * 8);
435 441 SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
436 442 /* LINTED E_BAD_PTR_CAST_ALIGN */
437 443 w9 = LOAD_BIG_64(blk + 8 * 9);
438 444 SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
439 445 /* LINTED E_BAD_PTR_CAST_ALIGN */
440 446 w10 = LOAD_BIG_64(blk + 8 * 10);
441 447 SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
442 448 /* LINTED E_BAD_PTR_CAST_ALIGN */
443 449 w11 = LOAD_BIG_64(blk + 8 * 11);
444 450 SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
445 451 /* LINTED E_BAD_PTR_CAST_ALIGN */
446 452 w12 = LOAD_BIG_64(blk + 8 * 12);
447 453 SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
448 454 /* LINTED E_BAD_PTR_CAST_ALIGN */
449 455 w13 = LOAD_BIG_64(blk + 8 * 13);
450 456 SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
451 457 /* LINTED E_BAD_PTR_CAST_ALIGN */
452 458 w14 = LOAD_BIG_64(blk + 8 * 14);
453 459 SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
454 460 /* LINTED E_BAD_PTR_CAST_ALIGN */
455 461 w15 = LOAD_BIG_64(blk + 8 * 15);
456 462 SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
457 463
458 464 w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
459 465 SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
460 466 w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
461 467 SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
462 468 w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
463 469 SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
464 470 w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
465 471 SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
466 472 w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
467 473 SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
468 474 w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
469 475 SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
470 476 w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
471 477 SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
472 478 w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
473 479 SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
474 480 w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
475 481 SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
476 482 w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
477 483 SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
478 484 w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
479 485 SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
480 486 w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
481 487 SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
482 488 w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
483 489 SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
484 490 w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
485 491 SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
486 492 w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
487 493 SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
488 494 w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
489 495 SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
490 496
491 497 w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
492 498 SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
493 499 w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
494 500 SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
495 501 w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
496 502 SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
497 503 w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
498 504 SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
499 505 w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
500 506 SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
501 507 w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
502 508 SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
503 509 w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
504 510 SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
505 511 w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
506 512 SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
507 513 w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
508 514 SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
509 515 w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
510 516 SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
511 517 w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
512 518 SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
513 519 w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
514 520 SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
515 521 w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
516 522 SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
517 523 w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
518 524 SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
519 525 w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
520 526 SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
521 527 w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
522 528 SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
523 529
524 530 w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
525 531 SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
526 532 w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
527 533 SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
528 534 w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
529 535 SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
530 536 w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
531 537 SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
532 538 w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
533 539 SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
534 540 w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
535 541 SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
536 542 w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
537 543 SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
538 544 w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
539 545 SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
540 546 w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
541 547 SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
542 548 w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
543 549 SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
544 550 w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
545 551 SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
546 552 w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
547 553 SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
548 554 w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
549 555 SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
550 556 w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
551 557 SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
552 558 w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
553 559 SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
554 560 w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
555 561 SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
556 562
557 563 w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
558 564 SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
559 565 w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
560 566 SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
561 567 w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
562 568 SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
563 569 w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
564 570 SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
565 571 w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
566 572 SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
567 573 w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
568 574 SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
569 575 w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
570 576 SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
571 577 w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
572 578 SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
573 579 w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
574 580 SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
575 581 w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
576 582 SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
577 583 w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
578 584 SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
579 585 w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
580 586 SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
581 587 w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
582 588 SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
583 589 w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
584 590 SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
585 591 w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
586 592 SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
587 593 w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
588 594 SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
589 595
↓ open down ↓ |
178 lines elided |
↑ open up ↑ |
590 596 ctx->state.s64[0] += a;
591 597 ctx->state.s64[1] += b;
592 598 ctx->state.s64[2] += c;
593 599 ctx->state.s64[3] += d;
594 600 ctx->state.s64[4] += e;
595 601 ctx->state.s64[5] += f;
596 602 ctx->state.s64[6] += g;
597 603 ctx->state.s64[7] += h;
598 604
599 605 }
606 +#endif /* !__amd64 */
600 607
601 608
602 609 /*
603 610 * Encode()
604 611 *
605 612 * purpose: to convert a list of numbers from host byte order to big endian
606 613 * input: uint8_t * : place to store the converted big endian numbers
607 614 * uint32_t * : place to get numbers to convert from
608 615 * size_t : the length of the input in bytes
609 616 * output: void
610 617 */
611 618
612 619 static void
613 620 Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
614 621 size_t len)
615 622 {
616 623 size_t i, j;
617 624
618 625 #if defined(__sparc)
619 626 if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
620 627 for (i = 0, j = 0; j < len; i++, j += 4) {
621 628 /* LINTED: pointer alignment */
622 629 *((uint32_t *)(output + j)) = input[i];
623 630 }
624 631 } else {
625 632 #endif /* little endian -- will work on big endian, but slowly */
626 633 for (i = 0, j = 0; j < len; i++, j += 4) {
627 634 output[j] = (input[i] >> 24) & 0xff;
628 635 output[j + 1] = (input[i] >> 16) & 0xff;
629 636 output[j + 2] = (input[i] >> 8) & 0xff;
630 637 output[j + 3] = input[i] & 0xff;
631 638 }
632 639 #if defined(__sparc)
633 640 }
634 641 #endif
635 642 }
636 643
637 644 static void
638 645 Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
639 646 size_t len)
640 647 {
641 648 size_t i, j;
642 649
643 650 #if defined(__sparc)
644 651 if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
645 652 for (i = 0, j = 0; j < len; i++, j += 8) {
646 653 /* LINTED: pointer alignment */
647 654 *((uint64_t *)(output + j)) = input[i];
648 655 }
649 656 } else {
650 657 #endif /* little endian -- will work on big endian, but slowly */
651 658 for (i = 0, j = 0; j < len; i++, j += 8) {
652 659
653 660 output[j] = (input[i] >> 56) & 0xff;
654 661 output[j + 1] = (input[i] >> 48) & 0xff;
655 662 output[j + 2] = (input[i] >> 40) & 0xff;
656 663 output[j + 3] = (input[i] >> 32) & 0xff;
657 664 output[j + 4] = (input[i] >> 24) & 0xff;
658 665 output[j + 5] = (input[i] >> 16) & 0xff;
659 666 output[j + 6] = (input[i] >> 8) & 0xff;
660 667 output[j + 7] = input[i] & 0xff;
661 668 }
662 669 #if defined(__sparc)
663 670 }
664 671 #endif
665 672 }
666 673
667 674
668 675 void
669 676 SHA2Init(uint64_t mech, SHA2_CTX *ctx)
670 677 {
671 678
672 679 switch (mech) {
673 680 case SHA256_MECH_INFO_TYPE:
674 681 case SHA256_HMAC_MECH_INFO_TYPE:
675 682 case SHA256_HMAC_GEN_MECH_INFO_TYPE:
676 683 ctx->state.s32[0] = 0x6a09e667U;
677 684 ctx->state.s32[1] = 0xbb67ae85U;
678 685 ctx->state.s32[2] = 0x3c6ef372U;
679 686 ctx->state.s32[3] = 0xa54ff53aU;
680 687 ctx->state.s32[4] = 0x510e527fU;
681 688 ctx->state.s32[5] = 0x9b05688cU;
682 689 ctx->state.s32[6] = 0x1f83d9abU;
683 690 ctx->state.s32[7] = 0x5be0cd19U;
684 691 break;
685 692 case SHA384_MECH_INFO_TYPE:
686 693 case SHA384_HMAC_MECH_INFO_TYPE:
687 694 case SHA384_HMAC_GEN_MECH_INFO_TYPE:
688 695 ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
689 696 ctx->state.s64[1] = 0x629a292a367cd507ULL;
690 697 ctx->state.s64[2] = 0x9159015a3070dd17ULL;
691 698 ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
692 699 ctx->state.s64[4] = 0x67332667ffc00b31ULL;
693 700 ctx->state.s64[5] = 0x8eb44a8768581511ULL;
694 701 ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
695 702 ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
696 703 break;
697 704 case SHA512_MECH_INFO_TYPE:
698 705 case SHA512_HMAC_MECH_INFO_TYPE:
699 706 case SHA512_HMAC_GEN_MECH_INFO_TYPE:
700 707 ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
701 708 ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
702 709 ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
703 710 ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
704 711 ctx->state.s64[4] = 0x510e527fade682d1ULL;
705 712 ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
706 713 ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
707 714 ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
708 715 break;
709 716 #ifdef _KERNEL
710 717 default:
711 718 cmn_err(CE_PANIC, "sha2_init: "
712 719 "failed to find a supported algorithm: 0x%x",
713 720 (uint32_t)mech);
714 721
715 722 #endif /* _KERNEL */
716 723 }
717 724
718 725 ctx->algotype = mech;
719 726 ctx->count.c64[0] = ctx->count.c64[1] = 0;
720 727 }
721 728
722 729 #ifndef _KERNEL
723 730
724 731 #pragma inline(SHA256Init, SHA384Init, SHA512Init)
725 732 void
726 733 SHA256Init(SHA256_CTX *ctx)
727 734 {
728 735 SHA2Init(SHA256, ctx);
729 736 }
730 737
731 738 void
732 739 SHA384Init(SHA384_CTX *ctx)
733 740 {
734 741 SHA2Init(SHA384, ctx);
735 742 }
736 743
737 744 void
738 745 SHA512Init(SHA512_CTX *ctx)
739 746 {
740 747 SHA2Init(SHA512, ctx);
741 748 }
↓ open down ↓ |
132 lines elided |
↑ open up ↑ |
742 749
743 750 #endif /* _KERNEL */
744 751
745 752 /*
746 753 * SHA2Update()
747 754 *
748 755 * purpose: continues an sha2 digest operation, using the message block
749 756 * to update the context.
750 757 * input: SHA2_CTX * : the context to update
751 758 * void * : the message block
752 - * size_t : the length of the message block in bytes
759 + * size_t : the length of the message block, in bytes
753 760 * output: void
754 761 */
755 762
756 763 void
757 764 SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
758 765 {
759 - uint32_t i, buf_index, buf_len, buf_limit;
760 - const uint8_t *input = inptr;
766 + uint32_t i, buf_index, buf_len, buf_limit;
767 + const uint8_t *input = inptr;
768 + uint32_t algotype = ctx->algotype;
769 +#if defined(__amd64)
770 + uint32_t block_count;
771 +#endif /* !__amd64 */
761 772
773 +
762 774 /* check for noop */
763 775 if (input_len == 0)
764 776 return;
765 777
766 - if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
778 + if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
767 779 buf_limit = 64;
768 780
769 781 /* compute number of bytes mod 64 */
770 782 buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
771 783
772 784 /* update number of bits */
773 785 if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
774 786 ctx->count.c32[0]++;
775 787
776 788 ctx->count.c32[0] += (input_len >> 29);
777 789
778 790 } else {
779 791 buf_limit = 128;
780 792
781 793 /* compute number of bytes mod 128 */
782 794 buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
783 795
784 796 /* update number of bits */
785 797 if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
786 798 ctx->count.c64[0]++;
787 799
788 800 ctx->count.c64[0] += (input_len >> 29);
789 801 }
790 802
791 803 buf_len = buf_limit - buf_index;
792 804
793 805 /* transform as many times as possible */
794 806 i = 0;
795 807 if (input_len >= buf_len) {
796 808
797 809 /*
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
798 810 * general optimization:
799 811 *
800 812 * only do initial bcopy() and SHA2Transform() if
801 813 * buf_index != 0. if buf_index == 0, we're just
802 814 * wasting our time doing the bcopy() since there
803 815 * wasn't any data left over from a previous call to
804 816 * SHA2Update().
805 817 */
806 818 if (buf_index) {
807 819 bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
808 - if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
820 + if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
809 821 SHA256Transform(ctx, ctx->buf_un.buf8);
810 822 else
811 823 SHA512Transform(ctx, ctx->buf_un.buf8);
812 824
813 825 i = buf_len;
814 826 }
815 827
816 -
817 - for (; i + buf_limit - 1 < input_len; i += buf_limit) {
818 - if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
828 +#if !defined(__amd64)
829 + if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
830 + for (; i + buf_limit - 1 < input_len; i += buf_limit) {
819 831 SHA256Transform(ctx, &input[i]);
820 - else
832 + }
833 + } else {
834 + for (; i + buf_limit - 1 < input_len; i += buf_limit) {
821 835 SHA512Transform(ctx, &input[i]);
836 + }
822 837 }
823 838
839 +#else
840 + if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
841 + block_count = (input_len - i) >> 6;
842 + if (block_count > 0) {
843 + SHA256TransformBlocks(ctx, &input[i],
844 + block_count);
845 + i += block_count << 6;
846 + }
847 + } else {
848 + block_count = (input_len - i) >> 7;
849 + if (block_count > 0) {
850 + SHA512TransformBlocks(ctx, &input[i],
851 + block_count);
852 + i += block_count << 7;
853 + }
854 + }
855 +#endif /* !__amd64 */
856 +
824 857 /*
825 858 * general optimization:
826 859 *
827 860 * if i and input_len are the same, return now instead
828 861 * of calling bcopy(), since the bcopy() in this case
829 - * will be an expensive nop.
862 + * will be an expensive noop.
830 863 */
831 864
832 865 if (input_len == i)
833 866 return;
834 867
835 868 buf_index = 0;
836 869 }
837 870
838 871 /* buffer remaining input */
839 872 bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
840 873 }
841 874
842 875
843 876 /*
844 877 * SHA2Final()
845 878 *
846 879 * purpose: ends an sha2 digest operation, finalizing the message digest and
847 880 * zeroing the context.
848 - * input: uchar_t * : a buffer to store the digest in
881 + * input: uchar_t * : a buffer to store the digest
849 882 * : The function actually uses void* because many
850 883 * : callers pass things other than uchar_t here.
851 884 * SHA2_CTX * : the context to finalize, save, and zero
852 885 * output: void
853 886 */
854 887
855 888 void
856 889 SHA2Final(void *digest, SHA2_CTX *ctx)
857 890 {
858 891 uint8_t bitcount_be[sizeof (ctx->count.c32)];
859 892 uint8_t bitcount_be64[sizeof (ctx->count.c64)];
860 893 uint32_t index;
894 + uint32_t algotype = ctx->algotype;
861 895
862 -
863 - if (ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
896 + if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
864 897 index = (ctx->count.c32[1] >> 3) & 0x3f;
865 898 Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
866 899 SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
867 900 SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
868 901 Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
869 902
870 903 } else {
871 904 index = (ctx->count.c64[1] >> 3) & 0x7f;
872 905 Encode64(bitcount_be64, ctx->count.c64,
873 906 sizeof (bitcount_be64));
874 907 SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
875 908 SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
876 - if (ctx->algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
909 + if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
877 910 ctx->state.s64[6] = ctx->state.s64[7] = 0;
878 911 Encode64(digest, ctx->state.s64,
879 912 sizeof (uint64_t) * 6);
880 913 } else
881 914 Encode64(digest, ctx->state.s64,
882 915 sizeof (ctx->state.s64));
883 916 }
884 917
885 918 /* zeroize sensitive information */
886 919 bzero(ctx, sizeof (*ctx));
887 920 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX