Print this page
5007142 Add ntohll and htonll to sys/byteorder.h
6717509 Need to use bswap/bswapq for byte swap of 64-bit integer on x32/x64
PSARC 2008/474
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/common/crypto/aes/amd64/aesopt.h
+++ new/usr/src/common/crypto/aes/amd64/aesopt.h
1 1 /*
2 2 * ---------------------------------------------------------------------------
3 3 * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
4 4 *
5 5 * LICENSE TERMS
6 6 *
7 7 * The free distribution and use of this software is allowed (with or without
8 8 * changes) provided that:
9 9 *
10 10 * 1. source code distributions include the above copyright notice, this
11 11 * list of conditions and the following disclaimer;
12 12 *
13 13 * 2. binary distributions include the above copyright notice, this list
14 14 * of conditions and the following disclaimer in their documentation;
15 15 *
16 16 * 3. the name of the copyright holder is not used to endorse products
17 17 * built using this software without specific written permission.
18 18 *
19 19 * DISCLAIMER
20 20 *
21 21 * This software is provided 'as is' with no explicit or implied warranties
22 22 * in respect of its properties, including, but not limited to, correctness
23 23 * and/or fitness for purpose.
24 24 * ---------------------------------------------------------------------------
25 25 * Issue Date: 20/12/2007
26 26 *
27 27 * This file contains the compilation options for AES (Rijndael) and code
28 28 * that is common across encryption, key scheduling and table generation.
29 29 *
30 30 * OPERATION
31 31 *
32 32 * These source code files implement the AES algorithm Rijndael designed by
33 33 * Joan Daemen and Vincent Rijmen. This version is designed for the standard
34 34 * block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
35 35 * and 32 bytes).
36 36 *
37 37 * This version is designed for flexibility and speed using operations on
38 38 * 32-bit words rather than operations on bytes. It can be compiled with
39 39 * either big or little endian internal byte order but is faster when the
40 40 * native byte order for the processor is used.
41 41 *
42 42 * THE CIPHER INTERFACE
43 43 *
44 44 * The cipher interface is implemented as an array of bytes in which lower
45 45 * AES bit sequence indexes map to higher numeric significance within bytes.
46 46 */
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
47 47
48 48 /*
49 49 * OpenSolaris changes
50 50 * 1. Added __cplusplus and _AESOPT_H header guards
51 51 * 2. Added header files sys/types.h and aes_impl.h
52 52 * 3. Added defines for AES_ENCRYPT, AES_DECRYPT, AES_REV_DKS, and ASM_AMD64_C
53 53 * 4. Moved defines for IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, PLATFORM_BYTE_ORDER
54 54 * from brg_endian.h
55 55 * 5. Undefined VIA_ACE_POSSIBLE and ASSUME_VIA_ACE_PRESENT
56 56 * 6. Changed uint_8t and uint_32t to uint8_t and uint32_t
57 - * 7. cstyled and hdrchk code
57 + * 7. Defined aes_sw32 as htonl() for byte swapping
58 + * 8. Cstyled and hdrchk code
58 59 *
59 60 */
60 61
61 62 #ifndef _AESOPT_H
62 63 #define _AESOPT_H
63 64
64 -#pragma ident "%Z%%M% %I% %E% SMI"
65 -
66 65 #ifdef __cplusplus
67 66 extern "C" {
68 67 #endif
69 68
70 69 #include <sys/types.h>
70 +#include <sys/byteorder.h>
71 71 #include <aes_impl.h>
72 72
73 73 /* SUPPORT FEATURES */
74 74 #define AES_ENCRYPT /* if support for encryption is needed */
75 75 #define AES_DECRYPT /* if support for decryption is needed */
76 76
77 77 /* PLATFORM-SPECIFIC FEATURES */
78 78 #define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
79 79 #define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
80 80 #define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
81 81 #define AES_REV_DKS /* define to reverse decryption key schedule */
82 82
83 83
84 84 /*
85 85 * CONFIGURATION - THE USE OF DEFINES
86 86 * Later in this section there are a number of defines that control the
87 87 * operation of the code. In each section, the purpose of each define is
88 88 * explained so that the relevant form can be included or excluded by
89 89 * setting either 1's or 0's respectively on the branches of the related
90 90 * #if clauses. The following local defines should not be changed.
91 91 */
92 92
93 93 #define ENCRYPTION_IN_C 1
94 94 #define DECRYPTION_IN_C 2
95 95 #define ENC_KEYING_IN_C 4
96 96 #define DEC_KEYING_IN_C 8
97 97
98 98 #define NO_TABLES 0
99 99 #define ONE_TABLE 1
100 100 #define FOUR_TABLES 4
101 101 #define NONE 0
102 102 #define PARTIAL 1
103 103 #define FULL 2
104 104
105 105 /* --- START OF USER CONFIGURED OPTIONS --- */
106 106
107 107 /*
108 108 * 1. BYTE ORDER WITHIN 32 BIT WORDS
109 109 *
110 110 * The fundamental data processing units in Rijndael are 8-bit bytes. The
111 111 * input, output and key input are all enumerated arrays of bytes in which
112 112 * bytes are numbered starting at zero and increasing to one less than the
113 113 * number of bytes in the array in question. This enumeration is only used
114 114 * for naming bytes and does not imply any adjacency or order relationship
115 115 * from one byte to another. When these inputs and outputs are considered
116 116 * as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
117 117 * byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
118 118 * In this implementation bits are numbered from 0 to 7 starting at the
119 119 * numerically least significant end of each byte. Bit n represents 2^n.
120 120 *
121 121 * However, Rijndael can be implemented more efficiently using 32-bit
122 122 * words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
123 123 * into word[n]. While in principle these bytes can be assembled into words
124 124 * in any positions, this implementation only supports the two formats in
125 125 * which bytes in adjacent positions within words also have adjacent byte
126 126 * numbers. This order is called big-endian if the lowest numbered bytes
127 127 * in words have the highest numeric significance and little-endian if the
128 128 * opposite applies.
129 129 *
130 130 * This code can work in either order irrespective of the order used by the
131 131 * machine on which it runs. Normally the internal byte order will be set
132 132 * to the order of the processor on which the code is to be run but this
133 133 * define can be used to reverse this in special situations
134 134 *
135 135 * WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
136 136 * This define will hence be redefined later (after the user configured
136 136 * options, where assembler support forces platform byte order) if necessary.
137 137 */
138 138
139 139 #if 1
140 140 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
141 141 #elif 0
142 142 #define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
143 143 #elif 0
144 144 #define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
145 145 #else
146 146 #error The algorithm byte order is not defined
147 147 #endif
148 148
149 149 /* 2. VIA ACE SUPPORT */
150 150
151 151 #if defined(__GNUC__) && defined(__i386__) || \
152 152 defined(_WIN32) && defined(_M_IX86) && \
153 153 !(defined(_WIN64) || defined(_WIN32_WCE) || \
154 154 defined(_MSC_VER) && (_MSC_VER <= 800))
155 155 #define VIA_ACE_POSSIBLE
156 156 #endif
157 157
158 158 /*
159 159 * Define this option if support for the VIA ACE is required. This uses
160 160 * inline assembler instructions and is only implemented for the Microsoft,
161 161 * Intel and GCC compilers. If VIA ACE is known to be present, then defining
162 162 * ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
163 163 * code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
164 164 * it is detected (both present and enabled) but the normal AES code will
165 165 * also be present.
166 166 *
167 167 * When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
168 168 * aligned; other input/output buffers do not need to be 16 byte aligned
169 169 * but there are very large performance gains if this can be arranged.
170 170 * VIA ACE also requires the decryption key schedule to be in reverse
171 171 * order (which later checks below ensure).
172 172 */
173 173
174 174 /* VIA ACE is not used here for OpenSolaris: */
175 175 #undef VIA_ACE_POSSIBLE
176 176 #undef ASSUME_VIA_ACE_PRESENT
177 177
178 178 #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(USE_VIA_ACE_IF_PRESENT)
179 179 #define USE_VIA_ACE_IF_PRESENT
180 180 #endif
181 181
182 182 #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(ASSUME_VIA_ACE_PRESENT)
183 183 #define ASSUME_VIA_ACE_PRESENT
184 184 #endif
185 185
186 186
187 187 /*
188 188 * 3. ASSEMBLER SUPPORT
189 189 *
190 190 * This define (which can be on the command line) enables the use of the
191 191 * assembler code routines for encryption, decryption and key scheduling
192 192 * as follows:
193 193 *
194 194 * ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
195 195 * encryption and decryption but with key scheduling in C
196 196 * ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for
197 197 * encryption, decryption and key scheduling
198 198 * ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
199 199 * encryption and decryption but with key scheduling in C
200 200 * ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
201 201 * encryption and decryption but with key scheduling in C
202 202 *
203 203 * Change one 'if 0' below to 'if 1' to select the version or define
204 204 * as a compilation option.
205 205 */
206 206
207 207 #if 0 && !defined(ASM_X86_V1C)
208 208 #define ASM_X86_V1C
209 209 #elif 0 && !defined(ASM_X86_V2)
210 210 #define ASM_X86_V2
211 211 #elif 0 && !defined(ASM_X86_V2C)
212 212 #define ASM_X86_V2C
213 213 #elif 1 && !defined(ASM_AMD64_C)
214 214 #define ASM_AMD64_C
215 215 #endif
216 216
217 217 #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2) || defined(ASM_X86_V2C)) && \
218 218 !defined(_M_IX86) || defined(ASM_AMD64_C) && !defined(_M_X64) && \
219 219 !defined(__amd64)
220 220 #error Assembler code is only available for x86 and AMD64 systems
221 221 #endif
222 222
223 223 /*
224 224 * 4. FAST INPUT/OUTPUT OPERATIONS.
225 225 *
226 226 * On some machines it is possible to improve speed by transferring the
227 227 * bytes in the input and output arrays to and from the internal 32-bit
228 228 * variables by addressing these arrays as if they are arrays of 32-bit
229 229 * words. On some machines this will always be possible but there may
230 230 * be a large performance penalty if the byte arrays are not aligned on
231 231 * the normal word boundaries. On other machines this technique will
232 232 * lead to memory access errors when such 32-bit word accesses are not
233 233 * properly aligned. The option SAFE_IO avoids such problems but will
234 234 * often be slower on those machines that support misaligned access
235 235 * (especially so if care is taken to align the input and output byte
236 236 * arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
237 237 * assumed that access to byte arrays as if they are arrays of 32-bit
238 238 * words will not cause problems when such accesses are misaligned.
239 239 */
240 240 #if 1 && !defined(_MSC_VER)
241 241 #define SAFE_IO
242 242 #endif
243 243
244 244 /*
245 245 * 5. LOOP UNROLLING
246 246 *
247 247 * The code for encryption and decryption cycles through a number of rounds
248 248 * that can be implemented either in a loop or by expanding the code into a
249 249 * long sequence of instructions, the latter producing a larger program but
250 250 * one that will often be much faster. The latter is called loop unrolling.
251 251 * There are also potential speed advantages in expanding two iterations in
252 252 * a loop with half the number of iterations, which is called partial loop
253 253 * unrolling. The following options allow partial or full loop unrolling
254 254 * to be set independently for encryption and decryption
255 255 */
256 256 #if 1
257 257 #define ENC_UNROLL FULL
258 258 #elif 0
259 259 #define ENC_UNROLL PARTIAL
260 260 #else
261 261 #define ENC_UNROLL NONE
262 262 #endif
263 263
264 264 #if 1
265 265 #define DEC_UNROLL FULL
266 266 #elif 0
267 267 #define DEC_UNROLL PARTIAL
268 268 #else
269 269 #define DEC_UNROLL NONE
270 270 #endif
271 271
272 272 #if 1
273 273 #define ENC_KS_UNROLL
274 274 #endif
275 275
276 276 #if 1
277 277 #define DEC_KS_UNROLL
278 278 #endif
279 279
280 280 /*
281 281 * 6. FAST FINITE FIELD OPERATIONS
282 282 *
283 283 * If this section is included, tables are used to provide faster finite
284 284 * field arithmetic. This has no effect if FIXED_TABLES is defined.
285 285 */
286 286 #if 1
287 287 #define FF_TABLES
288 288 #endif
289 289
290 290 /*
291 291 * 7. INTERNAL STATE VARIABLE FORMAT
292 292 *
293 293 * The internal state of Rijndael is stored in a number of local 32-bit
294 294 * word variables which can be defined either as an array or as individual
295 295 * named variables. Include this section if you want to store these local
296 296 * variables in arrays. Otherwise individual local variables will be used.
297 297 */
298 298 #if 1
299 299 #define ARRAYS
300 300 #endif
301 301
302 302 /*
303 303 * 8. FIXED OR DYNAMIC TABLES
304 304 *
305 305 * When this section is included the tables used by the code are compiled
306 306 * statically into the binary file. Otherwise the subroutine aes_init()
307 307 * must be called to compute them before the code is first used.
308 308 */
309 309 #if 1 && !(defined(_MSC_VER) && (_MSC_VER <= 800))
310 310 #define FIXED_TABLES
311 311 #endif
312 312
313 313 /*
314 314 * 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
315 315 *
316 316 * In some systems it is better to mask longer values to extract bytes
317 317 * rather than using a cast. This option allows this choice.
318 318 */
319 319 #if 0
320 320 #define to_byte(x) ((uint8_t)(x))
321 321 #else
322 322 #define to_byte(x) ((x) & 0xff)
323 323 #endif
324 324
325 325 /*
326 326 * 10. TABLE ALIGNMENT
327 327 *
328 328 * On some systems speed will be improved by aligning the AES large lookup
329 329 * tables on particular boundaries. This define should be set to a power of
330 330 * two giving the desired alignment. It can be left undefined if alignment
331 331 * is not needed. This option is specific to the Microsoft VC++ compiler -
332 332 * it seems to sometimes cause trouble for the VC++ version 6 compiler.
333 333 */
334 334
335 335 #if 1 && defined(_MSC_VER) && (_MSC_VER >= 1300)
336 336 #define TABLE_ALIGN 32
337 337 #endif
338 338
339 339 /*
340 340 * 11. REDUCE CODE AND TABLE SIZE
341 341 *
342 342 * This replaces some expanded macros with function calls if ASM_X86_V2 or
343 343 * ASM_X86_V2C are defined
344 344 */
345 345
346 346 #if 1 && (defined(ASM_X86_V2) || defined(ASM_X86_V2C))
347 347 #define REDUCE_CODE_SIZE
348 348 #endif
349 349
350 350 /*
351 351 * 12. TABLE OPTIONS
352 352 *
353 353 * This cipher proceeds by repeating in a number of cycles known as rounds
354 354 * which are implemented by a round function which can optionally be speeded
355 355 * up using tables. The basic tables are 256 32-bit words, with either
356 356 * one or four tables being required for each round function depending on
357 357 * how much speed is required. Encryption and decryption round functions
358 358 * are different and the last encryption and decryption round functions are
359 359 * different again making four different round functions in all.
360 360 *
361 361 * This means that:
362 362 * 1. Normal encryption and decryption rounds can each use either 0, 1
363 363 * or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
364 364 * 2. The last encryption and decryption rounds can also use either 0, 1
365 365 * or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
366 366 *
367 367 * Include or exclude the appropriate definitions below to set the number
368 368 * of tables used by this implementation.
369 369 */
370 370
371 371 #if 1 /* set tables for the normal encryption round */
372 372 #define ENC_ROUND FOUR_TABLES
373 373 #elif 0
374 374 #define ENC_ROUND ONE_TABLE
375 375 #else
376 376 #define ENC_ROUND NO_TABLES
377 377 #endif
378 378
379 379 #if 1 /* set tables for the last encryption round */
380 380 #define LAST_ENC_ROUND FOUR_TABLES
381 381 #elif 0
382 382 #define LAST_ENC_ROUND ONE_TABLE
383 383 #else
384 384 #define LAST_ENC_ROUND NO_TABLES
385 385 #endif
386 386
387 387 #if 1 /* set tables for the normal decryption round */
388 388 #define DEC_ROUND FOUR_TABLES
389 389 #elif 0
390 390 #define DEC_ROUND ONE_TABLE
391 391 #else
392 392 #define DEC_ROUND NO_TABLES
393 393 #endif
394 394
395 395 #if 1 /* set tables for the last decryption round */
396 396 #define LAST_DEC_ROUND FOUR_TABLES
397 397 #elif 0
398 398 #define LAST_DEC_ROUND ONE_TABLE
399 399 #else
400 400 #define LAST_DEC_ROUND NO_TABLES
401 401 #endif
402 402
403 403 /*
404 404 * The decryption key schedule can be speeded up with tables in the same
405 405 * way that the round functions can. Include or exclude the following
406 406 * defines to set this requirement.
407 407 */
408 408 #if 1
409 409 #define KEY_SCHED FOUR_TABLES
410 410 #elif 0
411 411 #define KEY_SCHED ONE_TABLE
412 412 #else
413 413 #define KEY_SCHED NO_TABLES
414 414 #endif
415 415
416 416 /* ---- END OF USER CONFIGURED OPTIONS ---- */
417 417
418 418 /* VIA ACE support is only available for VC++ and GCC */
419 419
420 420 #if !defined(_MSC_VER) && !defined(__GNUC__)
421 421 #if defined(ASSUME_VIA_ACE_PRESENT)
422 422 #undef ASSUME_VIA_ACE_PRESENT
423 423 #endif
424 424 #if defined(USE_VIA_ACE_IF_PRESENT)
425 425 #undef USE_VIA_ACE_IF_PRESENT
426 426 #endif
427 427 #endif
428 428
429 429 #if defined(ASSUME_VIA_ACE_PRESENT) && !defined(USE_VIA_ACE_IF_PRESENT)
430 430 #define USE_VIA_ACE_IF_PRESENT
431 431 #endif
432 432
433 433 #if defined(USE_VIA_ACE_IF_PRESENT) && !defined(AES_REV_DKS)
434 434 #define AES_REV_DKS
435 435 #endif
436 436
437 437 /* Assembler support requires the use of platform byte order */
438 438
439 439 #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2C) || defined(ASM_AMD64_C)) && \
440 440 (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
441 441 #undef ALGORITHM_BYTE_ORDER
442 442 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
443 443 #endif
444 444
445 445 /*
446 446 * In this implementation the columns of the state array are each held in
447 447 * 32-bit words. The state array can be held in various ways: in an array
448 448 * of words, in a number of individual word variables or in a number of
449 449 * processor registers. The following define maps a variable name x and
450 450 * a column number c to the way the state array variable is to be held.
451 451 * The first define below maps the state into an array x[c] whereas the
452 452 * second form maps the state into a number of individual variables x0,
453 453 * x1, etc. Another form could map individual state columns to machine
454 454 * register names.
455 455 */
456 456
457 457 #if defined(ARRAYS)
458 458 #define s(x, c) x[c]
459 459 #else
460 460 #define s(x, c) x##c
461 461 #endif
462 462
463 463 /*
464 464 * This implementation provides subroutines for encryption, decryption
465 465 * and for setting the three key lengths (separately) for encryption
466 466 * and decryption. Since not all functions are needed, masks are set
467 467 * up here to determine which will be implemented in C
468 468 */
469 469
470 470 #if !defined(AES_ENCRYPT)
471 471 #define EFUNCS_IN_C 0
472 472 #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
473 473 defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
474 474 #define EFUNCS_IN_C ENC_KEYING_IN_C
475 475 #elif !defined(ASM_X86_V2)
476 476 #define EFUNCS_IN_C (ENCRYPTION_IN_C | ENC_KEYING_IN_C)
477 477 #else
478 478 #define EFUNCS_IN_C 0
479 479 #endif
480 480
481 481 #if !defined(AES_DECRYPT)
482 482 #define DFUNCS_IN_C 0
483 483 #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
484 484 defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
485 485 #define DFUNCS_IN_C DEC_KEYING_IN_C
486 486 #elif !defined(ASM_X86_V2)
487 487 #define DFUNCS_IN_C (DECRYPTION_IN_C | DEC_KEYING_IN_C)
488 488 #else
489 489 #define DFUNCS_IN_C 0
490 490 #endif
491 491
492 492 #define FUNCS_IN_C (EFUNCS_IN_C | DFUNCS_IN_C)
493 493
494 494 /* END OF CONFIGURATION OPTIONS */
495 495
496 496 /* Disable or report errors on some combinations of options */
497 497
498 498 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
499 499 #undef LAST_ENC_ROUND
500 500 #define LAST_ENC_ROUND NO_TABLES
501 501 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
502 502 #undef LAST_ENC_ROUND
503 503 #define LAST_ENC_ROUND ONE_TABLE
504 504 #endif
505 505
506 506 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
507 507 #undef ENC_UNROLL
508 508 #define ENC_UNROLL NONE
509 509 #endif
510 510
511 511 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
512 512 #undef LAST_DEC_ROUND
513 513 #define LAST_DEC_ROUND NO_TABLES
↓ open down ↓ |
433 lines elided |
↑ open up ↑ |
514 514 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
515 515 #undef LAST_DEC_ROUND
516 516 #define LAST_DEC_ROUND ONE_TABLE
517 517 #endif
518 518
519 519 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
520 520 #undef DEC_UNROLL
521 521 #define DEC_UNROLL NONE
522 522 #endif
523 523
524 -#if defined(bswap32)
524 +#if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
525 +#define aes_sw32 htonl
526 +#elif defined(bswap32)
525 527 #define aes_sw32 bswap32
526 528 #elif defined(bswap_32)
527 529 #define aes_sw32 bswap_32
528 530 #else
529 -#define brot(x, n) (((uint32_t)(x) << n) | ((uint32_t)(x) >> (32 - n)))
531 +#define brot(x, n) (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))
530 532 #define aes_sw32(x) ((brot((x), 8) & 0x00ff00ff) | (brot((x), 24) & 0xff00ff00))
531 533 #endif
532 534
535 +
533 536 /*
534 - * upr(x, n): rotates bytes within words by n positions, moving bytes to
537 + * upr(x, n): rotates bytes within words by n positions, moving bytes to
535 538 * higher index positions with wrap around into low positions
536 539 * ups(x, n): moves bytes by n positions to higher index positions in
537 540 * words but without wrap around
538 541 * bval(x, n): extracts a byte from a word
539 542 *
540 543 * WARNING: The definitions given here are intended only for use with
541 544 * unsigned variables and with shift counts that are compile
542 545 * time constants
543 546 */
544 547
545 548 #if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
546 549 #define upr(x, n) (((uint32_t)(x) << (8 * (n))) | \
547 550 ((uint32_t)(x) >> (32 - 8 * (n))))
548 551 #define ups(x, n) ((uint32_t)(x) << (8 * (n)))
549 552 #define bval(x, n) to_byte((x) >> (8 * (n)))
550 553 #define bytes2word(b0, b1, b2, b3) \
551 554 (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | \
552 555 ((uint32_t)(b1) << 8) | (b0))
553 556 #endif
554 557
555 558 #if (ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN)
556 559 #define upr(x, n) (((uint32_t)(x) >> (8 * (n))) | \
557 560 ((uint32_t)(x) << (32 - 8 * (n))))
558 561 #define ups(x, n) ((uint32_t)(x) >> (8 * (n)))
559 562 #define bval(x, n) to_byte((x) >> (24 - 8 * (n)))
560 563 #define bytes2word(b0, b1, b2, b3) \
561 564 (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | \
562 565 ((uint32_t)(b2) << 8) | (b3))
563 566 #endif
564 567
565 568 #if defined(SAFE_IO)
566 569 #define word_in(x, c) bytes2word(((const uint8_t *)(x) + 4 * c)[0], \
567 570 ((const uint8_t *)(x) + 4 * c)[1], \
568 571 ((const uint8_t *)(x) + 4 * c)[2], \
569 572 ((const uint8_t *)(x) + 4 * c)[3])
570 573 #define word_out(x, c, v) { ((uint8_t *)(x) + 4 * c)[0] = bval(v, 0); \
571 574 ((uint8_t *)(x) + 4 * c)[1] = bval(v, 1); \
572 575 ((uint8_t *)(x) + 4 * c)[2] = bval(v, 2); \
573 576 ((uint8_t *)(x) + 4 * c)[3] = bval(v, 3); }
574 577 #elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
575 578 #define word_in(x, c) (*((uint32_t *)(x) + (c)))
576 579 #define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = (v))
577 580 #else
578 581 #define word_in(x, c) aes_sw32(*((uint32_t *)(x) + (c)))
579 582 #define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = aes_sw32(v))
580 583 #endif
581 584
582 585 /* the finite field modular polynomial and elements */
583 586
584 587 #define WPOLY 0x011b
585 588 #define BPOLY 0x1b
586 589
587 590 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
588 591
589 592 #define m1 0x80808080
590 593 #define m2 0x7f7f7f7f
591 594 #define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
592 595
593 596 /*
594 597 * The following defines provide alternative definitions of gf_mulx that might
595 598 * give improved performance if a fast 32-bit multiply is not available. Note
596 599 * that a temporary variable u needs to be defined where gf_mulx is used.
597 600 *
598 601 * #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ \
599 602 * ((u >> 3) | (u >> 6))
600 603 * #define m4 (0x01010101 * BPOLY)
601 604 * #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) \
602 605 * & m4)
603 606 */
604 607
605 608 /* Work out which tables are needed for the different options */
606 609
607 610 #if defined(ASM_X86_V1C)
608 611 #if defined(ENC_ROUND)
609 612 #undef ENC_ROUND
610 613 #endif
611 614 #define ENC_ROUND FOUR_TABLES
612 615 #if defined(LAST_ENC_ROUND)
613 616 #undef LAST_ENC_ROUND
614 617 #endif
615 618 #define LAST_ENC_ROUND FOUR_TABLES
616 619 #if defined(DEC_ROUND)
617 620 #undef DEC_ROUND
618 621 #endif
619 622 #define DEC_ROUND FOUR_TABLES
620 623 #if defined(LAST_DEC_ROUND)
621 624 #undef LAST_DEC_ROUND
622 625 #endif
623 626 #define LAST_DEC_ROUND FOUR_TABLES
624 627 #if defined(KEY_SCHED)
625 628 #undef KEY_SCHED
626 629 #define KEY_SCHED FOUR_TABLES
627 630 #endif
628 631 #endif
629 632
630 633 #if (FUNCS_IN_C & ENCRYPTION_IN_C) || defined(ASM_X86_V1C)
631 634 #if ENC_ROUND == ONE_TABLE
632 635 #define FT1_SET
633 636 #elif ENC_ROUND == FOUR_TABLES
634 637 #define FT4_SET
635 638 #else
636 639 #define SBX_SET
637 640 #endif
638 641 #if LAST_ENC_ROUND == ONE_TABLE
639 642 #define FL1_SET
640 643 #elif LAST_ENC_ROUND == FOUR_TABLES
641 644 #define FL4_SET
642 645 #elif !defined(SBX_SET)
643 646 #define SBX_SET
644 647 #endif
645 648 #endif
646 649
647 650 #if (FUNCS_IN_C & DECRYPTION_IN_C) || defined(ASM_X86_V1C)
648 651 #if DEC_ROUND == ONE_TABLE
649 652 #define IT1_SET
650 653 #elif DEC_ROUND == FOUR_TABLES
651 654 #define IT4_SET
652 655 #else
653 656 #define ISB_SET
654 657 #endif
655 658 #if LAST_DEC_ROUND == ONE_TABLE
656 659 #define IL1_SET
657 660 #elif LAST_DEC_ROUND == FOUR_TABLES
658 661 #define IL4_SET
659 662 #elif !defined(ISB_SET)
660 663 #define ISB_SET
661 664 #endif
662 665 #endif
663 666
664 667
665 668 #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
666 669 defined(ASM_X86_V2C)))
667 670 #if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
668 671 #if KEY_SCHED == ONE_TABLE
669 672 #if !defined(FL1_SET) && !defined(FL4_SET)
670 673 #define LS1_SET
671 674 #endif
672 675 #elif KEY_SCHED == FOUR_TABLES
673 676 #if !defined(FL4_SET)
674 677 #define LS4_SET
675 678 #endif
676 679 #elif !defined(SBX_SET)
677 680 #define SBX_SET
678 681 #endif
679 682 #endif
680 683 #if (FUNCS_IN_C & DEC_KEYING_IN_C)
681 684 #if KEY_SCHED == ONE_TABLE
682 685 #define IM1_SET
683 686 #elif KEY_SCHED == FOUR_TABLES
684 687 #define IM4_SET
685 688 #elif !defined(SBX_SET)
686 689 #define SBX_SET
687 690 #endif
688 691 #endif
689 692 #endif
690 693
691 694 /* generic definitions of Rijndael macros that use tables */
692 695
693 696 #define no_table(x, box, vf, rf, c) bytes2word(\
694 697 box[bval(vf(x, 0, c), rf(0, c))], \
695 698 box[bval(vf(x, 1, c), rf(1, c))], \
696 699 box[bval(vf(x, 2, c), rf(2, c))], \
697 700 box[bval(vf(x, 3, c), rf(3, c))])
698 701
699 702 #define one_table(x, op, tab, vf, rf, c) \
700 703 (tab[bval(vf(x, 0, c), rf(0, c))] \
701 704 ^ op(tab[bval(vf(x, 1, c), rf(1, c))], 1) \
702 705 ^ op(tab[bval(vf(x, 2, c), rf(2, c))], 2) \
703 706 ^ op(tab[bval(vf(x, 3, c), rf(3, c))], 3))
704 707
705 708 #define four_tables(x, tab, vf, rf, c) \
706 709 (tab[0][bval(vf(x, 0, c), rf(0, c))] \
707 710 ^ tab[1][bval(vf(x, 1, c), rf(1, c))] \
708 711 ^ tab[2][bval(vf(x, 2, c), rf(2, c))] \
709 712 ^ tab[3][bval(vf(x, 3, c), rf(3, c))])
710 713
711 714 #define vf1(x, r, c) (x)
712 715 #define rf1(r, c) (r)
713 716 #define rf2(r, c) ((8+r-c)&3)
714 717
715 718 /*
716 719 * Perform forward and inverse column mix operation on four bytes in long word
717 720 * x in parallel. NOTE: x must be a simple variable, NOT an expression in
718 721 * these macros.
719 722 */
720 723
721 724 #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
722 725 defined(ASM_X86_V2C)))
723 726
724 727 #if defined(FM4_SET) /* not currently used */
725 728 #define fwd_mcol(x) four_tables(x, t_use(f, m), vf1, rf1, 0)
726 729 #elif defined(FM1_SET) /* not currently used */
727 730 #define fwd_mcol(x) one_table(x, upr, t_use(f, m), vf1, rf1, 0)
728 731 #else
729 732 #define dec_fmvars uint32_t g2
730 733 #define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ \
731 734 upr((x), 2) ^ upr((x), 1))
732 735 #endif
733 736
734 737 #if defined(IM4_SET)
735 738 #define inv_mcol(x) four_tables(x, t_use(i, m), vf1, rf1, 0)
736 739 #elif defined(IM1_SET)
737 740 #define inv_mcol(x) one_table(x, upr, t_use(i, m), vf1, rf1, 0)
738 741 #else
739 742 #define dec_imvars uint32_t g2, g4, g9
740 743 #define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = \
741 744 (x) ^ gf_mulx(g4), g4 ^= g9, \
742 745 (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ \
743 746 upr(g4, 2) ^ upr(g9, 1))
744 747 #endif
745 748
746 749 #if defined(FL4_SET)
747 750 #define ls_box(x, c) four_tables(x, t_use(f, l), vf1, rf2, c)
748 751 #elif defined(LS4_SET)
749 752 #define ls_box(x, c) four_tables(x, t_use(l, s), vf1, rf2, c)
750 753 #elif defined(FL1_SET)
751 754 #define ls_box(x, c) one_table(x, upr, t_use(f, l), vf1, rf2, c)
752 755 #elif defined(LS1_SET)
753 756 #define ls_box(x, c) one_table(x, upr, t_use(l, s), vf1, rf2, c)
754 757 #else
755 758 #define ls_box(x, c) no_table(x, t_use(s, box), vf1, rf2, c)
756 759 #endif
757 760
758 761 #endif
759 762
760 763 #if defined(ASM_X86_V1C) && defined(AES_DECRYPT) && !defined(ISB_SET)
761 764 #define ISB_SET
762 765 #endif
763 766
764 767 #ifdef __cplusplus
765 768 }
766 769 #endif
767 770
768 771 #endif /* _AESOPT_H */
↓ open down ↓ |
224 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX