New usr/src/common/crypto/aes/amd64/aesopt.h

   1 /*
   2  * ---------------------------------------------------------------------------
   3  * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
   4  *
   5  * LICENSE TERMS
   6  *
   7  * The free distribution and use of this software is allowed (with or without
   8  * changes) provided that:
   9  *
  10  *  1. source code distributions include the above copyright notice, this
  11  *      list of conditions and the following disclaimer;
  12  *
  13  *  2. binary distributions include the above copyright notice, this list
  14  *      of conditions and the following disclaimer in their documentation;
  15  *
  16  *  3. the name of the copyright holder is not used to endorse products
  17  *      built using this software without specific written permission.
  18  *
  19  * DISCLAIMER
  20  *
  21  * This software is provided 'as is' with no explicit or implied warranties
  22  * in respect of its properties, including, but not limited to, correctness
  23  * and/or fitness for purpose.
  24  * ---------------------------------------------------------------------------
  25  * Issue Date: 20/12/2007
  26  *
  27  * This file contains the compilation options for AES (Rijndael) and code
  28  * that is common across encryption, key scheduling and table generation.
  29  *
  30  * OPERATION
  31  *
  32  * These source code files implement the AES algorithm Rijndael designed by
  33  * Joan Daemen and Vincent Rijmen. This version is designed for the standard
  34  * block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
  35  * and 32 bytes).
  36  *
  37  * This version is designed for flexibility and speed using operations on
  38  * 32-bit words rather than operations on bytes.  It can be compiled with
  39  * either big or little endian internal byte order but is faster when the
  40  * native byte order for the processor is used.
  41  *
  42  * THE CIPHER INTERFACE
  43  *
  44  * The cipher interface is implemented as an array of bytes in which lower
  45  * AES bit sequence indexes map to higher numeric significance within bytes.
  46  */
  47 
  48 /*
  49  * OpenSolaris changes
  50  * 1. Added __cplusplus and _AESOPT_H header guards
  51  * 2. Added header files sys/types.h and aes_impl.h
  52  * 3. Added defines for AES_ENCRYPT, AES_DECRYPT, AES_REV_DKS, and ASM_AMD64_C
  53  * 4. Moved defines for IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, PLATFORM_BYTE_ORDER
  54  *    from brg_endian.h
  55  * 5. Undefined VIA_ACE_POSSIBLE and ASSUME_VIA_ACE_PRESENT
  56  * 6. Changed uint_8t and uint_32t to uint8_t and uint32_t
  57  * 7. cstyled and hdrchk code
  58  *
  59  */
  60 
  61 #ifndef _AESOPT_H
  62 #define _AESOPT_H
  63 
  64 #pragma ident   "@(#)aesopt.h   1.1     08/05/21 SMI"
  65 
  66 #ifdef  __cplusplus
  67 extern "C" {
  68 #endif
  69 
  70 #include <sys/types.h>
  71 #include <aes_impl.h>
  72 
  73 /*  SUPPORT FEATURES */
  74 #define AES_ENCRYPT /* define if encryption support is needed; tested when EFUNCS_IN_C is chosen */
  75 #define AES_DECRYPT /* define if decryption support is needed; tested when DFUNCS_IN_C is chosen */
  76 
  77 /*  PLATFORM-SPECIFIC FEATURES */
  78 #define IS_BIG_ENDIAN           4321 /* byte 0 is most significant (mc68k) */
  79 #define IS_LITTLE_ENDIAN        1234 /* byte 0 is least significant (i386) */
  80 #define PLATFORM_BYTE_ORDER     IS_LITTLE_ENDIAN /* fixed here; no compiler or runtime detection is done */
  81 #define AES_REV_DKS /* define to reverse decryption key schedule */
  82 
  83 
  84 /*
  85  *  CONFIGURATION - THE USE OF DEFINES
  86  *      Later in this section there are a number of defines that control the
  87  *      operation of the code.  In each section, the purpose of each define is
  88  *      explained so that the relevant form can be included or excluded by
  89  *      setting either 1's or 0's respectively on the branches of the related
  90  *      #if clauses.  The following local defines should not be changed.
  91  */
  92 
  93 #define ENCRYPTION_IN_C 1 /* bit flags that are OR-ed into EFUNCS_IN_C, */
  94 #define DECRYPTION_IN_C 2 /* DFUNCS_IN_C and FUNCS_IN_C further down */
  95 #define ENC_KEYING_IN_C 4
  96 #define DEC_KEYING_IN_C 8
  97 
  98 #define NO_TABLES       0 /* table counts used for ENC_ROUND, LAST_ENC_ROUND, */
  99 #define ONE_TABLE       1 /* DEC_ROUND, LAST_DEC_ROUND and KEY_SCHED */
 100 #define FOUR_TABLES     4
 101 #define NONE            0 /* unrolling levels used for ENC_UNROLL and DEC_UNROLL */
 102 #define PARTIAL         1
 103 #define FULL            2
 104 
 105 /*  --- START OF USER CONFIGURED OPTIONS --- */
 106 
 107 /*
 108  *  1. BYTE ORDER WITHIN 32 BIT WORDS
 109  *
 110  *      The fundamental data processing units in Rijndael are 8-bit bytes. The
 111  *      input, output and key input are all enumerated arrays of bytes in which
 112  *      bytes are numbered starting at zero and increasing to one less than the
 113  *      number of bytes in the array in question. This enumeration is only used
 114  *      for naming bytes and does not imply any adjacency or order relationship
 115  *      from one byte to another. When these inputs and outputs are considered
 116  *      as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
 117  *      byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
 118  *      In this implementation bits are numbered from 0 to 7 starting at the
 119  *      numerically least significant end of each byte.  Bit n represents 2^n.
 120  *
 121  *      However, Rijndael can be implemented more efficiently using 32-bit
 122  *      words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
 123  *      into word[n]. While in principle these bytes can be assembled into words
 124  *      in any positions, this implementation only supports the two formats in
 125  *      which bytes in adjacent positions within words also have adjacent byte
 126  *      numbers. This order is called big-endian if the lowest numbered bytes
 127  *      in words have the highest numeric significance and little-endian if the
 128  *      opposite applies.
 129  *
 130  *      This code can work in either order irrespective of the order used by the
 131  *      machine on which it runs. Normally the internal byte order will be set
 132  *      to the order of the processor on which the code is to be run but this
 133  *      define  can be used to reverse this in special situations
 134  *
 135  *      WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
 136  *      This define will hence be reset after the user options if necessary.
 137  */
 138 
 139 #if 1 /* selected: the algorithm uses the platform byte order */
 140 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
 141 #elif 0 /* set to 1 (and the branch above to 0) to force little-endian */
 142 #define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
 143 #elif 0 /* set to 1 (and the branches above to 0) to force big-endian */
 144 #define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
 145 #else
 146 #error The algorithm byte order is not defined
 147 #endif
 148 
 149 /*  2. VIA ACE SUPPORT */
 150 
 151 #if defined(__GNUC__) && defined(__i386__) || \
 152         defined(_WIN32) && defined(_M_IX86) && \
 153         !(defined(_WIN64) || defined(_WIN32_WCE) || \
 154         defined(_MSC_VER) && (_MSC_VER <= 800))
 155 #define VIA_ACE_POSSIBLE /* && binds tighter than ||: GCC on i386, or 32-bit x86 Windows other than Win64, WinCE and _MSC_VER <= 800 */
 156 #endif
 157 
 158 /*
 159  *  Define this option if support for the VIA ACE is required. This uses
 160  *  inline assembler instructions and is only implemented for the Microsoft,
 161  *  Intel and GCC compilers.  If VIA ACE is known to be present, then defining
 162  *  ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
 163  *  code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
 164  *  it is detected (both present and enabled) but the normal AES code will
 165  *  also be present.
 166  *
 167  *  When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
 168  *  aligned; other input/output buffers do not need to be 16 byte aligned
 169  *  but there are very large performance gains if this can be arranged.
 170  *  VIA ACE also requires the decryption key schedule to be in reverse
 171  *  order (which later checks below ensure).
 172  */
 173 
 174 /*  VIA ACE is not used here for OpenSolaris: */
 175 #undef  VIA_ACE_POSSIBLE /* cancels the detection made just above */
 176 #undef  ASSUME_VIA_ACE_PRESENT /* also drops any definition supplied on the command line */
 177 
 178 #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(USE_VIA_ACE_IF_PRESENT) /* never true: leading 0 */
 179 #define USE_VIA_ACE_IF_PRESENT
 180 #endif
 181 
 182 #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(ASSUME_VIA_ACE_PRESENT) /* never true: leading 0 */
 183 #define ASSUME_VIA_ACE_PRESENT
 184 #endif
 185 
 186 
 187 /*
 188  *  3. ASSEMBLER SUPPORT
 189  *
 190  *      This define (which can be on the command line) enables the use of the
 191  *      assembler code routines for encryption, decryption and key scheduling
 192  *      as follows:
 193  *
 194  *      ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
 195  *              encryption and decryption but with key scheduling in C
 196  *      ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
 197  *              encryption, decryption and key scheduling
 198  *      ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
 199  *              encryption and decryption but with key scheduling in C
 200  *      ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
 201  *              encryption and decryption but with key scheduling in C
 202  *
 203  *      Change one 'if 0' below to 'if 1' to select the version or define
 204  *      as a compilation option.
 205  */
 206 
 207 #if 0 && !defined(ASM_X86_V1C) /* disabled */
 208 #define ASM_X86_V1C
 209 #elif 0 && !defined(ASM_X86_V2) /* disabled */
 210 #define ASM_X86_V2
 211 #elif 0 && !defined(ASM_X86_V2C) /* disabled */
 212 #define ASM_X86_V2C
 213 #elif 1 && !defined(ASM_AMD64_C) /* the branch enabled in this file */
 214 #define ASM_AMD64_C
 215 #endif
 216 
 217 #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2) || defined(ASM_X86_V2C)) && \
 218         !defined(_M_IX86) || defined(ASM_AMD64_C) && !defined(_M_X64) && \
 219         !defined(__amd64)
 220 #error Assembler code is only available for x86 and AMD64 systems
 221 #endif /* the x86 variants require _M_IX86; ASM_AMD64_C requires _M_X64 or __amd64 */
 222 
 223 /*
 224  *  4. FAST INPUT/OUTPUT OPERATIONS.
 225  *
 226  *      On some machines it is possible to improve speed by transferring the
 227  *      bytes in the input and output arrays to and from the internal 32-bit
 228  *      variables by addressing these arrays as if they are arrays of 32-bit
 229  *      words.  On some machines this will always be possible but there may
 230  *      be a large performance penalty if the byte arrays are not aligned on
 231  *      the normal word boundaries. On other machines this technique will
 232  *      lead to memory access errors when such 32-bit word accesses are not
 233  *      properly aligned. The option SAFE_IO avoids such problems but will
 234  *      often be slower on those machines that support misaligned access
 235  *      (especially so if care is taken to align the input  and output byte
 236  *      arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
 237  *      assumed that access to byte arrays as if they are arrays of 32-bit
 238  *      words will not cause problems when such accesses are misaligned.
 239  */
 240 #if 1 && !defined(_MSC_VER) /* on unless the compiler defines _MSC_VER */
 241 #define SAFE_IO /* selects the byte-by-byte word_in()/word_out() forms below */
 242 #endif
 243 
 244 /*
 245  *  5. LOOP UNROLLING
 246  *
 247  *      The code for encryption and decryption cycles through a number of rounds
 248  *      that can be implemented either in a loop or by expanding the code into a
 249  *      long sequence of instructions, the latter producing a larger program but
 250  *      one that will often be much faster. The latter is called loop unrolling.
 251  *      There are also potential speed advantages in expanding two iterations in
 252  *      a loop with half the number of iterations, which is called partial loop
 253  *      unrolling.  The following options allow partial or full loop unrolling
 254  *      to be set independently for encryption and decryption
 255  */
 256 #if 1 /* selected: full unrolling of the encryption rounds */
 257 #define ENC_UNROLL  FULL /* reset to NONE later if ENC_ROUND is NO_TABLES */
 258 #elif 0
 259 #define ENC_UNROLL  PARTIAL
 260 #else
 261 #define ENC_UNROLL  NONE
 262 #endif
 263 
 264 #if 1 /* selected: full unrolling of the decryption rounds */
 265 #define DEC_UNROLL  FULL /* reset to NONE later if DEC_ROUND is NO_TABLES */
 266 #elif 0
 267 #define DEC_UNROLL  PARTIAL
 268 #else
 269 #define DEC_UNROLL  NONE
 270 #endif
 271 
 272 #if 1 /* presumably unrolls the encryption key schedule; the consumer is not in this file */
 273 #define ENC_KS_UNROLL
 274 #endif
 275 
 276 #if 1 /* presumably unrolls the decryption key schedule; the consumer is not in this file */
 277 #define DEC_KS_UNROLL
 278 #endif
 279 
 280 /*
 281  *  6. FAST FINITE FIELD OPERATIONS
 282  *
 283  *      If this section is included, tables are used to provide faster finite
 284  *      field arithmetic.  This has no effect if FIXED_TABLES is defined.
 285  */
 286 #if 1 /* change to 0 to leave FF_TABLES undefined */
 287 #define FF_TABLES
 288 #endif
 289 
 290 /*
 291  *  7. INTERNAL STATE VARIABLE FORMAT
 292  *
 293  *      The internal state of Rijndael is stored in a number of local 32-bit
 294  *      word variables which can be defined either as an array or as individually
 295  *      named variables. Include this section if you want to store these local
 296  *      variables in arrays. Otherwise individual local variables will be used.
 297  */
 298 #if 1 /* change to 0 to use individually named state variables */
 299 #define ARRAYS /* makes s(x, c) expand to x[c] further down */
 300 #endif
 301 
 302 /*
 303  *  8. FIXED OR DYNAMIC TABLES
 304  *
 305  *      When this section is included the tables used by the code are compiled
 306  *      statically into the binary file.  Otherwise the subroutine aes_init()
 307  *      must be called to compute them before the code is first used.
 308  */
 309 #if 1 && !(defined(_MSC_VER) && (_MSC_VER <= 800)) /* on, except for _MSC_VER <= 800 */
 310 #define FIXED_TABLES
 311 #endif
 312 
 313 /*
 314  *  9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
 315  *
 316  *      In some systems it is better to mask longer values to extract bytes
 317  *      rather than using a cast. This option allows this choice.
 318  */
 319 #if 0
 320 #define to_byte(x)  ((uint8_t)(x))
 321 #else
 322 #define to_byte(x)  ((x) & 0xff)
 323 #endif
 324 
 325 /*
 326  *  10. TABLE ALIGNMENT
 327  *
 328  *      On some systems speed will be improved by aligning the AES large lookup
 329  *      tables on particular boundaries. This define should be set to a power of
 330  *      two giving the desired alignment. It can be left undefined if alignment
 331  *      is not needed.  This option is specific to the Microsoft VC++ compiler -
 332  *      it seems to sometimes cause trouble for the VC++ version 6 compiler.
 333  */
 334 
 335 #if 1 && defined(_MSC_VER) && (_MSC_VER >= 1300) /* only for _MSC_VER >= 1300 */
 336 #define TABLE_ALIGN 32 /* requested table alignment; must be a power of two */
 337 #endif
 338 
 339 /*
 340  *  11.  REDUCE CODE AND TABLE SIZE
 341  *
 342  *      This replaces some expanded macros with function calls if ASM_X86_V2 or
 343  *      ASM_X86_V2C are defined
 344  */
 345 
 346 #if 1 && (defined(ASM_X86_V2) || defined(ASM_X86_V2C))
 347 #define REDUCE_CODE_SIZE /* when set, the key-schedule table selection and the fwd_mcol/inv_mcol/ls_box macros below are skipped */
 348 #endif
 349 
 350 /*
 351  *  12. TABLE OPTIONS
 352  *
 353  *      This cipher proceeds by repeating in a number of cycles known as rounds
 354  *      which are implemented by a round function which can optionally be speeded
 355  *      up using tables.  The basic tables are 256 32-bit words, with either
 356  *      one or four tables being required for each round function depending on
 357  *      how much speed is required. Encryption and decryption round functions
 358  *      are different and the last encryption and decryption round functions are
 359  *      different again making four different round functions in all.
 360  *
 361  *      This means that:
 362  *      1. Normal encryption and decryption rounds can each use either 0, 1
 363  *              or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
 364  *      2. The last encryption and decryption rounds can also use either 0, 1
 365  *              or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
 366  *
 367  *      Include or exclude the appropriate definitions below to set the number
 368  *      of tables used by this implementation.
 369  */
 370 
 371 #if 1   /* set tables for the normal encryption round */
 372 #define ENC_ROUND   FOUR_TABLES
 373 #elif 0 /* set to 1 (and the branch above to 0) for a single table */
 374 #define ENC_ROUND   ONE_TABLE
 375 #else
 376 #define ENC_ROUND   NO_TABLES
 377 #endif
 378 
 379 #if 1   /* set tables for the last encryption round */
 380 #define LAST_ENC_ROUND  FOUR_TABLES /* lowered later if ENC_ROUND uses fewer tables */
 381 #elif 0
 382 #define LAST_ENC_ROUND  ONE_TABLE
 383 #else
 384 #define LAST_ENC_ROUND  NO_TABLES
 385 #endif
 386 
 387 #if 1   /* set tables for the normal decryption round */
 388 #define DEC_ROUND   FOUR_TABLES
 389 #elif 0 /* set to 1 (and the branch above to 0) for a single table */
 390 #define DEC_ROUND   ONE_TABLE
 391 #else
 392 #define DEC_ROUND   NO_TABLES
 393 #endif
 394 
 395 #if 1   /* set tables for the last decryption round */
 396 #define LAST_DEC_ROUND  FOUR_TABLES /* lowered later if DEC_ROUND uses fewer tables */
 397 #elif 0
 398 #define LAST_DEC_ROUND  ONE_TABLE
 399 #else
 400 #define LAST_DEC_ROUND  NO_TABLES
 401 #endif
 402 
 403 /*
 404  *  The decryption key schedule can be speeded up with tables in the same
 405  *      way that the round functions can.  Include or exclude the following
 406  *      defines to set this requirement.
 407  */
 408 #if 1 /* selected: four tables for the key schedule */
 409 #define KEY_SCHED   FOUR_TABLES /* decides between the LS1/LS4 and IM1/IM4 sets further down */
 410 #elif 0
 411 #define KEY_SCHED   ONE_TABLE
 412 #else
 413 #define KEY_SCHED   NO_TABLES
 414 #endif
 415 
 416 /*  ---- END OF USER CONFIGURED OPTIONS ---- */
 417 
 418 /* VIA ACE support is only available for VC++ and GCC */
 419 
 420 #if !defined(_MSC_VER) && !defined(__GNUC__) /* other compilers: drop both VIA ACE requests */
 421 #if defined(ASSUME_VIA_ACE_PRESENT)
 422 #undef ASSUME_VIA_ACE_PRESENT
 423 #endif
 424 #if defined(USE_VIA_ACE_IF_PRESENT)
 425 #undef USE_VIA_ACE_IF_PRESENT
 426 #endif
 427 #endif
 428 
 429 #if defined(ASSUME_VIA_ACE_PRESENT) && !defined(USE_VIA_ACE_IF_PRESENT) /* ASSUME implies USE */
 430 #define USE_VIA_ACE_IF_PRESENT
 431 #endif
 432 
 433 #if defined(USE_VIA_ACE_IF_PRESENT) && !defined(AES_REV_DKS) /* VIA ACE needs the reversed decryption key schedule */
 434 #define AES_REV_DKS
 435 #endif
 436 
 437 /* Assembler support requires the use of platform byte order */
 438 
 439 #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2C) || defined(ASM_AMD64_C)) && \
 440         (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
 441 #undef  ALGORITHM_BYTE_ORDER /* discard the choice made in section 1 */
 442 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
 443 #endif /* NOTE(review): ASM_X86_V2 is not in the list above -- confirm that is intended */
 444 
 445 /*
 446  * In this implementation the columns of the state array are each held in
 447  *      32-bit words. The state array can be held in various ways: in an array
 448  *      of words, in a number of individual word variables or in a number of
 449  *      processor registers. The following define maps a variable name x and
 450  *      a column number c to the way the state array variable is to be held.
 451  *      The first define below maps the state into an array x[c] whereas the
 452  *      second form maps the state into a number of individual variables x0,
 453  *      x1, etc.  Another form could map individual state columns to machine
 454  *      register names.
 455  */
 456 
 457 #if defined(ARRAYS)
 458 #define s(x, c) x[c]
 459 #else
 460 #define s(x, c) x##c
 461 #endif
 462 
 463 /*
 464  *  This implementation provides subroutines for encryption, decryption
 465  *      and for setting the three key lengths (separately) for encryption
 466  *      and decryption. Since not all functions are needed, masks are set
 467  *      up here to determine which will be implemented in C
 468  */
 469 
 470 #if !defined(AES_ENCRYPT)
 471 #define EFUNCS_IN_C   0 /* encryption support not requested */
 472 #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
 473         defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
 474 #define EFUNCS_IN_C   ENC_KEYING_IN_C /* only the encryption key schedule is in C */
 475 #elif !defined(ASM_X86_V2)
 476 #define EFUNCS_IN_C   (ENCRYPTION_IN_C | ENC_KEYING_IN_C) /* no assembler: both parts in C */
 477 #else
 478 #define EFUNCS_IN_C   0 /* ASM_X86_V2: nothing for encryption in C */
 479 #endif
 480 
 481 #if !defined(AES_DECRYPT)
 482 #define DFUNCS_IN_C   0 /* decryption support not requested */
 483 #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
 484         defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
 485 #define DFUNCS_IN_C   DEC_KEYING_IN_C /* only the decryption key schedule is in C */
 486 #elif !defined(ASM_X86_V2)
 487 #define DFUNCS_IN_C   (DECRYPTION_IN_C | DEC_KEYING_IN_C) /* no assembler: both parts in C */
 488 #else
 489 #define DFUNCS_IN_C   0 /* ASM_X86_V2: nothing for decryption in C */
 490 #endif
 491 
 492 #define FUNCS_IN_C  (EFUNCS_IN_C | DFUNCS_IN_C) /* combined mask tested by the table selection below */
 493 
 494 /* END OF CONFIGURATION OPTIONS */
 495 
 496 /* Disable or report errors on some combinations of options */
 497 
 498 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES /* cap the last round at the table count of the normal round */
 499 #undef  LAST_ENC_ROUND
 500 #define LAST_ENC_ROUND  NO_TABLES
 501 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
 502 #undef  LAST_ENC_ROUND
 503 #define LAST_ENC_ROUND  ONE_TABLE
 504 #endif
 505 
 506 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE /* unrolling is switched off when no round tables are used */
 507 #undef  ENC_UNROLL
 508 #define ENC_UNROLL  NONE
 509 #endif
 510 
 511 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES /* same capping for decryption */
 512 #undef  LAST_DEC_ROUND
 513 #define LAST_DEC_ROUND  NO_TABLES
 514 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
 515 #undef  LAST_DEC_ROUND
 516 #define LAST_DEC_ROUND  ONE_TABLE
 517 #endif
 518 
 519 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE /* unrolling is switched off when no round tables are used */
 520 #undef  DEC_UNROLL
 521 #define DEC_UNROLL  NONE
 522 #endif
 523 
 524 #if defined(bswap32)
 525 #define aes_sw32        bswap32
 526 #elif defined(bswap_32)
 527 #define aes_sw32        bswap_32
 528 #else
 529 #define brot(x, n)   (((uint32_t)(x) <<  n) | ((uint32_t)(x) >> (32 - n)))
 530 #define aes_sw32(x) ((brot((x), 8) & 0x00ff00ff) | (brot((x), 24) & 0xff00ff00))
 531 #endif
 532 
 533 /*
 534  *  upr(x, n):  rotates bytes within words by n positions, moving bytes to
 535  *              higher index positions with wrap around into low positions
 536  *      ups(x, n):  moves bytes by n positions to higher index positions in
 537  *              words but without wrap around
 538  *      bval(x, n): extracts a byte from a word
 539  *
 540  *      WARNING:   The definitions given here are intended only for use with
 541  *              unsigned variables and with shift counts that are compile
 542  *              time constants
 543  */
 544 
 545 #if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 546 #define upr(x, n)       (((uint32_t)(x) << (8 * (n))) | \
 547                         ((uint32_t)(x) >> (32 - 8 * (n))))
 548 #define ups(x, n)       ((uint32_t)(x) << (8 * (n)))
 549 #define bval(x, n)      to_byte((x) >> (8 * (n)))
 550 #define bytes2word(b0, b1, b2, b3)  \
 551                 (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | \
 552                 ((uint32_t)(b1) << 8) | (b0))
 553 #endif
 554 
 555 #if (ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN)
 556 #define upr(x, n)       (((uint32_t)(x) >> (8 * (n))) | \
 557                         ((uint32_t)(x) << (32 - 8 * (n))))
 558 #define ups(x, n)       ((uint32_t)(x) >> (8 * (n)))
 559 #define bval(x, n)      to_byte((x) >> (24 - 8 * (n)))
 560 #define bytes2word(b0, b1, b2, b3)  \
 561                 (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | \
 562                 ((uint32_t)(b2) << 8) | (b3))
 563 #endif
 564 
 565 #if defined(SAFE_IO)
 566 #define word_in(x, c)   bytes2word(((const uint8_t *)(x) + 4 * c)[0], \
 567                                 ((const uint8_t *)(x) + 4 * c)[1], \
 568                                 ((const uint8_t *)(x) + 4 * c)[2], \
 569                                 ((const uint8_t *)(x) + 4 * c)[3])
 570 #define word_out(x, c, v) { ((uint8_t *)(x) + 4 * c)[0] = bval(v, 0); \
 571                         ((uint8_t *)(x) + 4 * c)[1] = bval(v, 1); \
 572                         ((uint8_t *)(x) + 4 * c)[2] = bval(v, 2); \
 573                         ((uint8_t *)(x) + 4 * c)[3] = bval(v, 3); }
 574 #elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
 575 #define word_in(x, c)   (*((uint32_t *)(x) + (c)))
 576 #define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = (v))
 577 #else
 578 #define word_in(x, c)   aes_sw32(*((uint32_t *)(x) + (c)))
 579 #define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = aes_sw32(v))
 580 #endif
 581 
 582 /* the finite field modular polynomial and elements */
 583 
 584 #define WPOLY   0x011b
 585 #define BPOLY   0x1b
 586 
 587 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
 588 
 589 #define m1  0x80808080
 590 #define m2  0x7f7f7f7f
 591 #define gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
 592 
 593 /*
 594  * The following defines provide alternative definitions of gf_mulx that might
 595  * give improved performance if a fast 32-bit multiply is not available. Note
 596  * that a temporary variable u needs to be defined where gf_mulx is used.
 597  *
 598  * #define      gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ \
 599  *                      ((u >> 3) | (u >> 6))
 600  * #define      m4  (0x01010101 * BPOLY)
 601  * #define      gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) \
 602  *                      & m4)
 603  */
 604 
 605 /* Work out which tables are needed for the different options   */
 606 
 607 #if defined(ASM_X86_V1C)
 608 #if defined(ENC_ROUND)
 609 #undef  ENC_ROUND
 610 #endif
 611 #define ENC_ROUND   FOUR_TABLES
 612 #if defined(LAST_ENC_ROUND)
 613 #undef  LAST_ENC_ROUND
 614 #endif
 615 #define LAST_ENC_ROUND  FOUR_TABLES
 616 #if defined(DEC_ROUND)
 617 #undef  DEC_ROUND
 618 #endif
 619 #define DEC_ROUND   FOUR_TABLES
 620 #if defined(LAST_DEC_ROUND)
 621 #undef  LAST_DEC_ROUND
 622 #endif
 623 #define LAST_DEC_ROUND  FOUR_TABLES
 624 #if defined(KEY_SCHED)
 625 #undef  KEY_SCHED
 626 #define KEY_SCHED   FOUR_TABLES
 627 #endif
 628 #endif
 629 
 630 #if (FUNCS_IN_C & ENCRYPTION_IN_C) || defined(ASM_X86_V1C) /* false when only the key schedules are in C, as with ASM_AMD64_C */
 631 #if ENC_ROUND == ONE_TABLE
 632 #define FT1_SET
 633 #elif ENC_ROUND == FOUR_TABLES
 634 #define FT4_SET
 635 #else
 636 #define SBX_SET /* NO_TABLES for the normal round */
 637 #endif
 638 #if LAST_ENC_ROUND == ONE_TABLE
 639 #define FL1_SET
 640 #elif LAST_ENC_ROUND == FOUR_TABLES
 641 #define FL4_SET
 642 #elif !defined(SBX_SET)
 643 #define SBX_SET /* NO_TABLES for the last round only */
 644 #endif
 645 #endif
 646 
 647 #if (FUNCS_IN_C & DECRYPTION_IN_C) || defined(ASM_X86_V1C) /* decryption counterpart of the block above */
 648 #if DEC_ROUND == ONE_TABLE
 649 #define IT1_SET
 650 #elif DEC_ROUND == FOUR_TABLES
 651 #define IT4_SET
 652 #else
 653 #define ISB_SET /* NO_TABLES for the normal round */
 654 #endif
 655 #if LAST_DEC_ROUND == ONE_TABLE
 656 #define IL1_SET
 657 #elif LAST_DEC_ROUND == FOUR_TABLES
 658 #define IL4_SET
 659 #elif !defined(ISB_SET)
 660 #define ISB_SET /* NO_TABLES for the last round only */
 661 #endif
 662 #endif
 663 
 664 
 665 #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
 666         defined(ASM_X86_V2C)))
 667 #if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C)) /* either key schedule is built from C */
 668 #if KEY_SCHED == ONE_TABLE
 669 #if !defined(FL1_SET) && !defined(FL4_SET) /* ls_box() below uses the FL set instead when one is selected */
 670 #define LS1_SET
 671 #endif
 672 #elif KEY_SCHED == FOUR_TABLES
 673 #if !defined(FL4_SET) /* ls_box() below prefers FL4_SET over LS4_SET */
 674 #define LS4_SET
 675 #endif
 676 #elif !defined(SBX_SET)
 677 #define SBX_SET
 678 #endif
 679 #endif
 680 #if (FUNCS_IN_C & DEC_KEYING_IN_C) /* decryption key schedule in C: pick the set inv_mcol() uses */
 681 #if KEY_SCHED == ONE_TABLE
 682 #define IM1_SET
 683 #elif KEY_SCHED == FOUR_TABLES
 684 #define IM4_SET
 685 #elif !defined(SBX_SET)
 686 #define SBX_SET
 687 #endif
 688 #endif
 689 #endif /* !(REDUCE_CODE_SIZE && (ASM_X86_V2 || ASM_X86_V2C)) */
 690 
 691 /* generic definitions of Rijndael macros that use tables */
 692 
 693 #define no_table(x, box, vf, rf, c) bytes2word(\
 694         box[bval(vf(x, 0, c), rf(0, c))], \
 695         box[bval(vf(x, 1, c), rf(1, c))], \
 696         box[bval(vf(x, 2, c), rf(2, c))], \
 697         box[bval(vf(x, 3, c), rf(3, c))]) /* four box[] lookups packed into one word with bytes2word() */
 698 
 699 #define one_table(x, op, tab, vf, rf, c) \
 700         (tab[bval(vf(x, 0, c), rf(0, c))] \
 701         ^ op(tab[bval(vf(x, 1, c), rf(1, c))], 1) \
 702         ^ op(tab[bval(vf(x, 2, c), rf(2, c))], 2) \
 703         ^ op(tab[bval(vf(x, 3, c), rf(3, c))], 3)) /* one table; op(entry, r) is applied to the lookup for row r = 1..3 before the XOR */
 704 
 705 #define four_tables(x, tab, vf, rf, c) \
 706         (tab[0][bval(vf(x, 0, c), rf(0, c))] \
 707         ^ tab[1][bval(vf(x, 1, c), rf(1, c))] \
 708         ^ tab[2][bval(vf(x, 2, c), rf(2, c))] \
 709         ^ tab[3][bval(vf(x, 3, c), rf(3, c))]) /* one table per row r, the four lookups XOR-ed together */
 710 
 711 #define vf1(x, r, c)    (x)
 712 #define rf1(r, c)       (r)
 713 #define rf2(r, c)       ((8+r-c)&3)
 714 
 715 /*
 716  * Perform forward and inverse column mix operation on four bytes in long word
 717  * x in parallel. NOTE: x must be a simple variable, NOT an expression in
 718  * these macros.
 719  */
 720 
 721 #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
 722         defined(ASM_X86_V2C)))
 723 
 724 #if defined(FM4_SET)    /* not currently used */
 725 #define fwd_mcol(x)     four_tables(x, t_use(f, m), vf1, rf1, 0) /* t_use() is not defined in this file */
 726 #elif defined(FM1_SET)  /* not currently used */
 727 #define fwd_mcol(x)     one_table(x, upr, t_use(f, m), vf1, rf1, 0)
 728 #else
 729 #define dec_fmvars      uint32_t g2 /* declares the temporary that fwd_mcol() assigns to */
 730 #define fwd_mcol(x)     (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ \
 731                                 upr((x), 2) ^ upr((x), 1))
 732 #endif
 733 
 734 #if defined(IM4_SET)
 735 #define inv_mcol(x)     four_tables(x, t_use(i, m), vf1, rf1, 0) /* table form; selected when KEY_SCHED is FOUR_TABLES */
 736 #elif defined(IM1_SET)
 737 #define inv_mcol(x)     one_table(x, upr, t_use(i, m), vf1, rf1, 0)
 738 #else
 739 #define dec_imvars      uint32_t g2, g4, g9 /* declares the temporaries that inv_mcol() assigns to */
 740 #define inv_mcol(x)     (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = \
 741                                 (x) ^ gf_mulx(g4), g4 ^= g9, \
 742                                 (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ \
 743                                 upr(g4, 2) ^ upr(g9, 1))
 744 #endif
 745 
 746 #if defined(FL4_SET) /* ls_box(): first matching table set wins, FL4 > LS4 > FL1 > LS1 > no table */
 747 #define ls_box(x, c)    four_tables(x, t_use(f, l), vf1, rf2, c)
 748 #elif defined(LS4_SET)
 749 #define ls_box(x, c)    four_tables(x, t_use(l, s), vf1, rf2, c)
 750 #elif defined(FL1_SET)
 751 #define ls_box(x, c)    one_table(x, upr, t_use(f, l), vf1, rf2, c)
 752 #elif defined(LS1_SET)
 753 #define ls_box(x, c)    one_table(x, upr, t_use(l, s), vf1, rf2, c)
 754 #else
 755 #define ls_box(x, c)    no_table(x, t_use(s, box), vf1, rf2, c)
 756 #endif
 757 
 758 #endif /* !(REDUCE_CODE_SIZE && (ASM_X86_V2 || ASM_X86_V2C)) */
 759 
 760 #if defined(ASM_X86_V1C) && defined(AES_DECRYPT) && !defined(ISB_SET)
 761 #define ISB_SET /* always defined for ASM_X86_V1C with decryption, whatever the table options chose */
 762 #endif
 763 
 764 #ifdef  __cplusplus
 765 }
 766 #endif
 767 
 768 #endif  /* _AESOPT_H */