nv97_bswap New usr/src/common/crypto/aes/amd64/aesopt.h

   1 /*
   2  * ---------------------------------------------------------------------------
   3  * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
   4  *
   5  * LICENSE TERMS
   6  *
   7  * The free distribution and use of this software is allowed (with or without
   8  * changes) provided that:
   9  *
  10  *  1. source code distributions include the above copyright notice, this
  11  *      list of conditions and the following disclaimer;
  12  *
  13  *  2. binary distributions include the above copyright notice, this list
  14  *      of conditions and the following disclaimer in their documentation;
  15  *
  16  *  3. the name of the copyright holder is not used to endorse products
  17  *      built using this software without specific written permission.
  18  *
  19  * DISCLAIMER
  20  *
  21  * This software is provided 'as is' with no explicit or implied warranties
  22  * in respect of its properties, including, but not limited to, correctness
  23  * and/or fitness for purpose.
  24  * ---------------------------------------------------------------------------
  25  * Issue Date: 20/12/2007
  26  *
  27  * This file contains the compilation options for AES (Rijndael) and code
  28  * that is common across encryption, key scheduling and table generation.
  29  *
  30  * OPERATION
  31  *
  32  * These source code files implement the AES algorithm Rijndael designed by
  33  * Joan Daemen and Vincent Rijmen. This version is designed for the standard
  34  * block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
  35  * and 32 bytes).
  36  *
  37  * This version is designed for flexibility and speed using operations on
  38  * 32-bit words rather than operations on bytes.  It can be compiled with
  39  * either big or little endian internal byte order but is faster when the
  40  * native byte order for the processor is used.
  41  *
  42  * THE CIPHER INTERFACE
  43  *
  44  * The cipher interface is implemented as an array of bytes in which lower
  45  * AES bit sequence indexes map to higher numeric significance within bytes.
  46  */
  47 
  48 /*
  49  * OpenSolaris changes
   50  * 1. Added __cplusplus and _AESOPT_H header guards
  51  * 2. Added header files sys/types.h and aes_impl.h
  52  * 3. Added defines for AES_ENCRYPT, AES_DECRYPT, AES_REV_DKS, and ASM_AMD64_C
  53  * 4. Moved defines for IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, PLATFORM_BYTE_ORDER
  54  *    from brg_endian.h
  55  * 5. Undefined VIA_ACE_POSSIBLE and ASSUME_VIA_ACE_PRESENT
  56  * 6. Changed uint_8t and uint_32t to uint8_t and uint32_t
  57  * 7. Defined aes_sw32 as htonl() for byte swapping
  58  * 8. Cstyled and hdrchk code
  59  *
  60  */
  61 
  62 #ifndef _AESOPT_H
  63 #define _AESOPT_H
  64 
  65 #ifdef  __cplusplus
  66 extern "C" {
  67 #endif
  68 
  69 #include <sys/types.h>
  70 #include <sys/byteorder.h>
  71 #include <aes_impl.h>
  72 
  73 /*  SUPPORT FEATURES */
  74 #define AES_ENCRYPT /* if support for encryption is needed */
  75 #define AES_DECRYPT /* if support for decryption is needed */
  76 
  77 /*  PLATFORM-SPECIFIC FEATURES */
  78 #define IS_BIG_ENDIAN           4321 /* byte 0 is most significant (mc68k) */
  79 #define IS_LITTLE_ENDIAN        1234 /* byte 0 is least significant (i386) */
  80 #define PLATFORM_BYTE_ORDER     IS_LITTLE_ENDIAN
  81 #define AES_REV_DKS /* define to reverse decryption key schedule */
  82 
  83 
  84 /*
  85  *  CONFIGURATION - THE USE OF DEFINES
  86  *      Later in this section there are a number of defines that control the
  87  *      operation of the code.  In each section, the purpose of each define is
  88  *      explained so that the relevant form can be included or excluded by
  89  *      setting either 1's or 0's respectively on the branches of the related
  90  *      #if clauses.  The following local defines should not be changed.
  91  */
  92 
  93 #define ENCRYPTION_IN_C 1
  94 #define DECRYPTION_IN_C 2
  95 #define ENC_KEYING_IN_C 4
  96 #define DEC_KEYING_IN_C 8
  97 
  98 #define NO_TABLES       0
  99 #define ONE_TABLE       1
 100 #define FOUR_TABLES     4
 101 #define NONE            0
 102 #define PARTIAL         1
 103 #define FULL            2
 104 
 105 /*  --- START OF USER CONFIGURED OPTIONS --- */
 106 
 107 /*
 108  *  1. BYTE ORDER WITHIN 32 BIT WORDS
 109  *
 110  *      The fundamental data processing units in Rijndael are 8-bit bytes. The
 111  *      input, output and key input are all enumerated arrays of bytes in which
 112  *      bytes are numbered starting at zero and increasing to one less than the
 113  *      number of bytes in the array in question. This enumeration is only used
 114  *      for naming bytes and does not imply any adjacency or order relationship
 115  *      from one byte to another. When these inputs and outputs are considered
 116  *      as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
 117  *      byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
 118  *      In this implementation bits are numbered from 0 to 7 starting at the
 119  *      numerically least significant end of each byte.  Bit n represents 2^n.
 120  *
 121  *      However, Rijndael can be implemented more efficiently using 32-bit
 122  *      words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
 123  *      into word[n]. While in principle these bytes can be assembled into words
 124  *      in any positions, this implementation only supports the two formats in
 125  *      which bytes in adjacent positions within words also have adjacent byte
 126  *      numbers. This order is called big-endian if the lowest numbered bytes
 127  *      in words have the highest numeric significance and little-endian if the
 128  *      opposite applies.
 129  *
 130  *      This code can work in either order irrespective of the order used by the
 131  *      machine on which it runs. Normally the internal byte order will be set
 132  *      to the order of the processor on which the code is to be run but this
  133  *      define can be used to reverse this in special situations.
 134  *
 135  *      WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
 136  *      This define will hence be redefined later (in section 4) if necessary
 137  */
 138 
 139 #if 1
 140 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
 141 #elif 0
 142 #define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
 143 #elif 0
 144 #define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
 145 #else
 146 #error The algorithm byte order is not defined
 147 #endif
 148 
 149 /*  2. VIA ACE SUPPORT */
 150 
 151 #if defined(__GNUC__) && defined(__i386__) || \
 152         defined(_WIN32) && defined(_M_IX86) && \
 153         !(defined(_WIN64) || defined(_WIN32_WCE) || \
 154         defined(_MSC_VER) && (_MSC_VER <= 800))
 155 #define VIA_ACE_POSSIBLE
 156 #endif
 157 
 158 /*
 159  *  Define this option if support for the VIA ACE is required. This uses
 160  *  inline assembler instructions and is only implemented for the Microsoft,
 161  *  Intel and GCC compilers.  If VIA ACE is known to be present, then defining
 162  *  ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
 163  *  code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
 164  *  it is detected (both present and enabled) but the normal AES code will
 165  *  also be present.
 166  *
 167  *  When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
 168  *  aligned; other input/output buffers do not need to be 16 byte aligned
 169  *  but there are very large performance gains if this can be arranged.
 170  *  VIA ACE also requires the decryption key schedule to be in reverse
 171  *  order (which later checks below ensure).
 172  */
 173 
 174 /*  VIA ACE is not used here for OpenSolaris: */
 175 #undef  VIA_ACE_POSSIBLE
 176 #undef  ASSUME_VIA_ACE_PRESENT
 177 
 178 #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(USE_VIA_ACE_IF_PRESENT)
 179 #define USE_VIA_ACE_IF_PRESENT
 180 #endif
 181 
 182 #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(ASSUME_VIA_ACE_PRESENT)
 183 #define ASSUME_VIA_ACE_PRESENT
 184 #endif
 185 
 186 
 187 /*
 188  *  3. ASSEMBLER SUPPORT
 189  *
 190  *      This define (which can be on the command line) enables the use of the
 191  *      assembler code routines for encryption, decryption and key scheduling
 192  *      as follows:
 193  *
  194  *      ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
  195  *              encryption and decryption but with key scheduling in C
  196  *      ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
  197  *              encryption, decryption and key scheduling
  198  *      ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
  199  *              encryption and decryption but with key scheduling in C
  200  *      ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
  201  *              encryption and decryption but with key scheduling in C
 202  *
 203  *      Change one 'if 0' below to 'if 1' to select the version or define
 204  *      as a compilation option.
 205  */
 206 
 207 #if 0 && !defined(ASM_X86_V1C)
 208 #define ASM_X86_V1C
 209 #elif 0 && !defined(ASM_X86_V2)
 210 #define ASM_X86_V2
 211 #elif 0 && !defined(ASM_X86_V2C)
 212 #define ASM_X86_V2C
 213 #elif 1 && !defined(ASM_AMD64_C)
 214 #define ASM_AMD64_C
 215 #endif
 216 
 217 #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2) || defined(ASM_X86_V2C)) && \
 218         !defined(_M_IX86) || defined(ASM_AMD64_C) && !defined(_M_X64) && \
 219         !defined(__amd64)
 220 #error Assembler code is only available for x86 and AMD64 systems
 221 #endif
 222 
 223 /*
 224  *  4. FAST INPUT/OUTPUT OPERATIONS.
 225  *
 226  *      On some machines it is possible to improve speed by transferring the
 227  *      bytes in the input and output arrays to and from the internal 32-bit
 228  *      variables by addressing these arrays as if they are arrays of 32-bit
 229  *      words.  On some machines this will always be possible but there may
 230  *      be a large performance penalty if the byte arrays are not aligned on
 231  *      the normal word boundaries. On other machines this technique will
 232  *      lead to memory access errors when such 32-bit word accesses are not
 233  *      properly aligned. The option SAFE_IO avoids such problems but will
 234  *      often be slower on those machines that support misaligned access
 235  *      (especially so if care is taken to align the input  and output byte
 236  *      arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
 237  *      assumed that access to byte arrays as if they are arrays of 32-bit
 238  *      words will not cause problems when such accesses are misaligned.
 239  */
 240 #if 1 && !defined(_MSC_VER)
 241 #define SAFE_IO
 242 #endif
 243 
 244 /*
 245  *  5. LOOP UNROLLING
 246  *
 247  *      The code for encryption and decryption cycles through a number of rounds
 248  *      that can be implemented either in a loop or by expanding the code into a
 249  *      long sequence of instructions, the latter producing a larger program but
 250  *      one that will often be much faster. The latter is called loop unrolling.
 251  *      There are also potential speed advantages in expanding two iterations in
 252  *      a loop with half the number of iterations, which is called partial loop
 253  *      unrolling.  The following options allow partial or full loop unrolling
 254  *      to be set independently for encryption and decryption
 255  */
 256 #if 1
 257 #define ENC_UNROLL  FULL
 258 #elif 0
 259 #define ENC_UNROLL  PARTIAL
 260 #else
 261 #define ENC_UNROLL  NONE
 262 #endif
 263 
 264 #if 1
 265 #define DEC_UNROLL  FULL
 266 #elif 0
 267 #define DEC_UNROLL  PARTIAL
 268 #else
 269 #define DEC_UNROLL  NONE
 270 #endif
 271 
 272 #if 1
 273 #define ENC_KS_UNROLL
 274 #endif
 275 
 276 #if 1
 277 #define DEC_KS_UNROLL
 278 #endif
 279 
 280 /*
 281  *  6. FAST FINITE FIELD OPERATIONS
 282  *
 283  *      If this section is included, tables are used to provide faster finite
 284  *      field arithmetic.  This has no effect if FIXED_TABLES is defined.
 285  */
 286 #if 1
 287 #define FF_TABLES
 288 #endif
 289 
 290 /*
 291  *  7. INTERNAL STATE VARIABLE FORMAT
 292  *
 293  *      The internal state of Rijndael is stored in a number of local 32-bit
 294  *      word variables which can be defined either as an array or as individual
  295  *      named variables. Include this section if you want to store these local
 296  *      variables in arrays. Otherwise individual local variables will be used.
 297  */
 298 #if 1
 299 #define ARRAYS
 300 #endif
 301 
 302 /*
 303  *  8. FIXED OR DYNAMIC TABLES
 304  *
 305  *      When this section is included the tables used by the code are compiled
 306  *      statically into the binary file.  Otherwise the subroutine aes_init()
 307  *      must be called to compute them before the code is first used.
 308  */
 309 #if 1 && !(defined(_MSC_VER) && (_MSC_VER <= 800))
 310 #define FIXED_TABLES
 311 #endif
 312 
 313 /*
 314  *  9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
 315  *
 316  *      In some systems it is better to mask longer values to extract bytes
 317  *      rather than using a cast. This option allows this choice.
 318  */
 319 #if 0
 320 #define to_byte(x)  ((uint8_t)(x))
 321 #else
 322 #define to_byte(x)  ((x) & 0xff)
 323 #endif
 324 
 325 /*
 326  *  10. TABLE ALIGNMENT
 327  *
 328  *      On some systems speed will be improved by aligning the AES large lookup
 329  *      tables on particular boundaries. This define should be set to a power of
 330  *      two giving the desired alignment. It can be left undefined if alignment
  331  *      is not needed.  This option is specific to the Microsoft VC++ compiler -
 332  *      it seems to sometimes cause trouble for the VC++ version 6 compiler.
 333  */
 334 
 335 #if 1 && defined(_MSC_VER) && (_MSC_VER >= 1300)
 336 #define TABLE_ALIGN 32
 337 #endif
 338 
 339 /*
 340  *  11.  REDUCE CODE AND TABLE SIZE
 341  *
  342  *      This replaces some expanded macros with function calls if ASM_X86_V2 or
  343  *      ASM_X86_V2C are defined
 344  */
 345 
 346 #if 1 && (defined(ASM_X86_V2) || defined(ASM_X86_V2C))
 347 #define REDUCE_CODE_SIZE
 348 #endif
 349 
 350 /*
 351  *  12. TABLE OPTIONS
 352  *
 353  *      This cipher proceeds by repeating in a number of cycles known as rounds
  354  *      which are implemented by a round function which can optionally be speeded
 355  *      up using tables.  The basic tables are 256 32-bit words, with either
 356  *      one or four tables being required for each round function depending on
 357  *      how much speed is required. Encryption and decryption round functions
 358  *      are different and the last encryption and decryption round functions are
 359  *      different again making four different round functions in all.
 360  *
 361  *      This means that:
 362  *      1. Normal encryption and decryption rounds can each use either 0, 1
 363  *              or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
 364  *      2. The last encryption and decryption rounds can also use either 0, 1
 365  *              or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
 366  *
 367  *      Include or exclude the appropriate definitions below to set the number
 368  *      of tables used by this implementation.
 369  */
 370 
 371 #if 1   /* set tables for the normal encryption round */
 372 #define ENC_ROUND   FOUR_TABLES
 373 #elif 0
 374 #define ENC_ROUND   ONE_TABLE
 375 #else
 376 #define ENC_ROUND   NO_TABLES
 377 #endif
 378 
 379 #if 1   /* set tables for the last encryption round */
 380 #define LAST_ENC_ROUND  FOUR_TABLES
 381 #elif 0
 382 #define LAST_ENC_ROUND  ONE_TABLE
 383 #else
 384 #define LAST_ENC_ROUND  NO_TABLES
 385 #endif
 386 
 387 #if 1   /* set tables for the normal decryption round */
 388 #define DEC_ROUND   FOUR_TABLES
 389 #elif 0
 390 #define DEC_ROUND   ONE_TABLE
 391 #else
 392 #define DEC_ROUND   NO_TABLES
 393 #endif
 394 
 395 #if 1   /* set tables for the last decryption round */
 396 #define LAST_DEC_ROUND  FOUR_TABLES
 397 #elif 0
 398 #define LAST_DEC_ROUND  ONE_TABLE
 399 #else
 400 #define LAST_DEC_ROUND  NO_TABLES
 401 #endif
 402 
 403 /*
 404  *  The decryption key schedule can be speeded up with tables in the same
 405  *      way that the round functions can.  Include or exclude the following
 406  *      defines to set this requirement.
 407  */
 408 #if 1
 409 #define KEY_SCHED   FOUR_TABLES
 410 #elif 0
 411 #define KEY_SCHED   ONE_TABLE
 412 #else
 413 #define KEY_SCHED   NO_TABLES
 414 #endif
 415 
 416 /*  ---- END OF USER CONFIGURED OPTIONS ---- */
 417 
 418 /* VIA ACE support is only available for VC++ and GCC */
 419 
 420 #if !defined(_MSC_VER) && !defined(__GNUC__)
 421 #if defined(ASSUME_VIA_ACE_PRESENT)
 422 #undef ASSUME_VIA_ACE_PRESENT
 423 #endif
 424 #if defined(USE_VIA_ACE_IF_PRESENT)
 425 #undef USE_VIA_ACE_IF_PRESENT
 426 #endif
 427 #endif
 428 
 429 #if defined(ASSUME_VIA_ACE_PRESENT) && !defined(USE_VIA_ACE_IF_PRESENT)
 430 #define USE_VIA_ACE_IF_PRESENT
 431 #endif
 432 
 433 #if defined(USE_VIA_ACE_IF_PRESENT) && !defined(AES_REV_DKS)
 434 #define AES_REV_DKS
 435 #endif
 436 
 437 /* Assembler support requires the use of platform byte order */
 438 
 439 #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2C) || defined(ASM_AMD64_C)) && \
 440         (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
 441 #undef  ALGORITHM_BYTE_ORDER
 442 #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
 443 #endif
 444 
 445 /*
 446  * In this implementation the columns of the state array are each held in
 447  *      32-bit words. The state array can be held in various ways: in an array
 448  *      of words, in a number of individual word variables or in a number of
 449  *      processor registers. The following define maps a variable name x and
 450  *      a column number c to the way the state array variable is to be held.
 451  *      The first define below maps the state into an array x[c] whereas the
 452  *      second form maps the state into a number of individual variables x0,
 453  *      x1, etc.  Another form could map individual state columns to machine
 454  *      register names.
 455  */
 456 
 457 #if defined(ARRAYS)
 458 #define s(x, c) x[c]
 459 #else
 460 #define s(x, c) x##c
 461 #endif
 462 
 463 /*
 464  *  This implementation provides subroutines for encryption, decryption
 465  *      and for setting the three key lengths (separately) for encryption
 466  *      and decryption. Since not all functions are needed, masks are set
 467  *      up here to determine which will be implemented in C
 468  */
 469 
 470 #if !defined(AES_ENCRYPT)
 471 #define EFUNCS_IN_C   0
 472 #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
 473         defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
 474 #define EFUNCS_IN_C   ENC_KEYING_IN_C
 475 #elif !defined(ASM_X86_V2)
 476 #define EFUNCS_IN_C   (ENCRYPTION_IN_C | ENC_KEYING_IN_C)
 477 #else
 478 #define EFUNCS_IN_C   0
 479 #endif
 480 
 481 #if !defined(AES_DECRYPT)
 482 #define DFUNCS_IN_C   0
 483 #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
 484         defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
 485 #define DFUNCS_IN_C   DEC_KEYING_IN_C
 486 #elif !defined(ASM_X86_V2)
 487 #define DFUNCS_IN_C   (DECRYPTION_IN_C | DEC_KEYING_IN_C)
 488 #else
 489 #define DFUNCS_IN_C   0
 490 #endif
 491 
 492 #define FUNCS_IN_C  (EFUNCS_IN_C | DFUNCS_IN_C)
 493 
 494 /* END OF CONFIGURATION OPTIONS */
 495 
 496 /* Disable or report errors on some combinations of options */
 497 
 498 #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
 499 #undef  LAST_ENC_ROUND
 500 #define LAST_ENC_ROUND  NO_TABLES
 501 #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
 502 #undef  LAST_ENC_ROUND
 503 #define LAST_ENC_ROUND  ONE_TABLE
 504 #endif
 505 
 506 #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
 507 #undef  ENC_UNROLL
 508 #define ENC_UNROLL  NONE
 509 #endif
 510 
 511 #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
 512 #undef  LAST_DEC_ROUND
 513 #define LAST_DEC_ROUND  NO_TABLES
 514 #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
 515 #undef  LAST_DEC_ROUND
 516 #define LAST_DEC_ROUND  ONE_TABLE
 517 #endif
 518 
 519 #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
 520 #undef  DEC_UNROLL
 521 #define DEC_UNROLL  NONE
 522 #endif
 523 
 524 #if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 525 #define aes_sw32        htonl
 526 #elif defined(bswap32)
 527 #define aes_sw32        bswap32
 528 #elif defined(bswap_32)
 529 #define aes_sw32        bswap_32
 530 #else
 531 #define brot(x, n)  (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))
 532 #define aes_sw32(x) ((brot((x), 8) & 0x00ff00ff) | (brot((x), 24) & 0xff00ff00))
 533 #endif
 534 
 535 
 536 /*
 537  *      upr(x, n):  rotates bytes within words by n positions, moving bytes to
 538  *              higher index positions with wrap around into low positions
 539  *      ups(x, n):  moves bytes by n positions to higher index positions in
 540  *              words but without wrap around
 541  *      bval(x, n): extracts a byte from a word
 542  *
 543  *      WARNING:   The definitions given here are intended only for use with
 544  *              unsigned variables and with shift counts that are compile
 545  *              time constants
 546  */
 547 
 548 #if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 549 #define upr(x, n)       (((uint32_t)(x) << (8 * (n))) | \
 550                         ((uint32_t)(x) >> (32 - 8 * (n))))
 551 #define ups(x, n)       ((uint32_t)(x) << (8 * (n)))
 552 #define bval(x, n)      to_byte((x) >> (8 * (n)))
 553 #define bytes2word(b0, b1, b2, b3)  \
 554                 (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | \
 555                 ((uint32_t)(b1) << 8) | (b0))
 556 #endif
 557 
 558 #if (ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN)
 559 #define upr(x, n)       (((uint32_t)(x) >> (8 * (n))) | \
 560                         ((uint32_t)(x) << (32 - 8 * (n))))
 561 #define ups(x, n)       ((uint32_t)(x) >> (8 * (n)))
 562 #define bval(x, n)      to_byte((x) >> (24 - 8 * (n)))
 563 #define bytes2word(b0, b1, b2, b3)  \
 564                 (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | \
 565                 ((uint32_t)(b2) << 8) | (b3))
 566 #endif
 567 
 568 #if defined(SAFE_IO)
 569 #define word_in(x, c)   bytes2word(((const uint8_t *)(x) + 4 * c)[0], \
 570                                 ((const uint8_t *)(x) + 4 * c)[1], \
 571                                 ((const uint8_t *)(x) + 4 * c)[2], \
 572                                 ((const uint8_t *)(x) + 4 * c)[3])
 573 #define word_out(x, c, v) { ((uint8_t *)(x) + 4 * c)[0] = bval(v, 0); \
 574                         ((uint8_t *)(x) + 4 * c)[1] = bval(v, 1); \
 575                         ((uint8_t *)(x) + 4 * c)[2] = bval(v, 2); \
 576                         ((uint8_t *)(x) + 4 * c)[3] = bval(v, 3); }
 577 #elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
 578 #define word_in(x, c)   (*((uint32_t *)(x) + (c)))
 579 #define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = (v))
 580 #else
 581 #define word_in(x, c)   aes_sw32(*((uint32_t *)(x) + (c)))
 582 #define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = aes_sw32(v))
 583 #endif
 584 
 585 /* the finite field modular polynomial and elements */
 586 
 587 #define WPOLY   0x011b
 588 #define BPOLY   0x1b
 589 
 590 /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
 591 
 592 #define m1  0x80808080
 593 #define m2  0x7f7f7f7f
 594 #define gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
 595 
 596 /*
 597  * The following defines provide alternative definitions of gf_mulx that might
 598  * give improved performance if a fast 32-bit multiply is not available. Note
 599  * that a temporary variable u needs to be defined where gf_mulx is used.
 600  *
 601  * #define      gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ \
 602  *                      ((u >> 3) | (u >> 6))
 603  * #define      m4  (0x01010101 * BPOLY)
 604  * #define      gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) \
 605  *                      & m4)
 606  */
 607 
 608 /* Work out which tables are needed for the different options   */
 609 
 610 #if defined(ASM_X86_V1C)
 611 #if defined(ENC_ROUND)
 612 #undef  ENC_ROUND
 613 #endif
 614 #define ENC_ROUND   FOUR_TABLES
 615 #if defined(LAST_ENC_ROUND)
 616 #undef  LAST_ENC_ROUND
 617 #endif
 618 #define LAST_ENC_ROUND  FOUR_TABLES
 619 #if defined(DEC_ROUND)
 620 #undef  DEC_ROUND
 621 #endif
 622 #define DEC_ROUND   FOUR_TABLES
 623 #if defined(LAST_DEC_ROUND)
 624 #undef  LAST_DEC_ROUND
 625 #endif
 626 #define LAST_DEC_ROUND  FOUR_TABLES
 627 #if defined(KEY_SCHED)
 628 #undef  KEY_SCHED
 629 #define KEY_SCHED   FOUR_TABLES
 630 #endif
 631 #endif
 632 
 633 #if (FUNCS_IN_C & ENCRYPTION_IN_C) || defined(ASM_X86_V1C)
 634 #if ENC_ROUND == ONE_TABLE
 635 #define FT1_SET
 636 #elif ENC_ROUND == FOUR_TABLES
 637 #define FT4_SET
 638 #else
 639 #define SBX_SET
 640 #endif
 641 #if LAST_ENC_ROUND == ONE_TABLE
 642 #define FL1_SET
 643 #elif LAST_ENC_ROUND == FOUR_TABLES
 644 #define FL4_SET
 645 #elif !defined(SBX_SET)
 646 #define SBX_SET
 647 #endif
 648 #endif
 649 
 650 #if (FUNCS_IN_C & DECRYPTION_IN_C) || defined(ASM_X86_V1C)
 651 #if DEC_ROUND == ONE_TABLE
 652 #define IT1_SET
 653 #elif DEC_ROUND == FOUR_TABLES
 654 #define IT4_SET
 655 #else
 656 #define ISB_SET
 657 #endif
 658 #if LAST_DEC_ROUND == ONE_TABLE
 659 #define IL1_SET
 660 #elif LAST_DEC_ROUND == FOUR_TABLES
 661 #define IL4_SET
 662 #elif !defined(ISB_SET)
 663 #define ISB_SET
 664 #endif
 665 #endif
 666 
 667 
 668 #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
 669         defined(ASM_X86_V2C)))
 670 #if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
 671 #if KEY_SCHED == ONE_TABLE
 672 #if !defined(FL1_SET) && !defined(FL4_SET)
 673 #define LS1_SET
 674 #endif
 675 #elif KEY_SCHED == FOUR_TABLES
 676 #if !defined(FL4_SET)
 677 #define LS4_SET
 678 #endif
 679 #elif !defined(SBX_SET)
 680 #define SBX_SET
 681 #endif
 682 #endif
 683 #if (FUNCS_IN_C & DEC_KEYING_IN_C)
 684 #if KEY_SCHED == ONE_TABLE
 685 #define IM1_SET
 686 #elif KEY_SCHED == FOUR_TABLES
 687 #define IM4_SET
 688 #elif !defined(SBX_SET)
 689 #define SBX_SET
 690 #endif
 691 #endif
 692 #endif
 693 
 694 /* generic definitions of Rijndael macros that use tables */
 695 
 696 #define no_table(x, box, vf, rf, c) bytes2word(\
 697         box[bval(vf(x, 0, c), rf(0, c))], \
 698         box[bval(vf(x, 1, c), rf(1, c))], \
 699         box[bval(vf(x, 2, c), rf(2, c))], \
 700         box[bval(vf(x, 3, c), rf(3, c))])
 701 
 702 #define one_table(x, op, tab, vf, rf, c) \
 703         (tab[bval(vf(x, 0, c), rf(0, c))] \
 704         ^ op(tab[bval(vf(x, 1, c), rf(1, c))], 1) \
 705         ^ op(tab[bval(vf(x, 2, c), rf(2, c))], 2) \
 706         ^ op(tab[bval(vf(x, 3, c), rf(3, c))], 3))
 707 
 708 #define four_tables(x, tab, vf, rf, c) \
 709         (tab[0][bval(vf(x, 0, c), rf(0, c))] \
 710         ^ tab[1][bval(vf(x, 1, c), rf(1, c))] \
 711         ^ tab[2][bval(vf(x, 2, c), rf(2, c))] \
 712         ^ tab[3][bval(vf(x, 3, c), rf(3, c))])
 713 
 714 #define vf1(x, r, c)    (x)
 715 #define rf1(r, c)       (r)
 716 #define rf2(r, c)       ((8+r-c)&3)
 717 
 718 /*
 719  * Perform forward and inverse column mix operation on four bytes in long word
 720  * x in parallel. NOTE: x must be a simple variable, NOT an expression in
 721  * these macros.
 722  */
 723 
 724 #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
 725         defined(ASM_X86_V2C)))
 726 
 727 #if defined(FM4_SET)    /* not currently used */
 728 #define fwd_mcol(x)     four_tables(x, t_use(f, m), vf1, rf1, 0)
 729 #elif defined(FM1_SET)  /* not currently used */
 730 #define fwd_mcol(x)     one_table(x, upr, t_use(f, m), vf1, rf1, 0)
 731 #else
 732 #define dec_fmvars      uint32_t g2
 733 #define fwd_mcol(x)     (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ \
 734                                 upr((x), 2) ^ upr((x), 1))
 735 #endif
 736 
 737 #if defined(IM4_SET)
 738 #define inv_mcol(x)     four_tables(x, t_use(i, m), vf1, rf1, 0)
 739 #elif defined(IM1_SET)
 740 #define inv_mcol(x)     one_table(x, upr, t_use(i, m), vf1, rf1, 0)
 741 #else
 742 #define dec_imvars      uint32_t g2, g4, g9
 743 #define inv_mcol(x)     (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = \
 744                                 (x) ^ gf_mulx(g4), g4 ^= g9, \
 745                                 (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ \
 746                                 upr(g4, 2) ^ upr(g9, 1))
 747 #endif
 748 
 749 #if defined(FL4_SET)
 750 #define ls_box(x, c)    four_tables(x, t_use(f, l), vf1, rf2, c)
 751 #elif defined(LS4_SET)
 752 #define ls_box(x, c)    four_tables(x, t_use(l, s), vf1, rf2, c)
 753 #elif defined(FL1_SET)
 754 #define ls_box(x, c)    one_table(x, upr, t_use(f, l), vf1, rf2, c)
 755 #elif defined(LS1_SET)
 756 #define ls_box(x, c)    one_table(x, upr, t_use(l, s), vf1, rf2, c)
 757 #else
 758 #define ls_box(x, c)    no_table(x, t_use(s, box), vf1, rf2, c)
 759 #endif
 760 
 761 #endif
 762 
 763 #if defined(ASM_X86_V1C) && defined(AES_DECRYPT) && !defined(ISB_SET)
 764 #define ISB_SET
 765 #endif
 766 
 767 #ifdef  __cplusplus
 768 }
 769 #endif
 770 
 771 #endif  /* _AESOPT_H */