nv97_bswap Wdiff usr/src/common/crypto/aes/amd64/aesopt.h

Print this page

5007142 Add ntohll and htonll to sys/byteorder.h
6717509 Need to use bswap/bswapq for byte swap of 64-bit integer on x32/x64
PSARC 2008/474

Split	Close
Expand all
Collapse all

          --- old/usr/src/common/crypto/aes/amd64/aesopt.h
          +++ new/usr/src/common/crypto/aes/amd64/aesopt.h

   1    1  /*
   2    2   * ---------------------------------------------------------------------------
   3    3   * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
   4    4   *
   5    5   * LICENSE TERMS
   6    6   *
   7    7   * The free distribution and use of this software is allowed (with or without
   8    8   * changes) provided that:
   9    9   *
  10   10   *  1. source code distributions include the above copyright notice, this
  11   11   *      list of conditions and the following disclaimer;
  12   12   *
  13   13   *  2. binary distributions include the above copyright notice, this list
  14   14   *      of conditions and the following disclaimer in their documentation;
  15   15   *
  16   16   *  3. the name of the copyright holder is not used to endorse products
  17   17   *      built using this software without specific written permission.
  18   18   *
  19   19   * DISCLAIMER
  20   20   *
  21   21   * This software is provided 'as is' with no explicit or implied warranties
  22   22   * in respect of its properties, including, but not limited to, correctness
  23   23   * and/or fitness for purpose.
  24   24   * ---------------------------------------------------------------------------
  25   25   * Issue Date: 20/12/2007
  26   26   *
  27   27   * This file contains the compilation options for AES (Rijndael) and code
  28   28   * that is common across encryption, key scheduling and table generation.
  29   29   *
  30   30   * OPERATION
  31   31   *
  32   32   * These source code files implement the AES algorithm Rijndael designed by
  33   33   * Joan Daemen and Vincent Rijmen. This version is designed for the standard
  34   34   * block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
  35   35   * and 32 bytes).
  36   36   *
  37   37   * This version is designed for flexibility and speed using operations on
  38   38   * 32-bit words rather than operations on bytes.  It can be compiled with
  39   39   * either big or little endian internal byte order but is faster when the
  40   40   * native byte order for the processor is used.
  41   41   *
  42   42   * THE CIPHER INTERFACE
  43   43   *
  44   44   * The cipher interface is implemented as an array of bytes in which lower
  45   45   * AES bit sequence indexes map to higher numeric significance within bytes.
  46   46   */

↓ open down ↓

46 lines elided

↑ open up ↑

  47   47  
  48   48  /*
  49   49   * OpenSolaris changes
  50   50   * 1. Added __cplusplus and _AESTAB_H header guards
  51   51   * 2. Added header files sys/types.h and aes_impl.h
  52   52   * 3. Added defines for AES_ENCRYPT, AES_DECRYPT, AES_REV_DKS, and ASM_AMD64_C
  53   53   * 4. Moved defines for IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, PLATFORM_BYTE_ORDER
  54   54   *    from brg_endian.h
  55   55   * 5. Undefined VIA_ACE_POSSIBLE and ASSUME_VIA_ACE_PRESENT
  56   56   * 6. Changed uint_8t and uint_32t to uint8_t and uint32_t
  57      - * 7. cstyled and hdrchk code
       57 + * 7. Defined aes_sw32 as htonl() for byte swapping
       58 + * 8. Cstyled and hdrchk code
  58   59   *
  59   60   */
  60   61  
  61   62  #ifndef _AESOPT_H
  62   63  #define _AESOPT_H
  63   64  
  64      -#pragma ident   "%Z%%M% %I%     %E% SMI"
  65      -
  66   65  #ifdef  __cplusplus
  67   66  extern "C" {
  68   67  #endif
  69   68  
  70   69  #include <sys/types.h>
       70 +#include <sys/byteorder.h>
  71   71  #include <aes_impl.h>
  72   72  
  73   73  /*  SUPPORT FEATURES */
  74   74  #define AES_ENCRYPT /* if support for encryption is needed */
  75   75  #define AES_DECRYPT /* if support for decryption is needed */
  76   76  
  77   77  /*  PLATFORM-SPECIFIC FEATURES */
  78   78  #define IS_BIG_ENDIAN           4321 /* byte 0 is most significant (mc68k) */
  79   79  #define IS_LITTLE_ENDIAN        1234 /* byte 0 is least significant (i386) */
  80   80  #define PLATFORM_BYTE_ORDER     IS_LITTLE_ENDIAN

  81   81  #define AES_REV_DKS /* define to reverse decryption key schedule */
  82   82  
  83   83  
  84   84  /*
  85   85   *  CONFIGURATION - THE USE OF DEFINES
  86   86   *      Later in this section there are a number of defines that control the
  87   87   *      operation of the code.  In each section, the purpose of each define is
  88   88   *      explained so that the relevant form can be included or excluded by
  89   89   *      setting either 1's or 0's respectively on the branches of the related
  90   90   *      #if clauses.  The following local defines should not be changed.
  91   91   */
  92   92  
  93   93  #define ENCRYPTION_IN_C 1
  94   94  #define DECRYPTION_IN_C 2
  95   95  #define ENC_KEYING_IN_C 4
  96   96  #define DEC_KEYING_IN_C 8
  97   97  
  98   98  #define NO_TABLES       0
  99   99  #define ONE_TABLE       1
 100  100  #define FOUR_TABLES     4
 101  101  #define NONE            0
 102  102  #define PARTIAL         1
 103  103  #define FULL            2
 104  104  
 105  105  /*  --- START OF USER CONFIGURED OPTIONS --- */
 106  106  
 107  107  /*
 108  108   *  1. BYTE ORDER WITHIN 32 BIT WORDS
 109  109   *
 110  110   *      The fundamental data processing units in Rijndael are 8-bit bytes. The
 111  111   *      input, output and key input are all enumerated arrays of bytes in which
 112  112   *      bytes are numbered starting at zero and increasing to one less than the
 113  113   *      number of bytes in the array in question. This enumeration is only used
 114  114   *      for naming bytes and does not imply any adjacency or order relationship
 115  115   *      from one byte to another. When these inputs and outputs are considered
 116  116   *      as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
 117  117   *      byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
 118  118   *      In this implementation bits are numbered from 0 to 7 starting at the
 119  119   *      numerically least significant end of each byte.  Bit n represents 2^n.
 120  120   *
 121  121   *      However, Rijndael can be implemented more efficiently using 32-bit
 122  122   *      words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
 123  123   *      into word[n]. While in principle these bytes can be assembled into words
 124  124   *      in any positions, this implementation only supports the two formats in
 125  125   *      which bytes in adjacent positions within words also have adjacent byte
 126  126   *      numbers. This order is called big-endian if the lowest numbered bytes
 127  127   *      in words have the highest numeric significance and little-endian if the
 128  128   *      opposite applies.
 129  129   *
 130  130   *      This code can work in either order irrespective of the order used by the
 131  131   *      machine on which it runs. Normally the internal byte order will be set
 132  132   *      to the order of the processor on which the code is to be run but this
 133  133   *      define can be used to reverse this in special situations.
 134  134   *
 135  135   *      WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
 136  136   *      This define is hence reset later (after the user options) if necessary
 137  137   */
 138  138  
 139  139  #if 1
 140  140  #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
 141  141  #elif 0
 142  142  #define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
 143  143  #elif 0
 144  144  #define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
 145  145  #else
 146  146  #error The algorithm byte order is not defined
 147  147  #endif
 148  148  
 149  149  /*  2. VIA ACE SUPPORT */
 150  150  
 151  151  #if defined(__GNUC__) && defined(__i386__) || \
 152  152          defined(_WIN32) && defined(_M_IX86) && \
 153  153          !(defined(_WIN64) || defined(_WIN32_WCE) || \
 154  154          defined(_MSC_VER) && (_MSC_VER <= 800))
 155  155  #define VIA_ACE_POSSIBLE
 156  156  #endif
 157  157  
 158  158  /*
 159  159   *  Define this option if support for the VIA ACE is required. This uses
 160  160   *  inline assembler instructions and is only implemented for the Microsoft,
 161  161   *  Intel and GCC compilers.  If VIA ACE is known to be present, then defining
 162  162   *  ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
 163  163   *  code.  If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
 164  164   *  it is detected (both present and enabled) but the normal AES code will
 165  165   *  also be present.
 166  166   *
 167  167   *  When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
 168  168   *  aligned; other input/output buffers do not need to be 16 byte aligned
 169  169   *  but there are very large performance gains if this can be arranged.
 170  170   *  VIA ACE also requires the decryption key schedule to be in reverse
 171  171   *  order (which later checks below ensure).
 172  172   */
 173  173  
 174  174  /*  VIA ACE is not used here for OpenSolaris: */
 175  175  #undef  VIA_ACE_POSSIBLE
 176  176  #undef  ASSUME_VIA_ACE_PRESENT
 177  177  
 178  178  #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(USE_VIA_ACE_IF_PRESENT)
 179  179  #define USE_VIA_ACE_IF_PRESENT
 180  180  #endif
 181  181  
 182  182  #if 0 && defined(VIA_ACE_POSSIBLE) && !defined(ASSUME_VIA_ACE_PRESENT)
 183  183  #define ASSUME_VIA_ACE_PRESENT
 184  184  #endif
 185  185  
 186  186  
 187  187  /*
 188  188   *  3. ASSEMBLER SUPPORT
 189  189   *
 190  190   *      This define (which can be on the command line) enables the use of the
 191  191   *      assembler code routines for encryption, decryption and key scheduling
 192  192   *      as follows:
 193  193   *
 194  194   *      ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
 195  195   *              encryption and decryption but with key scheduling in C
 196  196   *      ASM_X86_V2  uses assembler (aes_x86_v2.asm) with compressed tables for
 197  197   *              encryption, decryption and key scheduling
 198  198   *      ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
 199  199   *              encryption and decryption but with key scheduling in C
 200  200   *      ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
 201  201   *              encryption and decryption but with key scheduling in C
 202  202   *
 203  203   *      Change one 'if 0' below to 'if 1' to select the version or define
 204  204   *      as a compilation option.
 205  205   */
 206  206  
 207  207  #if 0 && !defined(ASM_X86_V1C)
 208  208  #define ASM_X86_V1C
 209  209  #elif 0 && !defined(ASM_X86_V2)
 210  210  #define ASM_X86_V2
 211  211  #elif 0 && !defined(ASM_X86_V2C)
 212  212  #define ASM_X86_V2C
 213  213  #elif 1 && !defined(ASM_AMD64_C)
 214  214  #define ASM_AMD64_C
 215  215  #endif
 216  216  
 217  217  #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2) || defined(ASM_X86_V2C)) && \
 218  218          !defined(_M_IX86) || defined(ASM_AMD64_C) && !defined(_M_X64) && \
 219  219          !defined(__amd64)
 220  220  #error Assembler code is only available for x86 and AMD64 systems
 221  221  #endif
 222  222  
 223  223  /*
 224  224   *  4. FAST INPUT/OUTPUT OPERATIONS.
 225  225   *
 226  226   *      On some machines it is possible to improve speed by transferring the
 227  227   *      bytes in the input and output arrays to and from the internal 32-bit
 228  228   *      variables by addressing these arrays as if they are arrays of 32-bit
 229  229   *      words.  On some machines this will always be possible but there may
 230  230   *      be a large performance penalty if the byte arrays are not aligned on
 231  231   *      the normal word boundaries. On other machines this technique will
 232  232   *      lead to memory access errors when such 32-bit word accesses are not
 233  233   *      properly aligned. The option SAFE_IO avoids such problems but will
 234  234   *      often be slower on those machines that support misaligned access
 235  235   *      (especially so if care is taken to align the input  and output byte
 236  236   *      arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
 237  237   *      assumed that access to byte arrays as if they are arrays of 32-bit
 238  238   *      words will not cause problems when such accesses are misaligned.
 239  239   */
 240  240  #if 1 && !defined(_MSC_VER)
 241  241  #define SAFE_IO
 242  242  #endif
 243  243  
 244  244  /*
 245  245   *  5. LOOP UNROLLING
 246  246   *
 247  247   *      The code for encryption and decryption cycles through a number of rounds
 248  248   *      that can be implemented either in a loop or by expanding the code into a
 249  249   *      long sequence of instructions, the latter producing a larger program but
 250  250   *      one that will often be much faster. The latter is called loop unrolling.
 251  251   *      There are also potential speed advantages in expanding two iterations in
 252  252   *      a loop with half the number of iterations, which is called partial loop
 253  253   *      unrolling.  The following options allow partial or full loop unrolling
 254  254   *      to be set independently for encryption and decryption
 255  255   */
 256  256  #if 1
 257  257  #define ENC_UNROLL  FULL
 258  258  #elif 0
 259  259  #define ENC_UNROLL  PARTIAL
 260  260  #else
 261  261  #define ENC_UNROLL  NONE
 262  262  #endif
 263  263  
 264  264  #if 1
 265  265  #define DEC_UNROLL  FULL
 266  266  #elif 0
 267  267  #define DEC_UNROLL  PARTIAL
 268  268  #else
 269  269  #define DEC_UNROLL  NONE
 270  270  #endif
 271  271  
 272  272  #if 1
 273  273  #define ENC_KS_UNROLL
 274  274  #endif
 275  275  
 276  276  #if 1
 277  277  #define DEC_KS_UNROLL
 278  278  #endif
 279  279  
 280  280  /*
 281  281   *  6. FAST FINITE FIELD OPERATIONS
 282  282   *
 283  283   *      If this section is included, tables are used to provide faster finite
 284  284   *      field arithmetic.  This has no effect if FIXED_TABLES is defined.
 285  285   */
 286  286  #if 1
 287  287  #define FF_TABLES
 288  288  #endif
 289  289  
 290  290  /*
 291  291   *  7. INTERNAL STATE VARIABLE FORMAT
 292  292   *
 293  293   *      The internal state of Rijndael is stored in a number of local 32-bit
 294  294   *      word variables which can be defined either as an array or as individual
 295  295   *      named variables. Include this section if you want to store these local
 296  296   *      variables in arrays. Otherwise individual local variables will be used.
 297  297   */
 298  298  #if 1
 299  299  #define ARRAYS
 300  300  #endif
 301  301  
 302  302  /*
 303  303   *  8. FIXED OR DYNAMIC TABLES
 304  304   *
 305  305   *      When this section is included the tables used by the code are compiled
 306  306   *      statically into the binary file.  Otherwise the subroutine aes_init()
 307  307   *      must be called to compute them before the code is first used.
 308  308   */
 309  309  #if 1 && !(defined(_MSC_VER) && (_MSC_VER <= 800))
 310  310  #define FIXED_TABLES
 311  311  #endif
 312  312  
 313  313  /*
 314  314   *  9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
 315  315   *
 316  316   *      In some systems it is better to mask longer values to extract bytes
 317  317   *      rather than using a cast. This option allows this choice.
 318  318   */
 319  319  #if 0
 320  320  #define to_byte(x)  ((uint8_t)(x))
 321  321  #else
 322  322  #define to_byte(x)  ((x) & 0xff)
 323  323  #endif
 324  324  
 325  325  /*
 326  326   *  10. TABLE ALIGNMENT
 327  327   *
 328  328   *      On some systems speed will be improved by aligning the AES large lookup
 329  329   *      tables on particular boundaries. This define should be set to a power of
 330  330   *      two giving the desired alignment. It can be left undefined if alignment
 331  331   *      is not needed.  This option is specific to the Microsoft VC++ compiler -
 332  332   *      it seems to sometimes cause trouble for the VC++ version 6 compiler.
 333  333   */
 334  334  
 335  335  #if 1 && defined(_MSC_VER) && (_MSC_VER >= 1300)
 336  336  #define TABLE_ALIGN 32
 337  337  #endif
 338  338  
 339  339  /*
 340  340   *  11.  REDUCE CODE AND TABLE SIZE
 341  341   *
 342  342   *      This replaces some expanded macros with function calls if ASM_X86_V2 or
 343  343   *      ASM_X86_V2C is defined
 344  344   */
 345  345  
 346  346  #if 1 && (defined(ASM_X86_V2) || defined(ASM_X86_V2C))
 347  347  #define REDUCE_CODE_SIZE
 348  348  #endif
 349  349  
 350  350  /*
 351  351   *  12. TABLE OPTIONS
 352  352   *
 353  353   *      This cipher proceeds by repeating in a number of cycles known as rounds
 354  354   *      which are implemented by a round function which can optionally be speeded
 355  355   *      up using tables.  The basic tables are 256 32-bit words, with either
 356  356   *      one or four tables being required for each round function depending on
 357  357   *      how much speed is required. Encryption and decryption round functions
 358  358   *      are different and the last encryption and decryption round functions are
 359  359   *      different again making four different round functions in all.
 360  360   *
 361  361   *      This means that:
 362  362   *      1. Normal encryption and decryption rounds can each use either 0, 1
 363  363   *              or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
 364  364   *      2. The last encryption and decryption rounds can also use either 0, 1
 365  365   *              or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
 366  366   *
 367  367   *      Include or exclude the appropriate definitions below to set the number
 368  368   *      of tables used by this implementation.
 369  369   */
 370  370  
 371  371  #if 1   /* set tables for the normal encryption round */
 372  372  #define ENC_ROUND   FOUR_TABLES
 373  373  #elif 0
 374  374  #define ENC_ROUND   ONE_TABLE
 375  375  #else
 376  376  #define ENC_ROUND   NO_TABLES
 377  377  #endif
 378  378  
 379  379  #if 1   /* set tables for the last encryption round */
 380  380  #define LAST_ENC_ROUND  FOUR_TABLES
 381  381  #elif 0
 382  382  #define LAST_ENC_ROUND  ONE_TABLE
 383  383  #else
 384  384  #define LAST_ENC_ROUND  NO_TABLES
 385  385  #endif
 386  386  
 387  387  #if 1   /* set tables for the normal decryption round */
 388  388  #define DEC_ROUND   FOUR_TABLES
 389  389  #elif 0
 390  390  #define DEC_ROUND   ONE_TABLE
 391  391  #else
 392  392  #define DEC_ROUND   NO_TABLES
 393  393  #endif
 394  394  
 395  395  #if 1   /* set tables for the last decryption round */
 396  396  #define LAST_DEC_ROUND  FOUR_TABLES
 397  397  #elif 0
 398  398  #define LAST_DEC_ROUND  ONE_TABLE
 399  399  #else
 400  400  #define LAST_DEC_ROUND  NO_TABLES
 401  401  #endif
 402  402  
 403  403  /*
 404  404   *  The decryption key schedule can be speeded up with tables in the same
 405  405   *      way that the round functions can.  Include or exclude the following
 406  406   *      defines to set this requirement.
 407  407   */
 408  408  #if 1
 409  409  #define KEY_SCHED   FOUR_TABLES
 410  410  #elif 0
 411  411  #define KEY_SCHED   ONE_TABLE
 412  412  #else
 413  413  #define KEY_SCHED   NO_TABLES
 414  414  #endif
 415  415  
 416  416  /*  ---- END OF USER CONFIGURED OPTIONS ---- */
 417  417  
 418  418  /* VIA ACE support is only available for VC++ and GCC */
 419  419  
 420  420  #if !defined(_MSC_VER) && !defined(__GNUC__)
 421  421  #if defined(ASSUME_VIA_ACE_PRESENT)
 422  422  #undef ASSUME_VIA_ACE_PRESENT
 423  423  #endif
 424  424  #if defined(USE_VIA_ACE_IF_PRESENT)
 425  425  #undef USE_VIA_ACE_IF_PRESENT
 426  426  #endif
 427  427  #endif
 428  428  
 429  429  #if defined(ASSUME_VIA_ACE_PRESENT) && !defined(USE_VIA_ACE_IF_PRESENT)
 430  430  #define USE_VIA_ACE_IF_PRESENT
 431  431  #endif
 432  432  
 433  433  #if defined(USE_VIA_ACE_IF_PRESENT) && !defined(AES_REV_DKS)
 434  434  #define AES_REV_DKS
 435  435  #endif
 436  436  
 437  437  /* Assembler support requires the use of platform byte order */
 438  438  
 439  439  #if (defined(ASM_X86_V1C) || defined(ASM_X86_V2C) || defined(ASM_AMD64_C)) && \
 440  440          (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
 441  441  #undef  ALGORITHM_BYTE_ORDER
 442  442  #define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
 443  443  #endif
 444  444  
 445  445  /*
 446  446   * In this implementation the columns of the state array are each held in
 447  447   *      32-bit words. The state array can be held in various ways: in an array
 448  448   *      of words, in a number of individual word variables or in a number of
 449  449   *      processor registers. The following define maps a variable name x and
 450  450   *      a column number c to the way the state array variable is to be held.
 451  451   *      The first define below maps the state into an array x[c] whereas the
 452  452   *      second form maps the state into a number of individual variables x0,
 453  453   *      x1, etc.  Another form could map individual state columns to machine
 454  454   *      register names.
 455  455   */
 456  456  
 457  457  #if defined(ARRAYS)
 458  458  #define s(x, c) x[c]
 459  459  #else
 460  460  #define s(x, c) x##c
 461  461  #endif
 462  462  
 463  463  /*
 464  464   *  This implementation provides subroutines for encryption, decryption
 465  465   *      and for setting the three key lengths (separately) for encryption
 466  466   *      and decryption. Since not all functions are needed, masks are set
 467  467   *      up here to determine which will be implemented in C
 468  468   */
 469  469  
 470  470  #if !defined(AES_ENCRYPT)
 471  471  #define EFUNCS_IN_C   0
 472  472  #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
 473  473          defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
 474  474  #define EFUNCS_IN_C   ENC_KEYING_IN_C
 475  475  #elif !defined(ASM_X86_V2)
 476  476  #define EFUNCS_IN_C   (ENCRYPTION_IN_C | ENC_KEYING_IN_C)
 477  477  #else
 478  478  #define EFUNCS_IN_C   0
 479  479  #endif
 480  480  
 481  481  #if !defined(AES_DECRYPT)
 482  482  #define DFUNCS_IN_C   0
 483  483  #elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
 484  484          defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
 485  485  #define DFUNCS_IN_C   DEC_KEYING_IN_C
 486  486  #elif !defined(ASM_X86_V2)
 487  487  #define DFUNCS_IN_C   (DECRYPTION_IN_C | DEC_KEYING_IN_C)
 488  488  #else
 489  489  #define DFUNCS_IN_C   0
 490  490  #endif
 491  491  
 492  492  #define FUNCS_IN_C  (EFUNCS_IN_C | DFUNCS_IN_C)
 493  493  
 494  494  /* END OF CONFIGURATION OPTIONS */
 495  495  
 496  496  /* Disable or report errors on some combinations of options */
 497  497  
 498  498  #if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
 499  499  #undef  LAST_ENC_ROUND
 500  500  #define LAST_ENC_ROUND  NO_TABLES
 501  501  #elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
 502  502  #undef  LAST_ENC_ROUND
 503  503  #define LAST_ENC_ROUND  ONE_TABLE
 504  504  #endif
 505  505  
 506  506  #if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
 507  507  #undef  ENC_UNROLL
 508  508  #define ENC_UNROLL  NONE
 509  509  #endif
 510  510  
 511  511  #if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
 512  512  #undef  LAST_DEC_ROUND
 513  513  #define LAST_DEC_ROUND  NO_TABLES

↓ open down ↓

433 lines elided

↑ open up ↑

 514  514  #elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
 515  515  #undef  LAST_DEC_ROUND
 516  516  #define LAST_DEC_ROUND  ONE_TABLE
 517  517  #endif
 518  518  
 519  519  #if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
 520  520  #undef  DEC_UNROLL
 521  521  #define DEC_UNROLL  NONE
 522  522  #endif
 523  523  
 524      -#if defined(bswap32)
      524 +#if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
      525 +#define aes_sw32        htonl
      526 +#elif defined(bswap32)
 525  527  #define aes_sw32        bswap32
 526  528  #elif defined(bswap_32)
 527  529  #define aes_sw32        bswap_32
 528  530  #else
 529      -#define brot(x, n)   (((uint32_t)(x) <<  n) | ((uint32_t)(x) >> (32 - n)))
      531 +#define brot(x, n)  (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))
 530  532  #define aes_sw32(x) ((brot((x), 8) & 0x00ff00ff) | (brot((x), 24) & 0xff00ff00))
 531  533  #endif
 532  534  
      535 +
 533  536  /*
 534      - *  upr(x, n):  rotates bytes within words by n positions, moving bytes to
      537 + *      upr(x, n):  rotates bytes within words by n positions, moving bytes to
 535  538   *              higher index positions with wrap around into low positions
 536  539   *      ups(x, n):  moves bytes by n positions to higher index positions in
 537  540   *              words but without wrap around
 538  541   *      bval(x, n): extracts a byte from a word
 539  542   *
 540  543   *      WARNING:   The definitions given here are intended only for use with
 541  544   *              unsigned variables and with shift counts that are compile
 542  545   *              time constants
 543  546   */
 544  547

 545  548  #if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
 546  549  #define upr(x, n)       (((uint32_t)(x) << (8 * (n))) | \
 547  550                          ((uint32_t)(x) >> (32 - 8 * (n))))
 548  551  #define ups(x, n)       ((uint32_t)(x) << (8 * (n)))
 549  552  #define bval(x, n)      to_byte((x) >> (8 * (n)))
 550  553  #define bytes2word(b0, b1, b2, b3)  \
 551  554                  (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | \
 552  555                  ((uint32_t)(b1) << 8) | (b0))
 553  556  #endif
 554  557  
 555  558  #if (ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN)
 556  559  #define upr(x, n)       (((uint32_t)(x) >> (8 * (n))) | \
 557  560                          ((uint32_t)(x) << (32 - 8 * (n))))
 558  561  #define ups(x, n)       ((uint32_t)(x) >> (8 * (n)))
 559  562  #define bval(x, n)      to_byte((x) >> (24 - 8 * (n)))
 560  563  #define bytes2word(b0, b1, b2, b3)  \
 561  564                  (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | \
 562  565                  ((uint32_t)(b2) << 8) | (b3))
 563  566  #endif
 564  567  
 565  568  #if defined(SAFE_IO)
 566  569  #define word_in(x, c)   bytes2word(((const uint8_t *)(x) + 4 * c)[0], \
 567  570                                  ((const uint8_t *)(x) + 4 * c)[1], \
 568  571                                  ((const uint8_t *)(x) + 4 * c)[2], \
 569  572                                  ((const uint8_t *)(x) + 4 * c)[3])
 570  573  #define word_out(x, c, v) { ((uint8_t *)(x) + 4 * c)[0] = bval(v, 0); \
 571  574                          ((uint8_t *)(x) + 4 * c)[1] = bval(v, 1); \
 572  575                          ((uint8_t *)(x) + 4 * c)[2] = bval(v, 2); \
 573  576                          ((uint8_t *)(x) + 4 * c)[3] = bval(v, 3); }
 574  577  #elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
 575  578  #define word_in(x, c)   (*((uint32_t *)(x) + (c)))
 576  579  #define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = (v))
 577  580  #else
 578  581  #define word_in(x, c)   aes_sw32(*((uint32_t *)(x) + (c)))
 579  582  #define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = aes_sw32(v))
 580  583  #endif
 581  584  
 582  585  /* the finite field modular polynomial and elements */
 583  586  
 584  587  #define WPOLY   0x011b
 585  588  #define BPOLY   0x1b
 586  589  
 587  590  /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
 588  591  
 589  592  #define m1  0x80808080
 590  593  #define m2  0x7f7f7f7f
 591  594  #define gf_mulx(x)  ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
 592  595  
 593  596  /*
 594  597   * The following defines provide alternative definitions of gf_mulx that might
 595  598   * give improved performance if a fast 32-bit multiply is not available. Note
 596  599   * that a temporary variable u needs to be defined where gf_mulx is used.
 597  600   *
 598  601   * #define      gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ \
 599  602   *                      ((u >> 3) | (u >> 6))
 600  603   * #define      m4  (0x01010101 * BPOLY)
 601  604   * #define      gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) \
 602  605   *                      & m4)
 603  606   */
 604  607  
 605  608  /* Work out which tables are needed for the different options   */
 606  609  
 607  610  #if defined(ASM_X86_V1C)
 608  611  #if defined(ENC_ROUND)
 609  612  #undef  ENC_ROUND
 610  613  #endif
 611  614  #define ENC_ROUND   FOUR_TABLES
 612  615  #if defined(LAST_ENC_ROUND)
 613  616  #undef  LAST_ENC_ROUND
 614  617  #endif
 615  618  #define LAST_ENC_ROUND  FOUR_TABLES
 616  619  #if defined(DEC_ROUND)
 617  620  #undef  DEC_ROUND
 618  621  #endif
 619  622  #define DEC_ROUND   FOUR_TABLES
 620  623  #if defined(LAST_DEC_ROUND)
 621  624  #undef  LAST_DEC_ROUND
 622  625  #endif
 623  626  #define LAST_DEC_ROUND  FOUR_TABLES
 624  627  #if defined(KEY_SCHED)
 625  628  #undef  KEY_SCHED
 626  629  #define KEY_SCHED   FOUR_TABLES
 627  630  #endif
 628  631  #endif
 629  632  
 630  633  #if (FUNCS_IN_C & ENCRYPTION_IN_C) || defined(ASM_X86_V1C)
 631  634  #if ENC_ROUND == ONE_TABLE
 632  635  #define FT1_SET
 633  636  #elif ENC_ROUND == FOUR_TABLES
 634  637  #define FT4_SET
 635  638  #else
 636  639  #define SBX_SET
 637  640  #endif
 638  641  #if LAST_ENC_ROUND == ONE_TABLE
 639  642  #define FL1_SET
 640  643  #elif LAST_ENC_ROUND == FOUR_TABLES
 641  644  #define FL4_SET
 642  645  #elif !defined(SBX_SET)
 643  646  #define SBX_SET
 644  647  #endif
 645  648  #endif
 646  649  
 647  650  #if (FUNCS_IN_C & DECRYPTION_IN_C) || defined(ASM_X86_V1C)
 648  651  #if DEC_ROUND == ONE_TABLE
 649  652  #define IT1_SET
 650  653  #elif DEC_ROUND == FOUR_TABLES
 651  654  #define IT4_SET
 652  655  #else
 653  656  #define ISB_SET
 654  657  #endif
 655  658  #if LAST_DEC_ROUND == ONE_TABLE
 656  659  #define IL1_SET
 657  660  #elif LAST_DEC_ROUND == FOUR_TABLES
 658  661  #define IL4_SET
 659  662  #elif !defined(ISB_SET)
 660  663  #define ISB_SET
 661  664  #endif
 662  665  #endif
 663  666  
 664  667  
 665  668  #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
 666  669          defined(ASM_X86_V2C)))
 667  670  #if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
 668  671  #if KEY_SCHED == ONE_TABLE
 669  672  #if !defined(FL1_SET) && !defined(FL4_SET)
 670  673  #define LS1_SET
 671  674  #endif
 672  675  #elif KEY_SCHED == FOUR_TABLES
 673  676  #if !defined(FL4_SET)
 674  677  #define LS4_SET
 675  678  #endif
 676  679  #elif !defined(SBX_SET)
 677  680  #define SBX_SET
 678  681  #endif
 679  682  #endif
 680  683  #if (FUNCS_IN_C & DEC_KEYING_IN_C)
 681  684  #if KEY_SCHED == ONE_TABLE
 682  685  #define IM1_SET
 683  686  #elif KEY_SCHED == FOUR_TABLES
 684  687  #define IM4_SET
 685  688  #elif !defined(SBX_SET)
 686  689  #define SBX_SET
 687  690  #endif
 688  691  #endif
 689  692  #endif
 690  693  
 691  694  /* generic definitions of Rijndael macros that use tables */
 692  695  
 693  696  #define no_table(x, box, vf, rf, c) bytes2word(\
 694  697          box[bval(vf(x, 0, c), rf(0, c))], \
 695  698          box[bval(vf(x, 1, c), rf(1, c))], \
 696  699          box[bval(vf(x, 2, c), rf(2, c))], \
 697  700          box[bval(vf(x, 3, c), rf(3, c))])
 698  701  
 699  702  #define one_table(x, op, tab, vf, rf, c) \
 700  703          (tab[bval(vf(x, 0, c), rf(0, c))] \
 701  704          ^ op(tab[bval(vf(x, 1, c), rf(1, c))], 1) \
 702  705          ^ op(tab[bval(vf(x, 2, c), rf(2, c))], 2) \
 703  706          ^ op(tab[bval(vf(x, 3, c), rf(3, c))], 3))
 704  707  
 705  708  #define four_tables(x, tab, vf, rf, c) \
 706  709          (tab[0][bval(vf(x, 0, c), rf(0, c))] \
 707  710          ^ tab[1][bval(vf(x, 1, c), rf(1, c))] \
 708  711          ^ tab[2][bval(vf(x, 2, c), rf(2, c))] \
 709  712          ^ tab[3][bval(vf(x, 3, c), rf(3, c))])
 710  713  
 711  714  #define vf1(x, r, c)    (x)
 712  715  #define rf1(r, c)       (r)
 713  716  #define rf2(r, c)       ((8+r-c)&3)
 714  717  
 715  718  /*
 716  719   * Perform forward and inverse column mix operation on four bytes in long word
 717  720   * x in parallel. NOTE: x must be a simple variable, NOT an expression in
 718  721   * these macros.
 719  722   */
 720  723  
 721  724  #if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
 722  725          defined(ASM_X86_V2C)))
 723  726  
 724  727  #if defined(FM4_SET)    /* not currently used */
 725  728  #define fwd_mcol(x)     four_tables(x, t_use(f, m), vf1, rf1, 0)
 726  729  #elif defined(FM1_SET)  /* not currently used */
 727  730  #define fwd_mcol(x)     one_table(x, upr, t_use(f, m), vf1, rf1, 0)
 728  731  #else
 729  732  #define dec_fmvars      uint32_t g2
 730  733  #define fwd_mcol(x)     (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ \
 731  734                                  upr((x), 2) ^ upr((x), 1))
 732  735  #endif
 733  736  
 734  737  #if defined(IM4_SET)
 735  738  #define inv_mcol(x)     four_tables(x, t_use(i, m), vf1, rf1, 0)
 736  739  #elif defined(IM1_SET)
 737  740  #define inv_mcol(x)     one_table(x, upr, t_use(i, m), vf1, rf1, 0)
 738  741  #else
 739  742  #define dec_imvars      uint32_t g2, g4, g9
 740  743  #define inv_mcol(x)     (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = \
 741  744                                  (x) ^ gf_mulx(g4), g4 ^= g9, \
 742  745                                  (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ \
 743  746                                  upr(g4, 2) ^ upr(g9, 1))
 744  747  #endif
 745  748  
 746  749  #if defined(FL4_SET)
 747  750  #define ls_box(x, c)    four_tables(x, t_use(f, l), vf1, rf2, c)
 748  751  #elif defined(LS4_SET)
 749  752  #define ls_box(x, c)    four_tables(x, t_use(l, s), vf1, rf2, c)
 750  753  #elif defined(FL1_SET)
 751  754  #define ls_box(x, c)    one_table(x, upr, t_use(f, l), vf1, rf2, c)
 752  755  #elif defined(LS1_SET)
 753  756  #define ls_box(x, c)    one_table(x, upr, t_use(l, s), vf1, rf2, c)
 754  757  #else
 755  758  #define ls_box(x, c)    no_table(x, t_use(s, box), vf1, rf2, c)
 756  759  #endif
 757  760  
 758  761  #endif
 759  762  
 760  763  #if defined(ASM_X86_V1C) && defined(AES_DECRYPT) && !defined(ISB_SET)
 761  764  #define ISB_SET
 762  765  #endif
 763  766  
 764  767  #ifdef  __cplusplus
 765  768  }
 766  769  #endif
 767  770  
 768  771  #endif  /* _AESOPT_H */

↓ open down ↓

224 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX