6729208 Optimize BSWAP_* and BE_* macros in sys/byteorder.h to use inline amd64 assembly

--- old/usr/src/uts/common/sys/byteorder.h
+++ new/usr/src/uts/common/sys/byteorder.h
[... 92 lines elided ...]
  93   93  extern  uint64_t ntohll(uint64_t);
  94   94  #endif  /* !(_XPG4_2||_XPG5) || __EXTENSIONS__ */
  95   95  #endif
  96   96  
  97   97  #if !defined(_XPG4_2) || defined(__EXTENSIONS__)
  98   98  
  99   99  /*
 100  100   * Macros to reverse byte order
 101  101   */
 102  102  #define BSWAP_8(x)      ((x) & 0xff)
      103 +#if !defined(__i386) && !defined(__amd64)
 103  104  #define BSWAP_16(x)     ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
 104      -#define BSWAP_32(x)     ((BSWAP_16(x) << 16) | BSWAP_16((x) >> 16))
 105      -#define BSWAP_64(x)     ((BSWAP_32(x) << 32) | BSWAP_32((x) >> 32))
      105 +#define BSWAP_32(x)     (((uint32_t)(x) << 24) | \
      106 +                        (((uint32_t)(x) << 8) & 0xff0000) | \
      107 +                        (((uint32_t)(x) >> 8) & 0xff00) | \
      108 +                        ((uint32_t)(x)  >> 24))
      109 +#else /* x86 */
      110 +#define BSWAP_16(x)     htons(x)
      111 +#define BSWAP_32(x)     htonl(x)
      112 +#endif  /* !__i386 && !__amd64 */
 106  113  
      114 +#if (!defined(__i386) && !defined(__amd64)) || \
      115 +        ((defined(_XPG4_2) || defined(_XPG5)) && !defined(__EXTENSIONS__))
      116 +#define BSWAP_64(x)     (((uint64_t)(x) << 56) | \
      117 +                        (((uint64_t)(x) << 40) & 0xff000000000000ULL) | \
      118 +                        (((uint64_t)(x) << 24) & 0xff0000000000ULL) | \
      119 +                        (((uint64_t)(x) << 8)  & 0xff00000000ULL) | \
      120 +                        (((uint64_t)(x) >> 8)  & 0xff000000ULL) | \
      121 +                        (((uint64_t)(x) >> 24) & 0xff0000ULL) | \
      122 +                        (((uint64_t)(x) >> 40) & 0xff00ULL) | \
      123 +                        ((uint64_t)(x)  >> 56))
      124 +#else /* x86 with non-XPG extensions allowed */
      125 +#define BSWAP_64(x)     htonll(x)
      126 +#endif  /* (!__i386&&!__amd64) || ((_XPG4_2||_XPG5) && !__EXTENSIONS__) */
      127 +
 107  128  #define BMASK_8(x)      ((x) & 0xff)
 108  129  #define BMASK_16(x)     ((x) & 0xffff)
 109  130  #define BMASK_32(x)     ((x) & 0xffffffff)
 110  131  #define BMASK_64(x)     (x)
 111  132  
 112  133  /*
 113  134   * Macros to convert from a specific byte order to/from native byte order
 114  135   */
 115  136  #ifdef _BIG_ENDIAN
 116  137  #define BE_8(x)         BMASK_8(x)
[... 16 lines elided ...]
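
As a quick check of what the new byte-reversal macros compute, here is a minimal standalone sketch (not part of the changeset) that exercises the portable BSWAP_32 fallback. On x86 the header instead expands BSWAP_32 to htonl(), which, per the bug synopsis, is backed by inline amd64 assembly.

#include <stdio.h>
#include <stdint.h>

/* The portable fallback from the diff above, reproduced verbatim. */
#define	BSWAP_32(x)	(((uint32_t)(x) << 24) | \
			(((uint32_t)(x) << 8) & 0xff0000) | \
			(((uint32_t)(x) >> 8) & 0xff00) | \
			((uint32_t)(x)  >> 24))

int
main(void)
{
	/* Each byte moves to the mirrored position: prints 44332211. */
	printf("%08x\n", BSWAP_32(0x11223344));
	return (0);
}

Unlike the old definition built from nested BSWAP_16 calls, the explicit (uint32_t) casts pin the operand width, so the result no longer depends on the promoted type of x.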
 133  154  #endif
 134  155  
 135  156  /*
 136  157   * Macros to read unaligned values from a specific byte order to
 137  158   * native byte order
 138  159   */
 139  160  
 140  161  #define BE_IN8(xa) \
 141  162          *((uint8_t *)(xa))
 142  163  
      164 +#if !defined(__i386) && !defined(__amd64)
 143  165  #define BE_IN16(xa) \
 144      -        (((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa)+1))
      166 +        (((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa) + 1))
 145  167  
 146  168  #define BE_IN32(xa) \
 147      -        (((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
      169 +        (((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa) + 2))
 148  170  
      171 +#else /* x86 */
      172 +#define BE_IN16(xa) htons(*((uint16_t *)(void *)(xa)))
      173 +#define BE_IN32(xa) htonl(*((uint32_t *)(void *)(xa)))
      174 +#endif  /* !__i386 && !__amd64 */
      175 +
      176 +#if (!defined(__i386) && !defined(__amd64)) || \
      177 +        ((defined(_XPG4_2) || defined(_XPG5)) && !defined(__EXTENSIONS__))
 149  178  #define BE_IN64(xa) \
 150      -        (((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa)+4))
      179 +        (((uint64_t)BE_IN32(xa) << 32) | BE_IN32((uint8_t *)(xa) + 4))
      180 +#else /* x86 with non-XPG extensions allowed */
      181 +#define BE_IN64(xa) htonll(*((uint64_t *)(void *)(xa)))
      182 +#endif  /* (!__i386&&!__amd64) || ((_XPG4_2||_XPG5) && !__EXTENSIONS__) */
 151  183  
 152  184  #define LE_IN8(xa) \
 153  185          *((uint8_t *)(xa))
 154  186  
 155  187  #define LE_IN16(xa) \
 156  188          (((uint16_t)LE_IN8((uint8_t *)(xa) + 1) << 8) | LE_IN8(xa))
 157  189  
 158  190  #define LE_IN32(xa) \
 159  191          (((uint32_t)LE_IN16((uint8_t *)(xa) + 2) << 16) | LE_IN16(xa))
 160  192  
[... 8 lines elided ...]
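
For context, a usage sketch (hypothetical caller, not from the changeset) of the BE_IN* readers above: extracting a big-endian 32-bit field from a raw byte buffer at an odd offset. The portable definitions read one byte at a time, so unaligned addresses are safe on any machine; the x86 path can instead dereference the pointer directly and swap with htonl(), because x86 permits unaligned loads.

#include <stdio.h>
#include <stdint.h>

/* Portable byte-at-a-time readers from the diff above. */
#define	BE_IN8(xa) \
	*((uint8_t *)(xa))
#define	BE_IN16(xa) \
	(((uint16_t)BE_IN8(xa) << 8) | BE_IN8((uint8_t *)(xa) + 1))
#define	BE_IN32(xa) \
	(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa) + 2))

int
main(void)
{
	/* Wire-format data: a big-endian word starting at offset 1. */
	uint8_t buf[] = { 0x00, 0xde, 0xad, 0xbe, 0xef };

	printf("%08x\n", BE_IN32(buf + 1));	/* prints deadbeef */
	return (0);
}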
 169  201  #define BE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
 170  202  
 171  203  #define BE_OUT16(xa, yv) \
 172  204          BE_OUT8((uint8_t *)(xa) + 1, yv); \
 173  205          BE_OUT8((uint8_t *)(xa), (yv) >> 8);
 174  206  
 175  207  #define BE_OUT32(xa, yv) \
 176  208          BE_OUT16((uint8_t *)(xa) + 2, yv); \
 177  209          BE_OUT16((uint8_t *)(xa), (yv) >> 16);
 178  210  
      211 +#if (!defined(__i386) && !defined(__amd64)) || \
      212 +        ((defined(_XPG4_2) || defined(_XPG5)) && !defined(__EXTENSIONS__))
 179  213  #define BE_OUT64(xa, yv) \
 180  214          BE_OUT32((uint8_t *)(xa) + 4, yv); \
 181  215          BE_OUT32((uint8_t *)(xa), (yv) >> 32);
      216 +#else /* x86 with non-XPG extensions allowed */
      217 +#define BE_OUT64(xa, yv) *((uint64_t *)(void *)(xa)) = htonll((uint64_t)(yv));
      218 +#endif  /* (!__i386&&!__amd64) || ((_XPG4_2||_XPG5) && !__EXTENSIONS__) */
 182  219  
 183  220  #define LE_OUT8(xa, yv) *((uint8_t *)(xa)) = (uint8_t)(yv);
 184  221  
 185  222  #define LE_OUT16(xa, yv) \
 186  223          LE_OUT8((uint8_t *)(xa), yv); \
 187  224          LE_OUT8((uint8_t *)(xa) + 1, (yv) >> 8);
 188  225  
 189  226  #define LE_OUT32(xa, yv) \
 190  227          LE_OUT16((uint8_t *)(xa), yv); \
 191  228          LE_OUT16((uint8_t *)(xa) + 2, (yv) >> 16);
[... 12 lines elided ...]
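
And the converse: a minimal sketch (again hypothetical, not from the changeset) that serializes a 32-bit value into a buffer in big-endian order with the BE_OUT* writers. Note that these macros expand to multiple semicolon-terminated statements, so they cannot sit bare in an unbraced if/else.

#include <stdio.h>
#include <stdint.h>

/* Portable byte-at-a-time writers from the diff above. */
#define	BE_OUT8(xa, yv)	*((uint8_t *)(xa)) = (uint8_t)(yv);
#define	BE_OUT16(xa, yv) \
	BE_OUT8((uint8_t *)(xa) + 1, yv); \
	BE_OUT8((uint8_t *)(xa), (yv) >> 8);
#define	BE_OUT32(xa, yv) \
	BE_OUT16((uint8_t *)(xa) + 2, yv); \
	BE_OUT16((uint8_t *)(xa), (yv) >> 16);

int
main(void)
{
	uint8_t hdr[4];
	int i;

	BE_OUT32(hdr, 0x12345678)	/* macro supplies its own ';' */

	/* Most significant byte lands first: prints 12 34 56 78. */
	for (i = 0; i < 4; i++)
		printf("%02x ", hdr[i]);
	printf("\n");
	return (0);
}

On x86 with extensions enabled, BE_OUT64 instead stores the whole word at once through htonll(), trading four byte stores per half for one swap plus a single (possibly unaligned) 64-bit store.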