Print this page
5007142 Add ntohll and htonll to sys/byteorder.h
6717509 Need to use bswap/bswapq for byte swap of 64-bit integer on x32/x64
PSARC 2008/474
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/common/crypto/md5/md5_byteswap.h
+++ new/usr/src/common/crypto/md5/md5_byteswap.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 - * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 + * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 #ifndef _MD5_BYTESWAP_H
28 28 #define _MD5_BYTESWAP_H
29 29
30 -#pragma ident "%Z%%M% %I% %E% SMI"
31 -
32 30 /*
33 31 * definitions for inline functions for little-endian loads.
34 32 *
35 33 * This file has special definitions for UltraSPARC architectures,
36 34 * which have a special address space identifier for loading 32 and 16 bit
37 35 * integers in little-endian byte order.
38 36 *
39 37 * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
40 38 * same thing and must be changed together.
41 39 */
42 40
41 +#include <sys/types.h>
43 42 #if defined(__sparc)
44 43 #include <v9/sys/asi.h>
44 +#elif defined(_LITTLE_ENDIAN)
45 +#include <sys/byteorder.h>
45 46 #endif
46 47
47 48 #ifdef __cplusplus
48 49 extern "C" {
49 50 #endif
50 51
51 52 #if defined(_LITTLE_ENDIAN)
52 53
53 54 /*
54 55 * Little-endian optimization: I don't need to do any weirdness. On
55 56 * some little-endian boxen, I'll have to do alignment checks, but I can do
56 57 * that below.
57 58 */
58 59
#if !defined(__i386) && !defined(__amd64)
/*
 * i386 and amd64 don't require aligned 4-byte loads.  The symbol
 * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
 * requires alignment checking.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif	/* !__i386 && !__amd64 */

/* Little-endian host: an aligned direct load already has the right order. */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(addr))
69 70
70 71 #else /* !_LITTLE_ENDIAN */
71 72
72 73 /*
73 74 * sparc v9/v8plus optimization:
74 75 *
75 76 * on the sparc v9/v8plus, we can load data little endian. however, since
76 77 * the compiler doesn't have direct support for little endian, we
77 78 * link to an assembly-language routine `load_little_32' to do
78 79 * the magic. note that special care must be taken to ensure the
79 80 * address is 32-bit aligned -- in the interest of speed, we don't
80 81 * check to make sure, since careful programming can guarantee this
81 82 * for us.
82 83 */
83 84 #if defined(sun4u)
84 85
/* Define alignment check because we can 4-byte load as little endian. */
#define	_MD5_CHECK_ALIGNMENT
/* Dispatch to the lduwa-based inline (or .il assembly template) below. */
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(addr))
↓ open down ↓ |
33 lines elided |
↑ open up ↑ |
88 89
#if !defined(__lint) && defined(__GNUC__)

/*
 * Load the 32-bit word at `addr' in little-endian byte order.
 *
 * The lduwa instruction with the ASI_PL address space identifier (the
 * UltraSPARC little-endian primary ASI -- see the file comment above)
 * performs the byte swap as part of the load itself.  Per the block
 * comment above, `addr' must be 4-byte aligned; in the interest of
 * speed that is not checked here.
 */
static __inline__ uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t value;

	__asm__(
	    "lduwa [%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL));

	return (value);
}
#endif	/* !__lint && __GNUC__ */
118 105
#if !defined(__GNUC__)
/*
 * Non-GNU compilers get this from the inline-assembly template
 * common/crypto/md5/sparc/sun4[uv]/byteswap.il (see file comment above).
 */
extern uint32_t load_little_32(uint32_t *);
#endif /* !__GNUC__ */

/*
 * Placate lint: this plain (not byte-swapped) load is only ever seen
 * by lint's type checker; it is never compiled into real code.
 */
#if defined(__lint)
uint32_t
load_little_32(uint32_t *addr)
{
	return (*addr);
}
#endif /* __lint */
131 118
132 -#else /* !sun4u */
119 +#elif defined(_LITTLE_ENDIAN)
120 +#define LOAD_LITTLE_32(addr) htonl(addr)
133 121
122 +#else
134 123 /* big endian -- will work on little endian, but slowly */
135 124 /* Since we do byte operations, we don't have to check for alignment. */
136 125 #define LOAD_LITTLE_32(addr) \
137 126 ((addr)[0] | ((addr)[1] << 8) | ((addr)[2] << 16) | ((addr)[3] << 24))
138 -
139 127 #endif /* sun4u */
140 128
141 129 #if defined(sun4v)
142 130
143 131 /*
144 132 * For N1 want to minimize number of arithmetic operations. This is best
145 133 * achieved by using the %asi register to specify ASI for the lduwa operations.
146 134 * Also, have a separate inline template for each word, so can utilize the
147 135 * immediate offset in lduwa, without relying on the compiler to do the right
148 136 * thing.
149 137 *
150 138 * Moving to 64-bit loads might also be beneficial.
151 139 */
/*
 * LOAD_LITTLE_32_<n> loads word <n> (byte offset 4*<n>, hex-named) of
 * the 16-word input block, via the %asi-based inlines defined below.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(addr))
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
168 156
169 157 #if !defined(__lint) && defined(__GNUC__)
170 158
/*
 * This actually sets the ASI register, not necessarily to ASI_PL.
 * Writing %asi once up front lets the lduwa templates below use an
 * immediate offset, saving an arithmetic op per load (see the sun4v
 * comment above).
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr %%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi));
}
182 170
/*
 * Read back the current %asi register value -- presumably so callers
 * can save it before set_little() and restore it afterwards; confirm
 * against the callers in the MD5 transform code.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
	    "rd %%asi, %0\n\t"
	    : "=r" (asi));

	return (asi);
}
194 182
/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x.  The word offset is
 * folded into the lduwa immediate as (0x<off> << 2) bytes, and the
 * load uses whatever ASI was installed by set_little() above.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
	    "lduwa [%1 + %2]%%asi, %0\n\t"	\
	    : "=r" (value)			\
	    : "r" (addr), "i" ((0x##__off) << 2)); \
	return (value);				\
}

LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
#undef	LL_TEMPLATE
229 217
230 218 #endif /* !__lint && __GNUC__ */
231 219
#if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - register
 * is set using an inline template
 * (common/crypto/md5/sparc/sun4[uv]/byteswap.il, per the file comment).
 *
 * Saves a few arithmetic ops as can now use an immediate offset with the
 * lduwa instructions.
 *
 * NOTE(review): this prototype takes uint32_t while the GNU C inline
 * above takes uint8_t -- confirm which width is intended.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

extern uint32_t load_little_32_0(uint32_t *);
extern uint32_t load_little_32_1(uint32_t *);
extern uint32_t load_little_32_2(uint32_t *);
extern uint32_t load_little_32_3(uint32_t *);
extern uint32_t load_little_32_4(uint32_t *);
extern uint32_t load_little_32_5(uint32_t *);
extern uint32_t load_little_32_6(uint32_t *);
extern uint32_t load_little_32_7(uint32_t *);
extern uint32_t load_little_32_8(uint32_t *);
extern uint32_t load_little_32_9(uint32_t *);
extern uint32_t load_little_32_a(uint32_t *);
extern uint32_t load_little_32_b(uint32_t *);
extern uint32_t load_little_32_c(uint32_t *);
extern uint32_t load_little_32_d(uint32_t *);
extern uint32_t load_little_32_e(uint32_t *);
extern uint32_t load_little_32_f(uint32_t *);
#endif /* !__GNUC__ */
260 248 #endif /* sun4v */
261 249
262 250 #endif /* _LITTLE_ENDIAN */
263 251
264 252 #ifdef __cplusplus
265 253 }
266 254 #endif
267 255
268 256 #endif /* !_MD5_BYTESWAP_H */
↓ open down ↓ |
68 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX