1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 /
  29 / In-line functions for amd64 kernels.
  30 /
  31 
/
/ return current thread pointer
/
/ NOTE: the "0x18" should be replaced by the computed value of the
/       offset of "cpu_thread" from the beginning of the struct cpu.
/       Including "assym.h" does not work, however, since that stuff
/       is PSM-specific and is only visible to the 'unix' build anyway.
/       Same with current cpu pointer, where "0x10" should be replaced
/       by the computed value of the offset of "cpu_self".
/       Ugh -- what a disaster.
/
  43         .inline threadp,0
  44         movq    %gs:0x18, %rax
  45         .end
  46 
  47 /
  48 / return current cpu pointer
  49 /
  50         .inline curcpup,0
  51         movq    %gs:0x10, %rax
  52         .end
  53 
  54 /
  55 / return caller
  56 /
  57         .inline caller,0
  58         movq    8(%rbp), %rax
  59         .end
  60 
  61 /
  62 / convert ipl to spl.  This is the identity function for i86
  63 /
  64         .inline ipltospl,0
  65         movq    %rdi, %rax
  66         .end
  67 
  68 /
  69 / find the low order bit in a word
  70 /
  71         .inline lowbit,4
  72         movq    $-1, %rax
  73         bsfq    %rdi, %rax
  74         incq    %rax
  75         .end
  76 
  77 /
  78 / Networking byte order functions (too bad, Intel has the wrong byte order)
  79 /
  80 
  81         .inline htonl,4
  82         movl    %edi, %eax
  83         bswap   %eax
  84         .end
  85 
  86         .inline ntohl,4
  87         movl    %edi, %eax
  88         bswap   %eax
  89         .end
  90 
  91         .inline htons,4
  92         movl    %edi, %eax
  93         bswap   %eax
  94         shrl    $16, %eax
  95         .end
  96 
  97         .inline ntohs,4
  98         movl    %edi, %eax
  99         bswap   %eax
 100         shrl    $16, %eax
 101         .end
 102 
/*
 * multiply two long numbers and yield a u_longlong_t result
 * Provided to manipulate hrtime_t values.
 */
 107         /* XX64 These don't work correctly with SOS9 build 13.0 yet
 108         .inline mul32, 8
 109         xorl    %edx, %edx
 110         movl    %edi, %eax
 111         mull    %esi
 112         shlq    $32, %rdx
 113         orq     %rdx, %rax
 114         ret
 115         .end
 116         */
 117 /*
 118  * Unlock hres_lock and increment the count value. (See clock.h)
 119  */
 120         .inline unlock_hres_lock, 0
 121         lock
 122         incl    hres_lock
 123         .end
 124 
 125         .inline atomic_orb,8
 126         movl    %esi, %eax
 127         lock
 128         orb     %al,(%rdi)
 129         .end
 130 
 131         .inline atomic_andb,8
 132         movl    %esi, %eax
 133         lock
 134         andb    %al,(%rdi)
 135         .end
 136 
 137 /*
 138  * atomic inc/dec operations.
 139  *      void atomic_inc16(uint16_t *addr) { ++*addr; }
 140  *      void atomic_dec16(uint16_t *addr) { --*addr; }
 141  */
 142         .inline atomic_inc16,4
 143         lock
 144         incw    (%rdi)
 145         .end
 146 
 147         .inline atomic_dec16,4
 148         lock
 149         decw    (%rdi)
 150         .end
 151 
 152 /*
 153  * atomic bit clear
 154  */
 155         .inline atomic_btr32,8
 156         lock
 157         btrl %esi, (%rdi)
 158         setc %al
 159         .end
 160 
/*
 * Call the pause instruction.  To the Pentium 4 Xeon processor, it acts as
 * a hint that the code sequence is a busy spin-wait loop.  Without a pause
 * instruction in these loops, the P4 Xeon processor may suffer a severe
 * penalty when exiting the loop because the processor detects a possible
 * memory order violation.  Inserting the pause instruction significantly
 * reduces the likelihood of a memory order violation, improving performance.
 * The pause instruction is a NOP on all other IA-32 processors.
 */
 170         .inline ht_pause, 0
 171         pause
 172         .end
 173 
 174 /*
 175  * inlines for update_sregs().
 176  */
 177         .inline __set_ds, 0
 178         movw    %di, %ds
 179         .end
 180 
 181         .inline __set_es, 0
 182         movw    %di, %es
 183         .end
 184 
 185         .inline __set_fs, 0
 186         movw    %di, %fs
 187         .end
 188 
 189         .inline __set_gs, 0
 190         movw    %di, %gs
 191         .end
 192 
 193         /*
 194          * OPTERON_ERRATUM_88 requires mfence
 195          */
 196         .inline __swapgs, 0
 197         mfence
 198         swapgs
 199         .end