VirtualBox

source: vbox/trunk/include/iprt/asm.h@90640

Last change on this file since 90640 was 90640, committed by vboxsync 4 years ago

iprt/asm.h,tstRTInlineAsm: Added ASMAtomicCmpWriteU128 and ASMAtomicCmpWriteU128U for AMD64 systems. bugref:6695

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 232.6 KB
 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2020 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER 1400.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedXor)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedCompareExchange128)
80# pragma intrinsic(_InterlockedExchange64)
81# pragma intrinsic(_InterlockedExchangeAdd64)
82# pragma intrinsic(_InterlockedAnd64)
83# pragma intrinsic(_InterlockedOr64)
84# pragma intrinsic(_InterlockedIncrement64)
85# pragma intrinsic(_InterlockedDecrement64)
86# endif
87#endif
88
89/*
90 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
91 */
92#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
93# include "asm-watcom-x86-16.h"
94#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
95# include "asm-watcom-x86-32.h"
96#endif
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
102 * @remarks The difference between ordered and unordered atomic operations is
103 * that the former will complete outstanding reads and writes before
104 * continuing while the latter doesn't make any promises about the
105 * order. Ordered operations don't, it seems, make any 100% promise
106 * wrt whether the operation will complete before any subsequent
107 * memory access. (Please correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while
110 * ASMAtomicUoSomething are unordered (note the Uo).
111 *
112 * Please note that ordered operations do not necessarily imply a
113 * compiler (memory) barrier. The user has to use the
114 * ASMCompilerBarrier() macro when that is deemed necessary.
115 *
116 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
117 * to reorder or even optimize assembler instructions away. For
118 * instance, in the following code the second rdmsr instruction is
119 * optimized away because gcc treats that instruction as deterministic:
120 *
121 * @code
122 * static inline uint32_t rdmsr_low(int idx)
123 * {
124 *     uint32_t low;
125 *     __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
126 * }
127 * ...
128 * uint32_t msr1 = rdmsr_low(1);
129 * foo(msr1);
130 * msr1 = rdmsr_low(1);
131 * bar(msr1);
132 * @endcode
133 *
134 * The input parameter of rdmsr_low is the same for both calls and
135 * therefore gcc will use the result of the first call as input
136 * parameter for bar() as well. For rdmsr this is not acceptable as
137 * this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
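
/* Illustrative sketch of how the ordered vs. unordered distinction above
 * typically plays out.  ASMAtomicWriteU32 and ASMAtomicUoWriteU32 are
 * declared further down in this file; the variable and function names are
 * invented for the example.
 *
 *     static uint32_t volatile g_cReadyItems;     // consumers rely on ordering
 *     static uint32_t volatile g_cStatUpdates;    // statistics only
 *
 *     DECLINLINE(void) examplePublishItem(void)
 *     {
 *         ASMAtomicWriteU32(&g_cReadyItems, 1);    // ordered: prior accesses complete first
 *         ASMAtomicUoWriteU32(&g_cStatUpdates, 1); // unordered: no ordering promise
 *     }
 */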
142
143
144/** @def RT_INLINE_ASM_GCC_4_3_X_X86
145 * Used to work around some 4.3.x register allocation issues in this version of
146 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
147 * definitely not for 5.x */
148#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
149# define RT_INLINE_ASM_GCC_4_3_X_X86 1
150#else
151# define RT_INLINE_ASM_GCC_4_3_X_X86 0
152#endif
153
154/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
155 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
156 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
157 * mode, x86.
158 *
159 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
160 * when in PIC mode on x86.
161 */
162#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
163# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
164# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
165# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
167# elif ( (defined(PIC) || defined(__PIC__)) \
168 && defined(RT_ARCH_X86) \
169 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
170 || defined(RT_OS_DARWIN)) )
171# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
172# else
173# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
174# endif
175#endif
176
177
178/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
179 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
180#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
181# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
182#else
183# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
184#endif
185
186/*
187 * ARM is great fun.
188 */
189#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
190
191# define RTASM_ARM_NO_BARRIER
192# ifdef RT_ARCH_ARM64
193# define RTASM_ARM_NO_BARRIER_IN_REG
194# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
195# define RTASM_ARM_DSB_SY "dsb sy\n\t"
196# define RTASM_ARM_DSB_SY_IN_REG
197# define RTASM_ARM_DSB_SY_COMMA_IN_REG
198# define RTASM_ARM_DMB_SY "dmb sy\n\t"
199# define RTASM_ARM_DMB_SY_IN_REG
200# define RTASM_ARM_DMB_SY_COMMA_IN_REG
201# define RTASM_ARM_DMB_ST "dmb st\n\t"
202# define RTASM_ARM_DMB_ST_IN_REG
203# define RTASM_ARM_DMB_ST_COMMA_IN_REG
204# define RTASM_ARM_DMB_LD "dmb ld\n\t"
205# define RTASM_ARM_DMB_LD_IN_REG
206# define RTASM_ARM_DMB_LD_COMMA_IN_REG
207# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
208# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
209 uint32_t rcSpill; \
210 uint32_t u32NewRet; \
211 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
212 RTASM_ARM_##barrier_type /* before label? */ \
213 "ldaxr %w[uNew], %[pMem]\n\t" \
214 modify64 \
215 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
216 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
217 : [pMem] "+m" (*a_pu32Mem) \
218 , [uNew] "=&r" (u32NewRet) \
219 , [rc] "=&r" (rcSpill) \
220 : in_reg \
221 : "cc")
222# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
223 uint32_t rcSpill; \
224 uint32_t u32OldRet; \
225 uint32_t u32NewSpill; \
226 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
227 RTASM_ARM_##barrier_type /* before label? */ \
228 "ldaxr %w[uOld], %[pMem]\n\t" \
229 modify64 \
230 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
231 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
232 : [pMem] "+m" (*a_pu32Mem) \
233 , [uOld] "=&r" (u32OldRet) \
234 , [uNew] "=&r" (u32NewSpill) \
235 , [rc] "=&r" (rcSpill) \
236 : in_reg \
237 : "cc")
238# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
239 uint32_t rcSpill; \
240 uint64_t u64NewRet; \
241 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
242 RTASM_ARM_##barrier_type /* before label? */ \
243 "ldaxr %[uNew], %[pMem]\n\t" \
244 modify64 \
245 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
246 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
247 : [pMem] "+m" (*a_pu64Mem) \
248 , [uNew] "=&r" (u64NewRet) \
249 , [rc] "=&r" (rcSpill) \
250 : in_reg \
251 : "cc")
252# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
253 uint32_t rcSpill; \
254 uint64_t u64OldRet; \
255 uint64_t u64NewSpill; \
256 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
257 RTASM_ARM_##barrier_type /* before label? */ \
258 "ldaxr %[uOld], %[pMem]\n\t" \
259 modify64 \
260 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
261 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
262 : [pMem] "+m" (*a_pu64Mem) \
263 , [uOld] "=&r" (u64OldRet) \
264 , [uNew] "=&r" (u64NewSpill) \
265 , [rc] "=&r" (rcSpill) \
266 : in_reg \
267 : "cc")
268
269# else /* RT_ARCH_ARM32 */
270# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
271# if RT_ARCH_ARM32 >= 7
272# warning armv7
273# define RTASM_ARM_NO_BARRIER_IN_REG
274# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
275# define RTASM_ARM_DSB_SY "dsb sy\n\t"
276# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
277# define RTASM_ARM_DMB_SY "dmb sy\n\t"
278# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
279# define RTASM_ARM_DMB_ST "dmb st\n\t"
280# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
281# define RTASM_ARM_DMB_LD "dmb ld\n\t"
282# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
283
284# elif RT_ARCH_ARM32 >= 6
285# warning armv6
286# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
287# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
288# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
289# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
290# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
291# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
292# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
293# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
294# elif RT_ARCH_ARM32 >= 4
295# warning armv5 or older
296# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
297# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
298# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
299# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
300# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
301# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
302# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
303# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
304# else
305# error "huh? Odd RT_ARCH_ARM32 value!"
306# endif
307# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
308# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
309# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
310# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
311# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
312 uint32_t rcSpill; \
313 uint32_t u32NewRet; \
314 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
315 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
316 "ldrex %[uNew], %[pMem]\n\t" \
317 modify32 \
318 "strex %[rc], %[uNew], %[pMem]\n\t" \
319 "cmp %[rc], #0\n\t" \
320 "bne .Ltry_again_" #name "_%=\n\t" \
321 : [pMem] "+m" (*a_pu32Mem) \
322 , [uNew] "=&r" (u32NewRet) \
323 , [rc] "=&r" (rcSpill) \
324 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
325 , in_reg \
326 : "cc")
327# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
328 uint32_t rcSpill; \
329 uint32_t u32OldRet; \
330 uint32_t u32NewSpill; \
331 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
332 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
333 "ldrex %[uOld], %[pMem]\n\t" \
334 modify32 \
335 "strex %[rc], %[uNew], %[pMem]\n\t" \
336 "cmp %[rc], #0\n\t" \
337 "bne .Ltry_again_" #name "_%=\n\t" \
338 : [pMem] "+m" (*a_pu32Mem) \
339 , [uOld] "=&r" (u32OldRet) \
340 , [uNew] "=&r" (u32NewSpill) \
341 , [rc] "=&r" (rcSpill) \
342 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
343 , in_reg \
344 : "cc")
345# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
346 uint32_t rcSpill; \
347 uint64_t u64NewRet; \
348 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
349 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
350 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
351 modify32 \
352 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
353 "cmp %[rc], #0\n\t" \
354 "bne .Ltry_again_" #name "_%=\n\t" \
355 : [pMem] "+m" (*a_pu64Mem), \
356 [uNew] "=&r" (u64NewRet), \
357 [rc] "=&r" (rcSpill) \
358 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
359 , in_reg \
360 : "cc")
361# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
362 uint32_t rcSpill; \
363 uint64_t u64OldRet; \
364 uint64_t u64NewSpill; \
365 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
366 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
367 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
368 modify32 \
369 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
370 "cmp %[rc], #0\n\t" \
371 "bne .Ltry_again_" #name "_%=\n\t" \
372 : [pMem] "+m" (*a_pu64Mem), \
373 [uOld] "=&r" (u64OldRet), \
374 [uNew] "=&r" (u64NewSpill), \
375 [rc] "=&r" (rcSpill) \
376 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
377 , in_reg \
378 : "cc")
379# endif /* RT_ARCH_ARM32 */
380#endif
381
382
383/** @def ASMReturnAddress
384 * Gets the return address of the current (or calling if you like) function or method.
385 */
386#ifdef _MSC_VER
387# ifdef __cplusplus
388extern "C"
389# endif
390void * _ReturnAddress(void);
391# pragma intrinsic(_ReturnAddress)
392# define ASMReturnAddress() _ReturnAddress()
393#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
394# define ASMReturnAddress() __builtin_return_address(0)
395#elif defined(__WATCOMC__)
396# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
397#else
398# error "Unsupported compiler."
399#endif
400
401
402/**
403 * Compiler memory barrier.
404 *
405 * Ensure that the compiler does not use any cached (register/tmp stack) memory
406 * values or any outstanding writes when returning from this function.
407 *
408 * This function must be used if non-volatile data is modified by a
409 * device or the VMM. Typical cases are port access, MMIO access,
410 * trapping instruction, etc.
411 */
412#if RT_INLINE_ASM_GNU_STYLE
413# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
414#elif RT_INLINE_ASM_USES_INTRIN
415# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
416#elif defined(__WATCOMC__)
417void ASMCompilerBarrier(void);
418#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
419DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
420{
421 __asm
422 {
423 }
424}
425#endif
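
/* Illustrative sketch: a polling loop on a flag that a device or the VMM
 * updates behind the compiler's back.  Without ASMCompilerBarrier() the
 * compiler may keep g_fExampleDone cached in a register.  The flag and
 * function names are invented for the example.
 *
 *     static bool g_fExampleDone;   // deliberately not volatile
 *
 *     DECLINLINE(void) exampleWaitUntilDone(void)
 *     {
 *         while (!g_fExampleDone)
 *             ASMCompilerBarrier();  // forces a fresh read of g_fExampleDone
 *     }
 */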
426
427
428/** @def ASMBreakpoint
429 * Debugger Breakpoint.
430 * @deprecated Use RT_BREAKPOINT instead.
431 * @internal
432 */
433#define ASMBreakpoint() RT_BREAKPOINT()
434
435
436/**
437 * Spinloop hint for platforms that have these, empty function on the other
438 * platforms.
439 *
440 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
441 * spin locks.
442 */
443#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
444RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
445#else
446DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
447{
448# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
449# if RT_INLINE_ASM_GNU_STYLE
450 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
451# else
452 __asm {
453 _emit 0f3h
454 _emit 090h
455 }
456# endif
457
458# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
459 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
460
461# else
462 /* dummy */
463# endif
464}
465#endif
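
/* Illustrative sketch: a minimal spin lock built from ASMAtomicCmpXchgU32
 * (declared further down in this file) and ASMNopPause.  For real code the
 * IPRT locking APIs (e.g. RTCritSect) are normally the better choice; the
 * type and function names are invented for the example.
 *
 *     typedef struct EXAMPLESPINLOCK { uint32_t volatile fLocked; } EXAMPLESPINLOCK;
 *
 *     DECLINLINE(void) exampleSpinAcquire(EXAMPLESPINLOCK *pLock)
 *     {
 *         while (!ASMAtomicCmpXchgU32(&pLock->fLocked, 1, 0))
 *             ASMNopPause();                       // be nice to the sibling thread
 *     }
 *
 *     DECLINLINE(void) exampleSpinRelease(EXAMPLESPINLOCK *pLock)
 *     {
 *         ASMAtomicXchgU32(&pLock->fLocked, 0);    // ordered store releases the lock
 *     }
 */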
466
467
468/**
469 * Atomically Exchange an unsigned 8-bit value, ordered.
470 *
471 * @returns Current *pu8 value
472 * @param pu8 Pointer to the 8-bit variable to update.
473 * @param u8 The 8-bit value to assign to *pu8.
474 */
475#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
476RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
477#else
478DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
479{
480# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
481# if RT_INLINE_ASM_GNU_STYLE
482 __asm__ __volatile__("xchgb %0, %1\n\t"
483 : "=m" (*pu8)
484 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
485 : "1" (u8)
486 , "m" (*pu8));
487# else
488 __asm
489 {
490# ifdef RT_ARCH_AMD64
491 mov rdx, [pu8]
492 mov al, [u8]
493 xchg [rdx], al
494 mov [u8], al
495# else
496 mov edx, [pu8]
497 mov al, [u8]
498 xchg [edx], al
499 mov [u8], al
500# endif
501 }
502# endif
503 return u8;
504
505# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
506 uint32_t uOld;
507 uint32_t rcSpill;
508 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
509 RTASM_ARM_DMB_SY
510# if defined(RT_ARCH_ARM64)
511 "ldaxrb %w[uOld], %[pMem]\n\t"
512 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
513 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
514# else
515 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
516 "strexb %[rc], %[uNew], %[pMem]\n\t"
517 "cmp %[rc], #0\n\t"
518 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
519# endif
520 : [pMem] "+m" (*pu8)
521 , [uOld] "=&r" (uOld)
522 , [rc] "=&r" (rcSpill)
523 : [uNew] "r" ((uint32_t)u8)
524 RTASM_ARM_DMB_SY_COMMA_IN_REG
525 : "cc");
526 return (uint8_t)uOld;
527
528# else
529# error "Port me"
530# endif
531}
532#endif
533
534
535/**
536 * Atomically Exchange a signed 8-bit value, ordered.
537 *
538 * @returns Current *pi8 value
539 * @param pi8 Pointer to the 8-bit variable to update.
540 * @param i8 The 8-bit value to assign to *pi8.
541 */
542DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
543{
544 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
545}
546
547
548/**
549 * Atomically Exchange a bool value, ordered.
550 *
551 * @returns Current *pf value
552 * @param pf Pointer to the 8-bit variable to update.
553 * @param f The boolean value to assign to *pf.
554 */
555DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
556{
557#ifdef _MSC_VER
558 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
559#else
560 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
561#endif
562}
563
564
565/**
566 * Atomically Exchange an unsigned 16-bit value, ordered.
567 *
568 * @returns Current *pu16 value
569 * @param pu16 Pointer to the 16-bit variable to update.
570 * @param u16 The 16-bit value to assign to *pu16.
571 */
572#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
573RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
574#else
575DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
576{
577# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
578# if RT_INLINE_ASM_GNU_STYLE
579 __asm__ __volatile__("xchgw %0, %1\n\t"
580 : "=m" (*pu16)
581 , "=r" (u16)
582 : "1" (u16)
583 , "m" (*pu16));
584# else
585 __asm
586 {
587# ifdef RT_ARCH_AMD64
588 mov rdx, [pu16]
589 mov ax, [u16]
590 xchg [rdx], ax
591 mov [u16], ax
592# else
593 mov edx, [pu16]
594 mov ax, [u16]
595 xchg [edx], ax
596 mov [u16], ax
597# endif
598 }
599# endif
600 return u16;
601
602# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
603 uint32_t uOld;
604 uint32_t rcSpill;
605 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
606 RTASM_ARM_DMB_SY
607# if defined(RT_ARCH_ARM64)
608 "ldaxrh %w[uOld], %[pMem]\n\t"
609 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
610 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
611# else
612 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
613 "strexh %[rc], %[uNew], %[pMem]\n\t"
614 "cmp %[rc], #0\n\t"
615 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
616# endif
617 : [pMem] "+m" (*pu16)
618 , [uOld] "=&r" (uOld)
619 , [rc] "=&r" (rcSpill)
620 : [uNew] "r" ((uint32_t)u16)
621 RTASM_ARM_DMB_SY_COMMA_IN_REG
622 : "cc");
623 return (uint16_t)uOld;
624
625# else
626# error "Port me"
627# endif
628}
629#endif
630
631
632/**
633 * Atomically Exchange a signed 16-bit value, ordered.
634 *
635 * @returns Current *pi16 value
636 * @param pi16 Pointer to the 16-bit variable to update.
637 * @param i16 The 16-bit value to assign to *pi16.
638 */
639DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
640{
641 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
642}
643
644
645/**
646 * Atomically Exchange an unsigned 32-bit value, ordered.
647 *
648 * @returns Current *pu32 value
649 * @param pu32 Pointer to the 32-bit variable to update.
650 * @param u32 The 32-bit value to assign to *pu32.
651 *
652 * @remarks Does not work on 286 and earlier.
653 */
654#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
655RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
656#else
657DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
658{
659# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
660# if RT_INLINE_ASM_GNU_STYLE
661 __asm__ __volatile__("xchgl %0, %1\n\t"
662 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
663 , "=r" (u32)
664 : "1" (u32)
665 , "m" (*pu32));
666
667# elif RT_INLINE_ASM_USES_INTRIN
668 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
669
670# else
671 __asm
672 {
673# ifdef RT_ARCH_AMD64
674 mov rdx, [pu32]
675 mov eax, u32
676 xchg [rdx], eax
677 mov [u32], eax
678# else
679 mov edx, [pu32]
680 mov eax, u32
681 xchg [edx], eax
682 mov [u32], eax
683# endif
684 }
685# endif
686 return u32;
687
688# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
689 uint32_t uOld;
690 uint32_t rcSpill;
691 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
692 RTASM_ARM_DMB_SY
693# if defined(RT_ARCH_ARM64)
694 "ldaxr %w[uOld], %[pMem]\n\t"
695 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
696 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
697# else
698 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
699 "strex %[rc], %[uNew], %[pMem]\n\t"
700 "cmp %[rc], #0\n\t"
701 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
702# endif
703 : [pMem] "+m" (*pu32)
704 , [uOld] "=&r" (uOld)
705 , [rc] "=&r" (rcSpill)
706 : [uNew] "r" (u32)
707 RTASM_ARM_DMB_SY_COMMA_IN_REG
708 : "cc");
709 return uOld;
710
711# else
712# error "Port me"
713# endif
714}
715#endif
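
/* Illustrative sketch: ASMAtomicXchgU32 used to fetch and clear a pending
 * event mask in one atomic step, so bits posted concurrently by other
 * threads are never lost.  The variable and function names are invented.
 *
 *     static uint32_t volatile g_fExamplePendingEvents;
 *
 *     DECLINLINE(uint32_t) exampleTakePendingEvents(void)
 *     {
 *         return ASMAtomicXchgU32(&g_fExamplePendingEvents, 0);
 *     }
 */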
716
717
718/**
719 * Atomically Exchange a signed 32-bit value, ordered.
720 *
721 * @returns Current *pi32 value
722 * @param pi32 Pointer to the 32-bit variable to update.
723 * @param i32 The 32-bit value to assign to *pi32.
724 */
725DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
726{
727 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
728}
729
730
731/**
732 * Atomically Exchange an unsigned 64-bit value, ordered.
733 *
734 * @returns Current *pu64 value
735 * @param pu64 Pointer to the 64-bit variable to update.
736 * @param u64 The 64-bit value to assign to *pu64.
737 *
738 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
739 */
740#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
741 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
742RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
743#else
744DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
745{
746# if defined(RT_ARCH_AMD64)
747# if RT_INLINE_ASM_USES_INTRIN
748 return _InterlockedExchange64((__int64 *)pu64, u64);
749
750# elif RT_INLINE_ASM_GNU_STYLE
751 __asm__ __volatile__("xchgq %0, %1\n\t"
752 : "=m" (*pu64)
753 , "=r" (u64)
754 : "1" (u64)
755 , "m" (*pu64));
756 return u64;
757# else
758 __asm
759 {
760 mov rdx, [pu64]
761 mov rax, [u64]
762 xchg [rdx], rax
763 mov [u64], rax
764 }
765 return u64;
766# endif
767
768# elif defined(RT_ARCH_X86)
769# if RT_INLINE_ASM_GNU_STYLE
770# if defined(PIC) || defined(__PIC__)
771 uint32_t u32EBX = (uint32_t)u64;
772 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
773 "xchgl %%ebx, %3\n\t"
774 "1:\n\t"
775 "lock; cmpxchg8b (%5)\n\t"
776 "jnz 1b\n\t"
777 "movl %3, %%ebx\n\t"
778 /*"xchgl %%esi, %5\n\t"*/
779 : "=A" (u64)
780 , "=m" (*pu64)
781 : "0" (*pu64)
782 , "m" ( u32EBX )
783 , "c" ( (uint32_t)(u64 >> 32) )
784 , "S" (pu64)
785 : "cc");
786# else /* !PIC */
787 __asm__ __volatile__("1:\n\t"
788 "lock; cmpxchg8b %1\n\t"
789 "jnz 1b\n\t"
790 : "=A" (u64)
791 , "=m" (*pu64)
792 : "0" (*pu64)
793 , "b" ( (uint32_t)u64 )
794 , "c" ( (uint32_t)(u64 >> 32) )
795 : "cc");
796# endif
797# else
798 __asm
799 {
800 mov ebx, dword ptr [u64]
801 mov ecx, dword ptr [u64 + 4]
802 mov edi, pu64
803 mov eax, dword ptr [edi]
804 mov edx, dword ptr [edi + 4]
805 retry:
806 lock cmpxchg8b [edi]
807 jnz retry
808 mov dword ptr [u64], eax
809 mov dword ptr [u64 + 4], edx
810 }
811# endif
812 return u64;
813
814# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
815 uint32_t rcSpill;
816 uint64_t uOld;
817 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
818 RTASM_ARM_DMB_SY
819# if defined(RT_ARCH_ARM64)
820 "ldaxr %[uOld], %[pMem]\n\t"
821 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
822 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
823# else
824 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
825 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
826 "cmp %[rc], #0\n\t"
827 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
828# endif
829 : [pMem] "+m" (*pu64)
830 , [uOld] "=&r" (uOld)
831 , [rc] "=&r" (rcSpill)
832 : [uNew] "r" (u64)
833 RTASM_ARM_DMB_SY_COMMA_IN_REG
834 : "cc");
835 return uOld;
836
837# else
838# error "Port me"
839# endif
840}
841#endif
842
843
844/**
845 * Atomically Exchange a signed 64-bit value, ordered.
846 *
847 * @returns Current *pi64 value
848 * @param pi64 Pointer to the 64-bit variable to update.
849 * @param i64 The 64-bit value to assign to *pi64.
850 */
851DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
852{
853 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
854}
855
856
857/**
858 * Atomically Exchange a size_t value, ordered.
859 *
860 * @returns Current *puDst value
861 * @param puDst Pointer to the size_t variable to update.
862 * @param uNew The new value to assign to *puDst.
863 */
864DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
865{
866#if ARCH_BITS == 16
867 AssertCompile(sizeof(size_t) == 2);
868 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
869#elif ARCH_BITS == 32
870 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
871#elif ARCH_BITS == 64
872 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
873#else
874# error "ARCH_BITS is bogus"
875#endif
876}
877
878
879/**
880 * Atomically Exchange a pointer value, ordered.
881 *
882 * @returns Current *ppv value
883 * @param ppv Pointer to the pointer variable to update.
884 * @param pv The pointer value to assign to *ppv.
885 */
886DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
887{
888#if ARCH_BITS == 32 || ARCH_BITS == 16
889 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
890#elif ARCH_BITS == 64
891 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
892#else
893# error "ARCH_BITS is bogus"
894#endif
895}
896
897
898/**
899 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
900 *
901 * @returns Current *pv value
902 * @param ppv Pointer to the pointer variable to update.
903 * @param pv The pointer value to assign to *ppv.
904 * @param Type The type of *ppv, sans volatile.
905 */
906#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
907# define ASMAtomicXchgPtrT(ppv, pv, Type) \
908 __extension__ \
909 ({\
910 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
911 Type const pvTypeChecked = (pv); \
912 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
913 pvTypeCheckedRet; \
914 })
915#else
916# define ASMAtomicXchgPtrT(ppv, pv, Type) \
917 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
918#endif
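
/* Illustrative sketch: ASMAtomicXchgPtrT keeps the pointer type, so swapping
 * in a new buffer needs no manual casting and the compiler can type check the
 * arguments.  The EXAMPLEBUF type and names are invented for the example.
 *
 *     typedef struct EXAMPLEBUF { size_t cb; uint8_t abData[256]; } EXAMPLEBUF;
 *     static EXAMPLEBUF * volatile g_pExampleCurBuf;
 *
 *     DECLINLINE(EXAMPLEBUF *) exampleSwapBuffer(EXAMPLEBUF *pNew)
 *     {
 *         return ASMAtomicXchgPtrT(&g_pExampleCurBuf, pNew, EXAMPLEBUF *);
 *     }
 */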
919
920
921/**
922 * Atomically Exchange a raw-mode context pointer value, ordered.
923 *
924 * @returns Current *ppv value
925 * @param ppvRC Pointer to the pointer variable to update.
926 * @param pvRC The pointer value to assign to *ppv.
927 */
928DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
929{
930 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
931}
932
933
934/**
935 * Atomically Exchange a ring-0 pointer value, ordered.
936 *
937 * @returns Current *ppv value
938 * @param ppvR0 Pointer to the pointer variable to update.
939 * @param pvR0 The pointer value to assign to *ppv.
940 */
941DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
942{
943#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
944 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
945#elif R0_ARCH_BITS == 64
946 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
947#else
948# error "R0_ARCH_BITS is bogus"
949#endif
950}
951
952
953/**
954 * Atomically Exchange a ring-3 pointer value, ordered.
955 *
956 * @returns Current *ppv value
957 * @param ppvR3 Pointer to the pointer variable to update.
958 * @param pvR3 The pointer value to assign to *ppv.
959 */
960DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
961{
962#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
963 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
964#elif R3_ARCH_BITS == 64
965 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
966#else
967# error "R3_ARCH_BITS is bogus"
968#endif
969}
970
971
972/** @def ASMAtomicXchgHandle
973 * Atomically Exchange a typical IPRT handle value, ordered.
974 *
975 * @param ph Pointer to the value to update.
976 * @param hNew The new value to assign to *ph.
977 * @param phRes Where to store the current *ph value.
978 *
979 * @remarks This doesn't currently work for all handles (like RTFILE).
980 */
981#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
982# define ASMAtomicXchgHandle(ph, hNew, phRes) \
983 do { \
984 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
985 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
986 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
987 } while (0)
988#elif HC_ARCH_BITS == 64
989# define ASMAtomicXchgHandle(ph, hNew, phRes) \
990 do { \
991 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
992 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
993 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
994 } while (0)
995#else
996# error HC_ARCH_BITS
997#endif
998
999
1000/**
1001 * Atomically Exchange a value which size might differ
1002 * between platforms or compilers, ordered.
1003 *
1004 * @param pu Pointer to the variable to update.
1005 * @param uNew The value to assign to *pu.
1006 * @todo This is busted as it's missing the result argument.
1007 */
1008#define ASMAtomicXchgSize(pu, uNew) \
1009 do { \
1010 switch (sizeof(*(pu))) { \
1011 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1012 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1013 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1014 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1015 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1016 } \
1017 } while (0)
1018
1019/**
1020 * Atomically Exchange a value which size might differ
1021 * between platforms or compilers, ordered.
1022 *
1023 * @param pu Pointer to the variable to update.
1024 * @param uNew The value to assign to *pu.
1025 * @param puRes Where to store the current *pu value.
1026 */
1027#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1028 do { \
1029 switch (sizeof(*(pu))) { \
1030 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1031 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1032 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1033 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1034 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1035 } \
1036 } while (0)
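
/* Illustrative sketch: ASMAtomicXchgSizeCorrect picks the exchange width from
 * sizeof(*(pu)), which is convenient for types whose size differs between
 * platforms, such as RTHCUINTPTR.  The variable and function names are
 * invented for the example.
 *
 *     static RTHCUINTPTR volatile g_uExampleCookie;
 *
 *     DECLINLINE(RTHCUINTPTR) exampleSwapCookie(RTHCUINTPTR uNew)
 *     {
 *         RTHCUINTPTR uOld;
 *         ASMAtomicXchgSizeCorrect(&g_uExampleCookie, uNew, &uOld);
 *         return uOld;
 *     }
 */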
1037
1038
1039
1040/**
1041 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1042 *
1043 * @returns true if xchg was done.
1044 * @returns false if xchg wasn't done.
1045 *
1046 * @param pu8 Pointer to the value to update.
1047 * @param u8New The new value to assign to *pu8.
1048 * @param u8Old The old value to compare *pu8 with.
1049 *
1050 * @remarks x86: Requires a 486 or later.
1051 * @todo Rename ASMAtomicCmpWriteU8
1052 */
1053#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1054RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1055#else
1056DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1057{
1058# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1059 uint8_t u8Ret;
1060 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1061 "setz %1\n\t"
1062 : "=m" (*pu8)
1063 , "=qm" (u8Ret)
1064 , "=a" (u8Old)
1065 : "q" (u8New)
1066 , "2" (u8Old)
1067 , "m" (*pu8)
1068 : "cc");
1069 return (bool)u8Ret;
1070
1071# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1072 union { uint32_t u; bool f; } fXchg;
1073 uint32_t u32Spill;
1074 uint32_t rcSpill;
1075 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1076 RTASM_ARM_DMB_SY
1077# if defined(RT_ARCH_ARM64)
1078 "ldaxrb %w[uOld], %[pMem]\n\t"
1079 "cmp %w[uOld], %w[uCmp]\n\t"
1080 "bne 1f\n\t" /* stop here if not equal */
1081 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1082 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1083 "mov %w[fXchg], #1\n\t"
1084# else
1085 "ldrexb %[uOld], %[pMem]\n\t"
1086 "teq %[uOld], %[uCmp]\n\t"
1087 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1088 "bne 1f\n\t" /* stop here if not equal */
1089 "cmp %[rc], #0\n\t"
1090 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1091 "mov %[fXchg], #1\n\t"
1092# endif
1093 "1:\n\t"
1094 : [pMem] "+m" (*pu8)
1095 , [uOld] "=&r" (u32Spill)
1096 , [rc] "=&r" (rcSpill)
1097 , [fXchg] "=&r" (fXchg.u)
1098 : [uCmp] "r" ((uint32_t)u8Old)
1099 , [uNew] "r" ((uint32_t)u8New)
1100 , "[fXchg]" (0)
1101 RTASM_ARM_DMB_SY_COMMA_IN_REG
1102 : "cc");
1103 return fXchg.f;
1104
1105# else
1106# error "Port me"
1107# endif
1108}
1109#endif
1110
1111
1112/**
1113 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1114 *
1115 * @returns true if xchg was done.
1116 * @returns false if xchg wasn't done.
1117 *
1118 * @param pi8 Pointer to the value to update.
1119 * @param i8New The new value to assign to *pi8.
1120 * @param i8Old The old value to compare *pi8 with.
1121 *
1122 * @remarks x86: Requires a 486 or later.
1123 * @todo Rename ASMAtomicCmpWriteS8
1124 */
1125DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1126{
1127 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1128}
1129
1130
1131/**
1132 * Atomically Compare and Exchange a bool value, ordered.
1133 *
1134 * @returns true if xchg was done.
1135 * @returns false if xchg wasn't done.
1136 *
1137 * @param pf Pointer to the value to update.
1138 * @param fNew The new value to assign to *pf.
1139 * @param fOld The old value to compare *pf with.
1140 *
1141 * @remarks x86: Requires a 486 or later.
1142 * @todo Rename ASMAtomicCmpWriteBool
1143 */
1144DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1145{
1146 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1147}
1148
1149
1150/**
1151 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1152 *
1153 * @returns true if xchg was done.
1154 * @returns false if xchg wasn't done.
1155 *
1156 * @param pu32 Pointer to the value to update.
1157 * @param u32New The new value to assign to *pu32.
1158 * @param u32Old The old value to compare *pu32 with.
1159 *
1160 * @remarks x86: Requires a 486 or later.
1161 * @todo Rename ASMAtomicCmpWriteU32
1162 */
1163#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1164RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1165#else
1166DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1167{
1168# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1169# if RT_INLINE_ASM_GNU_STYLE
1170 uint8_t u8Ret;
1171 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1172 "setz %1\n\t"
1173 : "=m" (*pu32)
1174 , "=qm" (u8Ret)
1175 , "=a" (u32Old)
1176 : "r" (u32New)
1177 , "2" (u32Old)
1178 , "m" (*pu32)
1179 : "cc");
1180 return (bool)u8Ret;
1181
1182# elif RT_INLINE_ASM_USES_INTRIN
1183 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1184
1185# else
1186 uint32_t u32Ret;
1187 __asm
1188 {
1189# ifdef RT_ARCH_AMD64
1190 mov rdx, [pu32]
1191# else
1192 mov edx, [pu32]
1193# endif
1194 mov eax, [u32Old]
1195 mov ecx, [u32New]
1196# ifdef RT_ARCH_AMD64
1197 lock cmpxchg [rdx], ecx
1198# else
1199 lock cmpxchg [edx], ecx
1200# endif
1201 setz al
1202 movzx eax, al
1203 mov [u32Ret], eax
1204 }
1205 return !!u32Ret;
1206# endif
1207
1208# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1209 union { uint32_t u; bool f; } fXchg;
1210 uint32_t u32Spill;
1211 uint32_t rcSpill;
1212 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1213 RTASM_ARM_DMB_SY
1214# if defined(RT_ARCH_ARM64)
1215 "ldaxr %w[uOld], %[pMem]\n\t"
1216 "cmp %w[uOld], %w[uCmp]\n\t"
1217 "bne 1f\n\t" /* stop here if not equal */
1218 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1219 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1220 "mov %w[fXchg], #1\n\t"
1221# else
1222 "ldrex %[uOld], %[pMem]\n\t"
1223 "teq %[uOld], %[uCmp]\n\t"
1224 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1225 "bne 1f\n\t" /* stop here if not equal */
1226 "cmp %[rc], #0\n\t"
1227 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1228 "mov %[fXchg], #1\n\t"
1229# endif
1230 "1:\n\t"
1231 : [pMem] "+m" (*pu32)
1232 , [uOld] "=&r" (u32Spill)
1233 , [rc] "=&r" (rcSpill)
1234 , [fXchg] "=&r" (fXchg.u)
1235 : [uCmp] "r" (u32Old)
1236 , [uNew] "r" (u32New)
1237 , "[fXchg]" (0)
1238 RTASM_ARM_DMB_SY_COMMA_IN_REG
1239 : "cc");
1240 return fXchg.f;
1241
1242# else
1243# error "Port me"
1244# endif
1245}
1246#endif
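
/* Illustrative sketch: the typical compare-and-exchange retry loop, here used
 * to atomically maintain a maximum.  ASMAtomicCmpXchgU32 does not hand back
 * the current value on failure, so it is re-read each round (the Ex variants
 * further down avoid that extra read).  ASMAtomicReadU32 is declared further
 * down in this file; the names are invented for the example.
 *
 *     static uint32_t volatile g_uExampleMax;
 *
 *     DECLINLINE(void) exampleUpdateMax(uint32_t uValue)
 *     {
 *         uint32_t uCur;
 *         do
 *             uCur = ASMAtomicReadU32(&g_uExampleMax);
 *         while (   uValue > uCur
 *                && !ASMAtomicCmpXchgU32(&g_uExampleMax, uValue, uCur));
 *     }
 */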
1247
1248
1249/**
1250 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1251 *
1252 * @returns true if xchg was done.
1253 * @returns false if xchg wasn't done.
1254 *
1255 * @param pi32 Pointer to the value to update.
1256 * @param i32New The new value to assign to *pi32.
1257 * @param i32Old The old value to compare *pi32 with.
1258 *
1259 * @remarks x86: Requires a 486 or later.
1260 * @todo Rename ASMAtomicCmpWriteS32
1261 */
1262DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1263{
1264 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1265}
1266
1267
1268/**
1269 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1270 *
1271 * @returns true if xchg was done.
1272 * @returns false if xchg wasn't done.
1273 *
1274 * @param pu64 Pointer to the 64-bit variable to update.
1275 * @param u64New The 64-bit value to assign to *pu64.
1276 * @param u64Old The value to compare with.
1277 *
1278 * @remarks x86: Requires a Pentium or later.
1279 * @todo Rename ASMAtomicCmpWriteU64
1280 */
1281#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1282 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1283RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1284#else
1285DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1286{
1287# if RT_INLINE_ASM_USES_INTRIN
1288 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1289
1290# elif defined(RT_ARCH_AMD64)
1291# if RT_INLINE_ASM_GNU_STYLE
1292 uint8_t u8Ret;
1293 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1294 "setz %1\n\t"
1295 : "=m" (*pu64)
1296 , "=qm" (u8Ret)
1297 , "=a" (u64Old)
1298 : "r" (u64New)
1299 , "2" (u64Old)
1300 , "m" (*pu64)
1301 : "cc");
1302 return (bool)u8Ret;
1303# else
1304 bool fRet;
1305 __asm
1306 {
1307 mov rdx, [pu32]
1308 mov rax, [u64Old]
1309 mov rcx, [u64New]
1310 lock cmpxchg [rdx], rcx
1311 setz al
1312 mov [fRet], al
1313 }
1314 return fRet;
1315# endif
1316
1317# elif defined(RT_ARCH_X86)
1318 uint32_t u32Ret;
1319# if RT_INLINE_ASM_GNU_STYLE
1320# if defined(PIC) || defined(__PIC__)
1321 uint32_t u32EBX = (uint32_t)u64New;
1322 uint32_t u32Spill;
1323 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1324 "lock; cmpxchg8b (%6)\n\t"
1325 "setz %%al\n\t"
1326 "movl %4, %%ebx\n\t"
1327 "movzbl %%al, %%eax\n\t"
1328 : "=a" (u32Ret)
1329 , "=d" (u32Spill)
1330# if RT_GNUC_PREREQ(4, 3)
1331 , "+m" (*pu64)
1332# else
1333 , "=m" (*pu64)
1334# endif
1335 : "A" (u64Old)
1336 , "m" ( u32EBX )
1337 , "c" ( (uint32_t)(u64New >> 32) )
1338 , "S" (pu64)
1339 : "cc");
1340# else /* !PIC */
1341 uint32_t u32Spill;
1342 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1343 "setz %%al\n\t"
1344 "movzbl %%al, %%eax\n\t"
1345 : "=a" (u32Ret)
1346 , "=d" (u32Spill)
1347 , "+m" (*pu64)
1348 : "A" (u64Old)
1349 , "b" ( (uint32_t)u64New )
1350 , "c" ( (uint32_t)(u64New >> 32) )
1351 : "cc");
1352# endif
1353 return (bool)u32Ret;
1354# else
1355 __asm
1356 {
1357 mov ebx, dword ptr [u64New]
1358 mov ecx, dword ptr [u64New + 4]
1359 mov edi, [pu64]
1360 mov eax, dword ptr [u64Old]
1361 mov edx, dword ptr [u64Old + 4]
1362 lock cmpxchg8b [edi]
1363 setz al
1364 movzx eax, al
1365 mov dword ptr [u32Ret], eax
1366 }
1367 return !!u32Ret;
1368# endif
1369
1370# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1371 union { uint32_t u; bool f; } fXchg;
1372 uint64_t u64Spill;
1373 uint32_t rcSpill;
1374 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1375 RTASM_ARM_DMB_SY
1376# if defined(RT_ARCH_ARM64)
1377 "ldaxr %[uOld], %[pMem]\n\t"
1378 "cmp %[uOld], %[uCmp]\n\t"
1379 "bne 1f\n\t" /* stop here if not equal */
1380 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1381 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1382 "mov %w[fXchg], #1\n\t"
1383# else
1384 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1385 "teq %[uOld], %[uCmp]\n\t"
1386 "teqeq %H[uOld], %H[uCmp]\n\t"
1387 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1388 "bne 1f\n\t" /* stop here if not equal */
1389 "cmp %[rc], #0\n\t"
1390 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1391 "mov %[fXchg], #1\n\t"
1392# endif
1393 "1:\n\t"
1394 : [pMem] "+m" (*pu64)
1395 , [uOld] "=&r" (u64Spill)
1396 , [rc] "=&r" (rcSpill)
1397 , [fXchg] "=&r" (fXchg.u)
1398 : [uCmp] "r" (u64Old)
1399 , [uNew] "r" (u64New)
1400 , "[fXchg]" (0)
1401 RTASM_ARM_DMB_SY_COMMA_IN_REG
1402 : "cc");
1403 return fXchg.f;
1404
1405# else
1406# error "Port me"
1407# endif
1408}
1409#endif
1410
1411
1412/**
1413 * Atomically Compare and exchange a signed 64-bit value, ordered.
1414 *
1415 * @returns true if xchg was done.
1416 * @returns false if xchg wasn't done.
1417 *
1418 * @param pi64 Pointer to the 64-bit variable to update.
1419 * @param i64 The 64-bit value to assign to *pi64.
1420 * @param i64Old The value to compare with.
1421 *
1422 * @remarks x86: Requires a Pentium or later.
1423 * @todo Rename ASMAtomicCmpWriteS64
1424 */
1425DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1426{
1427 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1428}
1429
1430#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
1431
1432/**
1433 * Atomically compare and write an unsigned 128-bit value, ordered.
1434 *
1435 * @returns true if write was done.
1436 * @returns false if write wasn't done.
1437 *
1438 * @param pu128 Pointer to the 128-bit variable to update.
1439 * @param u128New The 128-bit value to assign to *pu128.
1440 * @param u128Old The value to compare with.
1441 *
1442 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1443 */
1444# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
1445DECLASM(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_PROTO;
1446# else
1447DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1448{
1449# if RT_INLINE_ASM_USES_INTRIN
1450 __int64 ai64Cmp[2];
1451 ai64Cmp[0] = (__int64)u128Old.Lo;
1452 ai64Cmp[1] = (__int64)u128Old.Hi;
1453 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u128New.Hi, u128New.Lo, ai64Cmp) != 0;
1454
1455# elif defined(RT_ARCH_AMD64)
1456# if RT_INLINE_ASM_GNU_STYLE
1457 uint64_t u64Ret;
1458 uint64_t u64Spill;
1459 __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
1460 "setz %%al\n\t"
1461 "movzbl %%al, %%eax\n\t"
1462 : "=a" (u64Ret)
1463 , "=d" (u64Spill)
1464 , "+m" (*pu128)
1465 : "A" (u128Old)
1466 , "b" ((uint64_t)u128New)
1467 , "c" ((uint64_t)(u128New >> 64))
1468 : "cc");
1469
1470 return (bool)u64Ret;
1471# else
1472# error "Port me"
1473# endif
1474# else
1475# error "Port me"
1476# endif
1477}
1478# endif
1479
1480/** @def RTASM_HAVE_CMP_WRITE_U128
1481 * Indicates that we've got ASMAtomicCmpWriteU128() available. */
1482# define RTASM_HAVE_CMP_WRITE_U128 1
1483
1484/**
1485 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1486 */
1487DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1488 const RTUINT128U u128Old) RT_NOTHROW_DEF
1489{
1490 return ASMAtomicCmpWriteU128(&pu128->u, u128New.u, u128Old.u);
1491}
1492
1493#endif /* RT_ARCH_AMD64 */
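
/* Illustrative sketch: a pointer and a generation counter packed into 16
 * bytes and swapped in one shot with ASMAtomicCmpWriteU128U, the usual way of
 * avoiding ABA problems.  Remember the CPUID check mentioned above on early
 * AMD64 CPUs.  The union and names are invented for the example.
 *
 *     typedef union EXAMPLEDESC
 *     {
 *         RTUINT128U  u128;
 *         struct { uint64_t uPtr, uGen; } s;
 *     } EXAMPLEDESC;
 *     static EXAMPLEDESC volatile g_ExampleDesc;
 *
 *     DECLINLINE(bool) exampleTryUpdate(EXAMPLEDESC const *pOld, uint64_t uNewPtr)
 *     {
 *         EXAMPLEDESC New;
 *         New.s.uPtr = uNewPtr;
 *         New.s.uGen = pOld->s.uGen + 1;
 *         return ASMAtomicCmpWriteU128U(&g_ExampleDesc.u128, New.u128, pOld->u128);
 *     }
 */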
1494
1495
1496/**
1497 * Atomically Compare and Exchange a pointer value, ordered.
1498 *
1499 * @returns true if xchg was done.
1500 * @returns false if xchg wasn't done.
1501 *
1502 * @param ppv Pointer to the value to update.
1503 * @param pvNew The new value to assign to *ppv.
1504 * @param pvOld The old value to compare *ppv with.
1505 *
1506 * @remarks x86: Requires a 486 or later.
1507 * @todo Rename ASMAtomicCmpWritePtrVoid
1508 */
1509DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1510{
1511#if ARCH_BITS == 32 || ARCH_BITS == 16
1512 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1513#elif ARCH_BITS == 64
1514 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1515#else
1516# error "ARCH_BITS is bogus"
1517#endif
1518}
1519
1520
1521/**
1522 * Atomically Compare and Exchange a pointer value, ordered.
1523 *
1524 * @returns true if xchg was done.
1525 * @returns false if xchg wasn't done.
1526 *
1527 * @param ppv Pointer to the value to update.
1528 * @param pvNew The new value to assign to *ppv.
1529 * @param pvOld The old value to compare *ppv with.
1530 *
1531 * @remarks This is relatively type safe on GCC platforms.
1532 * @remarks x86: Requires a 486 or later.
1533 * @todo Rename ASMAtomicCmpWritePtr
1534 */
1535#ifdef __GNUC__
1536# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1537 __extension__ \
1538 ({\
1539 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1540 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1541 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1542 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1543 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1544 fMacroRet; \
1545 })
1546#else
1547# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1548 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1549#endif
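
/* Illustrative sketch: lock-free LIFO push using the type-safe
 * ASMAtomicCmpXchgPtr wrapper (a matching pop would additionally need ABA
 * protection, e.g. via the 128-bit compare-and-write above).
 * ASMAtomicReadPtrT is declared further down in this file; the node type and
 * names are invented for the example.
 *
 *     typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *     static EXAMPLENODE * volatile g_pExampleHead;
 *
 *     DECLINLINE(void) examplePush(EXAMPLENODE *pNode)
 *     {
 *         EXAMPLENODE *pHead;
 *         do
 *         {
 *             pHead = ASMAtomicReadPtrT(&g_pExampleHead, EXAMPLENODE *);
 *             pNode->pNext = pHead;
 *         } while (!ASMAtomicCmpXchgPtr(&g_pExampleHead, pNode, pHead));
 *     }
 */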
1550
1551
1552/** @def ASMAtomicCmpXchgHandle
1553 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1554 *
1555 * @param ph Pointer to the value to update.
1556 * @param hNew The new value to assign to *ph.
1557 * @param hOld The old value to compare *ph with.
1558 * @param fRc Where to store the result.
1559 *
1560 * @remarks This doesn't currently work for all handles (like RTFILE).
1561 * @remarks x86: Requires a 486 or later.
1562 * @todo Rename ASMAtomicCmpWriteHandle
1563 */
1564#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1565# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1566 do { \
1567 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1568 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1569 } while (0)
1570#elif HC_ARCH_BITS == 64
1571# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1572 do { \
1573 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1574 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1575 } while (0)
1576#else
1577# error HC_ARCH_BITS
1578#endif
1579
1580
1581/** @def ASMAtomicCmpXchgSize
1582 * Atomically Compare and Exchange a value which size might differ
1583 * between platforms or compilers, ordered.
1584 *
1585 * @param pu Pointer to the value to update.
1586 * @param uNew The new value to assign to *pu.
1587 * @param uOld The old value to compare *pu with.
1588 * @param fRc Where to store the result.
1589 *
1590 * @remarks x86: Requires a 486 or later.
1591 * @todo Rename ASMAtomicCmpWriteSize
1592 */
1593#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1594 do { \
1595 switch (sizeof(*(pu))) { \
1596 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1597 break; \
1598 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1599 break; \
1600 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1601 (fRc) = false; \
1602 break; \
1603 } \
1604 } while (0)
1605
1606
1607/**
1608 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1609 * passes back old value, ordered.
1610 *
1611 * @returns true if xchg was done.
1612 * @returns false if xchg wasn't done.
1613 *
1614 * @param pu32 Pointer to the value to update.
1615 * @param u32New The new value to assign to *pu32.
1616 * @param u32Old The old value to compare *pu32 with.
1617 * @param pu32Old Pointer to store the old value at.
1618 *
1619 * @remarks x86: Requires a 486 or later.
1620 */
1621#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1622RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1623#else
1624DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1625{
1626# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1627# if RT_INLINE_ASM_GNU_STYLE
1628 uint8_t u8Ret;
1629 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1630 "setz %1\n\t"
1631 : "=m" (*pu32)
1632 , "=qm" (u8Ret)
1633 , "=a" (*pu32Old)
1634 : "r" (u32New)
1635 , "a" (u32Old)
1636 , "m" (*pu32)
1637 : "cc");
1638 return (bool)u8Ret;
1639
1640# elif RT_INLINE_ASM_USES_INTRIN
1641 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1642
1643# else
1644 uint32_t u32Ret;
1645 __asm
1646 {
1647# ifdef RT_ARCH_AMD64
1648 mov rdx, [pu32]
1649# else
1650 mov edx, [pu32]
1651# endif
1652 mov eax, [u32Old]
1653 mov ecx, [u32New]
1654# ifdef RT_ARCH_AMD64
1655 lock cmpxchg [rdx], ecx
1656 mov rdx, [pu32Old]
1657 mov [rdx], eax
1658# else
1659 lock cmpxchg [edx], ecx
1660 mov edx, [pu32Old]
1661 mov [edx], eax
1662# endif
1663 setz al
1664 movzx eax, al
1665 mov [u32Ret], eax
1666 }
1667 return !!u32Ret;
1668# endif
1669
1670# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1671 union { uint32_t u; bool f; } fXchg;
1672 uint32_t u32ActualOld;
1673 uint32_t rcSpill;
1674 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1675 RTASM_ARM_DMB_SY
1676# if defined(RT_ARCH_ARM64)
1677 "ldaxr %w[uOld], %[pMem]\n\t"
1678 "cmp %w[uOld], %w[uCmp]\n\t"
1679 "bne 1f\n\t" /* stop here if not equal */
1680 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1681 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1682 "mov %w[fXchg], #1\n\t"
1683# else
1684 "ldrex %[uOld], %[pMem]\n\t"
1685 "teq %[uOld], %[uCmp]\n\t"
1686 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1687 "bne 1f\n\t" /* stop here if not equal */
1688 "cmp %[rc], #0\n\t"
1689 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1690 "mov %[fXchg], #1\n\t"
1691# endif
1692 "1:\n\t"
1693 : [pMem] "+m" (*pu32)
1694 , [uOld] "=&r" (u32ActualOld)
1695 , [rc] "=&r" (rcSpill)
1696 , [fXchg] "=&r" (fXchg.u)
1697 : [uCmp] "r" (u32Old)
1698 , [uNew] "r" (u32New)
1699 , "[fXchg]" (0)
1700 RTASM_ARM_DMB_SY_COMMA_IN_REG
1701 : "cc");
1702 *pu32Old = u32ActualOld;
1703 return fXchg.f;
1704
1705# else
1706# error "Port me"
1707# endif
1708}
1709#endif
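
/* Illustrative sketch: the Ex variant returns the value actually found in
 * memory, so a failed attempt feeds directly into the next iteration without
 * an extra read.  Here it implements a capped (saturating) retain counter;
 * ASMAtomicReadU32 is declared further down in this file and the names are
 * invented for the example.
 *
 *     static uint32_t volatile g_cExampleUsers;
 *
 *     DECLINLINE(bool) exampleRetainCapped(uint32_t cMax)
 *     {
 *         uint32_t uOld = ASMAtomicReadU32(&g_cExampleUsers);
 *         for (;;)
 *         {
 *             if (uOld >= cMax)
 *                 return false;                   // full, don't retain
 *             if (ASMAtomicCmpXchgExU32(&g_cExampleUsers, uOld + 1, uOld, &uOld))
 *                 return true;                    // uOld + 1 now stored
 *             // uOld was refreshed by the Ex call; retry with it
 *         }
 *     }
 */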
1710
1711
1712/**
1713 * Atomically Compare and Exchange a signed 32-bit value, additionally
1714 * passes back old value, ordered.
1715 *
1716 * @returns true if xchg was done.
1717 * @returns false if xchg wasn't done.
1718 *
1719 * @param pi32 Pointer to the value to update.
1720 * @param i32New The new value to assign to *pi32.
1721 * @param i32Old The old value to compare *pi32 with.
1722 * @param pi32Old Pointer to store the old value at.
1723 *
1724 * @remarks x86: Requires a 486 or later.
1725 */
1726DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
1727{
1728 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1729}
1730
1731
1732/**
1733 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1734 * passing back old value, ordered.
1735 *
1736 * @returns true if xchg was done.
1737 * @returns false if xchg wasn't done.
1738 *
1739 * @param pu64 Pointer to the 64-bit variable to update.
1740 * @param u64New The 64-bit value to assign to *pu64.
1741 * @param u64Old The value to compare with.
1742 * @param   pu64Old     Pointer to store the old value at.
1743 *
1744 * @remarks x86: Requires a Pentium or later.
1745 */
1746#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1747 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1748RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
1749#else
1750DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
1751{
1752# if RT_INLINE_ASM_USES_INTRIN
1753     return (*pu64Old = _InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1754
1755# elif defined(RT_ARCH_AMD64)
1756# if RT_INLINE_ASM_GNU_STYLE
1757 uint8_t u8Ret;
1758 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1759 "setz %1\n\t"
1760 : "=m" (*pu64)
1761 , "=qm" (u8Ret)
1762 , "=a" (*pu64Old)
1763 : "r" (u64New)
1764 , "a" (u64Old)
1765 , "m" (*pu64)
1766 : "cc");
1767 return (bool)u8Ret;
1768# else
1769 bool fRet;
1770 __asm
1771 {
1772         mov     rdx, [pu64]
1773 mov rax, [u64Old]
1774 mov rcx, [u64New]
1775 lock cmpxchg [rdx], rcx
1776 mov rdx, [pu64Old]
1777 mov [rdx], rax
1778 setz al
1779 mov [fRet], al
1780 }
1781 return fRet;
1782# endif
1783
1784# elif defined(RT_ARCH_X86)
1785# if RT_INLINE_ASM_GNU_STYLE
1786 uint64_t u64Ret;
1787# if defined(PIC) || defined(__PIC__)
1788 /* NB: this code uses a memory clobber description, because the clean
1789 * solution with an output value for *pu64 makes gcc run out of registers.
1790 * This will cause suboptimal code, and anyone with a better solution is
1791 * welcome to improve this. */
1792 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1793 "lock; cmpxchg8b %3\n\t"
1794 "xchgl %%ebx, %1\n\t"
1795 : "=A" (u64Ret)
1796 : "DS" ((uint32_t)u64New)
1797 , "c" ((uint32_t)(u64New >> 32))
1798 , "m" (*pu64)
1799 , "0" (u64Old)
1800 : "memory"
1801 , "cc" );
1802# else /* !PIC */
1803 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1804 : "=A" (u64Ret)
1805 , "=m" (*pu64)
1806 : "b" ((uint32_t)u64New)
1807 , "c" ((uint32_t)(u64New >> 32))
1808 , "m" (*pu64)
1809 , "0" (u64Old)
1810 : "cc");
1811# endif
1812 *pu64Old = u64Ret;
1813 return u64Ret == u64Old;
1814# else
1815 uint32_t u32Ret;
1816 __asm
1817 {
1818 mov ebx, dword ptr [u64New]
1819 mov ecx, dword ptr [u64New + 4]
1820 mov edi, [pu64]
1821 mov eax, dword ptr [u64Old]
1822 mov edx, dword ptr [u64Old + 4]
1823 lock cmpxchg8b [edi]
1824 mov ebx, [pu64Old]
1825 mov [ebx], eax
1826 setz al
1827 movzx eax, al
1828 add ebx, 4
1829 mov [ebx], edx
1830 mov dword ptr [u32Ret], eax
1831 }
1832 return !!u32Ret;
1833# endif
1834
1835# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1836 union { uint32_t u; bool f; } fXchg;
1837 uint64_t u64ActualOld;
1838 uint32_t rcSpill;
1839 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1840 RTASM_ARM_DMB_SY
1841# if defined(RT_ARCH_ARM64)
1842 "ldaxr %[uOld], %[pMem]\n\t"
1843 "cmp %[uOld], %[uCmp]\n\t"
1844 "bne 1f\n\t" /* stop here if not equal */
1845 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1846 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1847 "mov %w[fXchg], #1\n\t"
1848# else
1849 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1850 "teq %[uOld], %[uCmp]\n\t"
1851 "teqeq %H[uOld], %H[uCmp]\n\t"
1852 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1853 "bne 1f\n\t" /* stop here if not equal */
1854 "cmp %[rc], #0\n\t"
1855 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1856 "mov %[fXchg], #1\n\t"
1857# endif
1858 "1:\n\t"
1859 : [pMem] "+m" (*pu64)
1860 , [uOld] "=&r" (u64ActualOld)
1861 , [rc] "=&r" (rcSpill)
1862 , [fXchg] "=&r" (fXchg.u)
1863 : [uCmp] "r" (u64Old)
1864 , [uNew] "r" (u64New)
1865 , "[fXchg]" (0)
1866 RTASM_ARM_DMB_SY_COMMA_IN_REG
1867 : "cc");
1868 *pu64Old = u64ActualOld;
1869 return fXchg.f;
1870
1871# else
1872# error "Port me"
1873# endif
1874}
1875#endif
1876
1877
1878/**
1879 * Atomically Compare and exchange a signed 64-bit value, additionally
1880 * passing back old value, ordered.
1881 *
1882 * @returns true if xchg was done.
1883 * @returns false if xchg wasn't done.
1884 *
1885 * @param pi64 Pointer to the 64-bit variable to update.
1886 * @param   i64         The 64-bit value to assign to *pi64.
1887 * @param   i64Old      The value to compare with.
1888 * @param   pi64Old     Pointer to store the old value at.
1889 *
1890 * @remarks x86: Requires a Pentium or later.
1891 */
1892DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
1893{
1894 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1895}
1896
1897/** @def ASMAtomicCmpXchgExHandle
1898 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1899 *
1900 * @param ph Pointer to the value to update.
1901 * @param   hNew        The new value to assign to *ph.
1902 * @param   hOld        The old value to compare *ph with.
1903 * @param fRc Where to store the result.
1904 * @param phOldVal Pointer to where to store the old value.
1905 *
1906 * @remarks This doesn't currently work for all handles (like RTFILE).
1907 */
1908#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1909# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1910 do { \
1911 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1912 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1913 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1914 } while (0)
1915#elif HC_ARCH_BITS == 64
1916# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1917 do { \
1918 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1919 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1920 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1921 } while (0)
1922#else
1923# error HC_ARCH_BITS
1924#endif
1925
1926
1927/** @def ASMAtomicCmpXchgExSize
1928 * Atomically Compare and Exchange a value whose size might differ
1929 * between platforms or compilers. Additionally passes back the old value.
1930 *
1931 * @param pu Pointer to the value to update.
1932 * @param   uNew        The new value to assign to *pu.
1933 * @param   uOld        The old value to compare *pu with.
1934 * @param fRc Where to store the result.
1935 * @param puOldVal Pointer to where to store the old value.
1936 *
1937 * @remarks x86: Requires a 486 or later.
1938 */
1939#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1940 do { \
1941 switch (sizeof(*(pu))) { \
1942             case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1943                 break; \
1944             case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1945                 break; \
1946             default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1947                 (fRc) = false; \
1948                 *(puOldVal) = 0; \
1949 break; \
1950 } \
1951 } while (0)
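
/* A minimal usage sketch (uField, uSeen and fDone are hypothetical; RTHCUINTREG is
 * assumed to be the register-sized unsigned type from iprt/types.h): the Size macro
 * is handy when the operand width depends on the host, as it picks the 32-bit or
 * 64-bit worker at compile time.
 *
 * @code
 *      RTHCUINTREG volatile uField = 0;
 *      RTHCUINTREG          uSeen;
 *      bool                 fDone;
 *      ASMAtomicCmpXchgExSize(&uField, 42, 0, fDone, &uSeen);
 *      if (!fDone)
 *      {
 *          // the exchange was refused; uSeen holds the value that was found
 *      }
 * @endcode
 */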
1952
1953
1954/**
1955 * Atomically Compare and Exchange a pointer value, additionally
1956 * passing back old value, ordered.
1957 *
1958 * @returns true if xchg was done.
1959 * @returns false if xchg wasn't done.
1960 *
1961 * @param ppv Pointer to the value to update.
1962 * @param   pvNew       The new value to assign to *ppv.
1963 * @param   pvOld       The old value to compare *ppv with.
1964 * @param   ppvOld      Pointer to store the old value at.
1965 *
1966 * @remarks x86: Requires a 486 or later.
1967 */
1968DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1969 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
1970{
1971#if ARCH_BITS == 32 || ARCH_BITS == 16
1972 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
1973#elif ARCH_BITS == 64
1974 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
1975#else
1976# error "ARCH_BITS is bogus"
1977#endif
1978}
1979
1980
1981/**
1982 * Atomically Compare and Exchange a pointer value, additionally
1983 * passing back old value, ordered.
1984 *
1985 * @returns true if xchg was done.
1986 * @returns false if xchg wasn't done.
1987 *
1988 * @param ppv Pointer to the value to update.
1989 * @param   pvNew       The new value to assign to *ppv.
1990 * @param   pvOld       The old value to compare *ppv with.
1991 * @param   ppvOld      Pointer to store the old value at.
1992 *
1993 * @remarks This is relatively type safe on GCC platforms.
1994 * @remarks x86: Requires a 486 or later.
1995 */
1996#ifdef __GNUC__
1997# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1998 __extension__ \
1999 ({\
2000 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2001 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
2002 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
2003 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
2004 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
2005 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
2006 (void **)ppvOldTypeChecked); \
2007 fMacroRet; \
2008 })
2009#else
2010# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2011 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
2012#endif
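
/* A minimal usage sketch (MYNODE, s_pHead and myLifoPush are hypothetical): the
 * classic compare-exchange retry loop for a lock-free LIFO push, reusing the
 * returned old value instead of re-reading the head on every attempt.
 *
 * @code
 *      typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 *      static MYNODE * volatile s_pHead = NULL;
 *
 *      static void myLifoPush(MYNODE *pNode)
 *      {
 *          MYNODE *pOld = ASMAtomicReadPtrT(&s_pHead, MYNODE *);
 *          for (;;)
 *          {
 *              MYNODE *pSeen;
 *              pNode->pNext = pOld;
 *              if (ASMAtomicCmpXchgExPtr(&s_pHead, pNode, pOld, &pSeen))
 *                  break;
 *              pOld = pSeen;   // lost the race; retry against the value we saw
 *          }
 *      }
 * @endcode
 */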
2013
2014
2015/**
2016 * Virtualization unfriendly serializing instruction, always exits.
2017 */
2018#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2019RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
2020#else
2021DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
2022{
2023# if RT_INLINE_ASM_GNU_STYLE
2024 RTCCUINTREG xAX = 0;
2025# ifdef RT_ARCH_AMD64
2026 __asm__ __volatile__ ("cpuid"
2027 : "=a" (xAX)
2028 : "0" (xAX)
2029 : "rbx", "rcx", "rdx", "memory");
2030# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
2031 __asm__ __volatile__ ("push %%ebx\n\t"
2032 "cpuid\n\t"
2033 "pop %%ebx\n\t"
2034 : "=a" (xAX)
2035 : "0" (xAX)
2036 : "ecx", "edx", "memory");
2037# else
2038 __asm__ __volatile__ ("cpuid"
2039 : "=a" (xAX)
2040 : "0" (xAX)
2041 : "ebx", "ecx", "edx", "memory");
2042# endif
2043
2044# elif RT_INLINE_ASM_USES_INTRIN
2045 int aInfo[4];
2046 _ReadWriteBarrier();
2047 __cpuid(aInfo, 0);
2048
2049# else
2050 __asm
2051 {
2052 push ebx
2053 xor eax, eax
2054 cpuid
2055 pop ebx
2056 }
2057# endif
2058}
2059#endif
2060
2061/**
2062 * Virtualization friendly serializing instruction, though more expensive.
2063 */
2064#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2065RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2066#else
2067DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2068{
2069# if RT_INLINE_ASM_GNU_STYLE
2070# ifdef RT_ARCH_AMD64
2071 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2072 "subq $128, %%rsp\n\t" /*redzone*/
2073 "mov %%ss, %%eax\n\t"
2074 "pushq %%rax\n\t"
2075 "pushq %%r10\n\t"
2076 "pushfq\n\t"
2077 "movl %%cs, %%eax\n\t"
2078 "pushq %%rax\n\t"
2079 "leaq 1f(%%rip), %%rax\n\t"
2080 "pushq %%rax\n\t"
2081 "iretq\n\t"
2082 "1:\n\t"
2083 ::: "rax", "r10", "memory", "cc");
2084# else
2085 __asm__ __volatile__ ("pushfl\n\t"
2086 "pushl %%cs\n\t"
2087 "pushl $1f\n\t"
2088 "iretl\n\t"
2089 "1:\n\t"
2090 ::: "memory");
2091# endif
2092
2093# else
2094 __asm
2095 {
2096 pushfd
2097 push cs
2098 push la_ret
2099 iretd
2100 la_ret:
2101 }
2102# endif
2103}
2104#endif
2105
2106/**
2107 * Virtualization friendlier serializing instruction, may still cause exits.
2108 */
2109#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2110RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2111#else
2112DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2113{
2114# if RT_INLINE_ASM_GNU_STYLE
2115     /* rdtscp is not supported by the ancient Linux build VM, of course :-( */
2116# ifdef RT_ARCH_AMD64
2117     /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx", "rcx"); */
2118 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2119# else
2120     /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx", "ecx"); */
2121 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2122# endif
2123# else
2124# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2125 uint32_t uIgnore;
2126 _ReadWriteBarrier();
2127 (void)__rdtscp(&uIgnore);
2128 (void)uIgnore;
2129# else
2130 __asm
2131 {
2132 rdtscp
2133 }
2134# endif
2135# endif
2136}
2137#endif
2138
2139
2140/**
2141 * Serialize Instruction (both data store and instruction flush).
2142 */
2143#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2144# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2145#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2146# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2147#elif defined(RT_ARCH_SPARC64)
2148RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2149#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2150DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2151{
2152 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2153}
2154#else
2155# error "Port me"
2156#endif
2157
2158
2159/**
2160 * Memory fence, waits for any pending writes and reads to complete.
2161 * @note No implicit compiler barrier (which is probably stupid).
2162 */
2163DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2164{
2165#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2166# if RT_INLINE_ASM_GNU_STYLE
2167 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2168# elif RT_INLINE_ASM_USES_INTRIN
2169 _mm_mfence();
2170# else
2171 __asm
2172 {
2173 _emit 0x0f
2174 _emit 0xae
2175 _emit 0xf0
2176 }
2177# endif
2178#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2179 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2180#elif ARCH_BITS == 16
2181 uint16_t volatile u16;
2182 ASMAtomicXchgU16(&u16, 0);
2183#else
2184 uint32_t volatile u32;
2185 ASMAtomicXchgU32(&u32, 0);
2186#endif
2187}
2188
2189
2190/**
2191 * Write fence, waits for any pending writes to complete.
2192 * @note No implicit compiler barrier (which is probably stupid).
2193 */
2194DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2195{
2196#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2197# if RT_INLINE_ASM_GNU_STYLE
2198 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2199# elif RT_INLINE_ASM_USES_INTRIN
2200 _mm_sfence();
2201# else
2202 __asm
2203 {
2204 _emit 0x0f
2205 _emit 0xae
2206 _emit 0xf8
2207 }
2208# endif
2209#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2210 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2211#else
2212 ASMMemoryFence();
2213#endif
2214}
2215
2216
2217/**
2218 * Read fence, waits for any pending reads to complete.
2219 * @note No implicit compiler barrier (which is probably stupid).
2220 */
2221DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2222{
2223#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2224# if RT_INLINE_ASM_GNU_STYLE
2225 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2226# elif RT_INLINE_ASM_USES_INTRIN
2227 _mm_lfence();
2228# else
2229 __asm
2230 {
2231 _emit 0x0f
2232 _emit 0xae
2233 _emit 0xe8
2234 }
2235# endif
2236#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2237 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2238#else
2239 ASMMemoryFence();
2240#endif
2241}
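
/* A minimal usage sketch (g_uPayload and g_fReady are hypothetical): the
 * producer/consumer publication pattern these fences are meant for.  Remember the
 * notes above: none of the fences imply a compiler barrier.
 *
 * @code
 *      // shared (hypothetical) state
 *      extern uint32_t volatile g_uPayload;
 *      extern bool     volatile g_fReady;
 *
 *      // producer
 *      g_uPayload = 42;
 *      ASMWriteFence();                        // payload must hit memory before the flag
 *      ASMAtomicWriteBool(&g_fReady, true);
 *
 *      // consumer
 *      uint32_t uValue = 0;
 *      if (ASMAtomicReadBool(&g_fReady))
 *      {
 *          ASMReadFence();                     // don't read the payload ahead of the flag
 *          uValue = g_uPayload;
 *      }
 * @endcode
 */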
2242
2243
2244/**
2245 * Atomically reads an unsigned 8-bit value, ordered.
2246 *
2247 * @returns Current *pu8 value
2248 * @param pu8 Pointer to the 8-bit variable to read.
2249 */
2250DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2251{
2252#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2253 uint32_t u32;
2254 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2255 RTASM_ARM_DMB_SY
2256# if defined(RT_ARCH_ARM64)
2257 "ldxrb %w[uDst], %[pMem]\n\t"
2258# else
2259 "ldrexb %[uDst], %[pMem]\n\t"
2260# endif
2261 : [uDst] "=&r" (u32)
2262 : [pMem] "m" (*pu8)
2263 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2264 return (uint8_t)u32;
2265#else
2266 ASMMemoryFence();
2267 return *pu8; /* byte reads are atomic on x86 */
2268#endif
2269}
2270
2271
2272/**
2273 * Atomically reads an unsigned 8-bit value, unordered.
2274 *
2275 * @returns Current *pu8 value
2276 * @param pu8 Pointer to the 8-bit variable to read.
2277 */
2278DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2279{
2280#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2281 uint32_t u32;
2282 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2283# if defined(RT_ARCH_ARM64)
2284 "ldxrb %w[uDst], %[pMem]\n\t"
2285# else
2286 "ldrexb %[uDst], %[pMem]\n\t"
2287# endif
2288 : [uDst] "=&r" (u32)
2289 : [pMem] "m" (*pu8));
2290 return (uint8_t)u32;
2291#else
2292 return *pu8; /* byte reads are atomic on x86 */
2293#endif
2294}
2295
2296
2297/**
2298 * Atomically reads a signed 8-bit value, ordered.
2299 *
2300 * @returns Current *pi8 value
2301 * @param pi8 Pointer to the 8-bit variable to read.
2302 */
2303DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2304{
2305 ASMMemoryFence();
2306#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2307 int32_t i32;
2308 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2309 RTASM_ARM_DMB_SY
2310# if defined(RT_ARCH_ARM64)
2311 "ldxrb %w[iDst], %[pMem]\n\t"
2312# else
2313 "ldrexb %[iDst], %[pMem]\n\t"
2314# endif
2315 : [iDst] "=&r" (i32)
2316 : [pMem] "m" (*pi8)
2317 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2318 return (int8_t)i32;
2319#else
2320 return *pi8; /* byte reads are atomic on x86 */
2321#endif
2322}
2323
2324
2325/**
2326 * Atomically reads a signed 8-bit value, unordered.
2327 *
2328 * @returns Current *pi8 value
2329 * @param pi8 Pointer to the 8-bit variable to read.
2330 */
2331DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2332{
2333#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2334 int32_t i32;
2335 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2336# if defined(RT_ARCH_ARM64)
2337 "ldxrb %w[iDst], %[pMem]\n\t"
2338# else
2339 "ldrexb %[iDst], %[pMem]\n\t"
2340# endif
2341 : [iDst] "=&r" (i32)
2342 : [pMem] "m" (*pi8));
2343 return (int8_t)i32;
2344#else
2345 return *pi8; /* byte reads are atomic on x86 */
2346#endif
2347}
2348
2349
2350/**
2351 * Atomically reads an unsigned 16-bit value, ordered.
2352 *
2353 * @returns Current *pu16 value
2354 * @param pu16 Pointer to the 16-bit variable to read.
2355 */
2356DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2357{
2358 Assert(!((uintptr_t)pu16 & 1));
2359#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2360 uint32_t u32;
2361 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2362 RTASM_ARM_DMB_SY
2363# if defined(RT_ARCH_ARM64)
2364 "ldxrh %w[uDst], %[pMem]\n\t"
2365# else
2366 "ldrexh %[uDst], %[pMem]\n\t"
2367# endif
2368 : [uDst] "=&r" (u32)
2369 : [pMem] "m" (*pu16)
2370 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2371 return (uint16_t)u32;
2372#else
2373 ASMMemoryFence();
2374 return *pu16;
2375#endif
2376}
2377
2378
2379/**
2380 * Atomically reads an unsigned 16-bit value, unordered.
2381 *
2382 * @returns Current *pu16 value
2383 * @param pu16 Pointer to the 16-bit variable to read.
2384 */
2385DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2386{
2387 Assert(!((uintptr_t)pu16 & 1));
2388#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2389 uint32_t u32;
2390 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2391# if defined(RT_ARCH_ARM64)
2392 "ldxrh %w[uDst], %[pMem]\n\t"
2393# else
2394 "ldrexh %[uDst], %[pMem]\n\t"
2395# endif
2396 : [uDst] "=&r" (u32)
2397 : [pMem] "m" (*pu16));
2398 return (uint16_t)u32;
2399#else
2400 return *pu16;
2401#endif
2402}
2403
2404
2405/**
2406 * Atomically reads a signed 16-bit value, ordered.
2407 *
2408 * @returns Current *pi16 value
2409 * @param pi16 Pointer to the 16-bit variable to read.
2410 */
2411DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2412{
2413 Assert(!((uintptr_t)pi16 & 1));
2414#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2415 int32_t i32;
2416 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2417 RTASM_ARM_DMB_SY
2418# if defined(RT_ARCH_ARM64)
2419 "ldxrh %w[iDst], %[pMem]\n\t"
2420# else
2421 "ldrexh %[iDst], %[pMem]\n\t"
2422# endif
2423 : [iDst] "=&r" (i32)
2424 : [pMem] "m" (*pi16)
2425 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2426 return (int16_t)i32;
2427#else
2428 ASMMemoryFence();
2429 return *pi16;
2430#endif
2431}
2432
2433
2434/**
2435 * Atomically reads a signed 16-bit value, unordered.
2436 *
2437 * @returns Current *pi16 value
2438 * @param pi16 Pointer to the 16-bit variable to read.
2439 */
2440DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2441{
2442 Assert(!((uintptr_t)pi16 & 1));
2443#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2444 int32_t i32;
2445 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2446# if defined(RT_ARCH_ARM64)
2447 "ldxrh %w[iDst], %[pMem]\n\t"
2448# else
2449 "ldrexh %[iDst], %[pMem]\n\t"
2450# endif
2451 : [iDst] "=&r" (i32)
2452 : [pMem] "m" (*pi16));
2453 return (int16_t)i32;
2454#else
2455 return *pi16;
2456#endif
2457}
2458
2459
2460/**
2461 * Atomically reads an unsigned 32-bit value, ordered.
2462 *
2463 * @returns Current *pu32 value
2464 * @param pu32 Pointer to the 32-bit variable to read.
2465 */
2466DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2467{
2468 Assert(!((uintptr_t)pu32 & 3));
2469#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2470 uint32_t u32;
2471 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2472 RTASM_ARM_DMB_SY
2473# if defined(RT_ARCH_ARM64)
2474 "ldxr %w[uDst], %[pMem]\n\t"
2475# else
2476 "ldrex %[uDst], %[pMem]\n\t"
2477# endif
2478 : [uDst] "=&r" (u32)
2479 : [pMem] "m" (*pu32)
2480 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2481 return u32;
2482#else
2483 ASMMemoryFence();
2484# if ARCH_BITS == 16
2485 AssertFailed(); /** @todo 16-bit */
2486# endif
2487 return *pu32;
2488#endif
2489}
2490
2491
2492/**
2493 * Atomically reads an unsigned 32-bit value, unordered.
2494 *
2495 * @returns Current *pu32 value
2496 * @param pu32 Pointer to the 32-bit variable to read.
2497 */
2498DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2499{
2500 Assert(!((uintptr_t)pu32 & 3));
2501#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2502 uint32_t u32;
2503 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2504# if defined(RT_ARCH_ARM64)
2505 "ldxr %w[uDst], %[pMem]\n\t"
2506# else
2507 "ldrex %[uDst], %[pMem]\n\t"
2508# endif
2509 : [uDst] "=&r" (u32)
2510 : [pMem] "m" (*pu32));
2511 return u32;
2512#else
2513# if ARCH_BITS == 16
2514 AssertFailed(); /** @todo 16-bit */
2515# endif
2516 return *pu32;
2517#endif
2518}
2519
2520
2521/**
2522 * Atomically reads a signed 32-bit value, ordered.
2523 *
2524 * @returns Current *pi32 value
2525 * @param pi32 Pointer to the 32-bit variable to read.
2526 */
2527DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2528{
2529 Assert(!((uintptr_t)pi32 & 3));
2530#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2531 int32_t i32;
2532 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2533 RTASM_ARM_DMB_SY
2534# if defined(RT_ARCH_ARM64)
2535 "ldxr %w[iDst], %[pMem]\n\t"
2536# else
2537 "ldrex %[iDst], %[pMem]\n\t"
2538# endif
2539 : [iDst] "=&r" (i32)
2540 : [pMem] "m" (*pi32)
2541 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2542 return i32;
2543#else
2544 ASMMemoryFence();
2545# if ARCH_BITS == 16
2546 AssertFailed(); /** @todo 16-bit */
2547# endif
2548 return *pi32;
2549#endif
2550}
2551
2552
2553/**
2554 * Atomically reads a signed 32-bit value, unordered.
2555 *
2556 * @returns Current *pi32 value
2557 * @param pi32 Pointer to the 32-bit variable to read.
2558 */
2559DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2560{
2561 Assert(!((uintptr_t)pi32 & 3));
2562#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2563 int32_t i32;
2564 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2565# if defined(RT_ARCH_ARM64)
2566 "ldxr %w[iDst], %[pMem]\n\t"
2567# else
2568 "ldrex %[iDst], %[pMem]\n\t"
2569# endif
2570 : [iDst] "=&r" (i32)
2571 : [pMem] "m" (*pi32));
2572 return i32;
2573
2574#else
2575# if ARCH_BITS == 16
2576 AssertFailed(); /** @todo 16-bit */
2577# endif
2578 return *pi32;
2579#endif
2580}
2581
2582
2583/**
2584 * Atomically reads an unsigned 64-bit value, ordered.
2585 *
2586 * @returns Current *pu64 value
2587 * @param pu64 Pointer to the 64-bit variable to read.
2588 * The memory pointed to must be writable.
2589 *
2590 * @remarks This may fault if the memory is read-only!
2591 * @remarks x86: Requires a Pentium or later.
2592 */
2593#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
2594 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2595RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2596#else
2597DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2598{
2599 uint64_t u64;
2600# ifdef RT_ARCH_AMD64
2601 Assert(!((uintptr_t)pu64 & 7));
2602/*# if RT_INLINE_ASM_GNU_STYLE
2603 __asm__ __volatile__( "mfence\n\t"
2604 "movq %1, %0\n\t"
2605 : "=r" (u64)
2606 : "m" (*pu64));
2607# else
2608 __asm
2609 {
2610 mfence
2611 mov rdx, [pu64]
2612 mov rax, [rdx]
2613 mov [u64], rax
2614 }
2615# endif*/
2616 ASMMemoryFence();
2617 u64 = *pu64;
2618
2619# elif defined(RT_ARCH_X86)
2620# if RT_INLINE_ASM_GNU_STYLE
2621# if defined(PIC) || defined(__PIC__)
2622 uint32_t u32EBX = 0;
2623 Assert(!((uintptr_t)pu64 & 7));
2624 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2625 "lock; cmpxchg8b (%5)\n\t"
2626 "movl %3, %%ebx\n\t"
2627 : "=A" (u64)
2628# if RT_GNUC_PREREQ(4, 3)
2629 , "+m" (*pu64)
2630# else
2631 , "=m" (*pu64)
2632# endif
2633 : "0" (0ULL)
2634 , "m" (u32EBX)
2635 , "c" (0)
2636 , "S" (pu64)
2637 : "cc");
2638# else /* !PIC */
2639 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2640 : "=A" (u64)
2641 , "+m" (*pu64)
2642 : "0" (0ULL)
2643 , "b" (0)
2644 , "c" (0)
2645 : "cc");
2646# endif
2647# else
2648 Assert(!((uintptr_t)pu64 & 7));
2649 __asm
2650 {
2651 xor eax, eax
2652 xor edx, edx
2653 mov edi, pu64
2654 xor ecx, ecx
2655 xor ebx, ebx
2656 lock cmpxchg8b [edi]
2657 mov dword ptr [u64], eax
2658 mov dword ptr [u64 + 4], edx
2659 }
2660# endif
2661
2662# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2663 Assert(!((uintptr_t)pu64 & 7));
2664 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
2665 RTASM_ARM_DMB_SY
2666# if defined(RT_ARCH_ARM64)
2667 "ldxr %[uDst], %[pMem]\n\t"
2668# else
2669 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2670# endif
2671 : [uDst] "=&r" (u64)
2672 : [pMem] "m" (*pu64)
2673 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2674
2675# else
2676# error "Port me"
2677# endif
2678 return u64;
2679}
2680#endif
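
/* A minimal usage sketch (s_u64Timestamp is hypothetical): on 32-bit x86 the
 * ordered read above is implemented with LOCK CMPXCHG8B, which is why the variable
 * must be writable and should be naturally aligned; a plain 64-bit load could tear
 * on such hosts.
 *
 * @code
 *      static uint64_t volatile s_u64Timestamp;
 *      uint64_t const u64Now = ASMAtomicReadU64(&s_u64Timestamp);  // never a torn value
 * @endcode
 */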
2681
2682
2683/**
2684 * Atomically reads an unsigned 64-bit value, unordered.
2685 *
2686 * @returns Current *pu64 value
2687 * @param pu64 Pointer to the 64-bit variable to read.
2688 * The memory pointed to must be writable.
2689 *
2690 * @remarks This may fault if the memory is read-only!
2691 * @remarks x86: Requires a Pentium or later.
2692 */
2693#if !defined(RT_ARCH_AMD64) \
2694 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2695 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
2696RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2697#else
2698DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2699{
2700 uint64_t u64;
2701# ifdef RT_ARCH_AMD64
2702 Assert(!((uintptr_t)pu64 & 7));
2703/*# if RT_INLINE_ASM_GNU_STYLE
2704 Assert(!((uintptr_t)pu64 & 7));
2705 __asm__ __volatile__("movq %1, %0\n\t"
2706 : "=r" (u64)
2707 : "m" (*pu64));
2708# else
2709 __asm
2710 {
2711 mov rdx, [pu64]
2712 mov rax, [rdx]
2713 mov [u64], rax
2714 }
2715# endif */
2716 u64 = *pu64;
2717
2718# elif defined(RT_ARCH_X86)
2719# if RT_INLINE_ASM_GNU_STYLE
2720# if defined(PIC) || defined(__PIC__)
2721 uint32_t u32EBX = 0;
2722 uint32_t u32Spill;
2723 Assert(!((uintptr_t)pu64 & 7));
2724 __asm__ __volatile__("xor %%eax,%%eax\n\t"
2725 "xor %%ecx,%%ecx\n\t"
2726 "xor %%edx,%%edx\n\t"
2727 "xchgl %%ebx, %3\n\t"
2728 "lock; cmpxchg8b (%4)\n\t"
2729 "movl %3, %%ebx\n\t"
2730 : "=A" (u64)
2731# if RT_GNUC_PREREQ(4, 3)
2732 , "+m" (*pu64)
2733# else
2734 , "=m" (*pu64)
2735# endif
2736 , "=c" (u32Spill)
2737 : "m" (u32EBX)
2738 , "S" (pu64)
2739 : "cc");
2740# else /* !PIC */
2741 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2742 : "=A" (u64)
2743 , "+m" (*pu64)
2744 : "0" (0ULL)
2745 , "b" (0)
2746 , "c" (0)
2747 : "cc");
2748# endif
2749# else
2750 Assert(!((uintptr_t)pu64 & 7));
2751 __asm
2752 {
2753 xor eax, eax
2754 xor edx, edx
2755 mov edi, pu64
2756 xor ecx, ecx
2757 xor ebx, ebx
2758 lock cmpxchg8b [edi]
2759 mov dword ptr [u64], eax
2760 mov dword ptr [u64 + 4], edx
2761 }
2762# endif
2763
2764# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2765 Assert(!((uintptr_t)pu64 & 7));
2766 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
2767# if defined(RT_ARCH_ARM64)
2768 "ldxr %[uDst], %[pMem]\n\t"
2769# else
2770 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2771# endif
2772 : [uDst] "=&r" (u64)
2773 : [pMem] "m" (*pu64));
2774
2775# else
2776# error "Port me"
2777# endif
2778 return u64;
2779}
2780#endif
2781
2782
2783/**
2784 * Atomically reads a signed 64-bit value, ordered.
2785 *
2786 * @returns Current *pi64 value
2787 * @param pi64 Pointer to the 64-bit variable to read.
2788 * The memory pointed to must be writable.
2789 *
2790 * @remarks This may fault if the memory is read-only!
2791 * @remarks x86: Requires a Pentium or later.
2792 */
2793DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2794{
2795 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
2796}
2797
2798
2799/**
2800 * Atomically reads a signed 64-bit value, unordered.
2801 *
2802 * @returns Current *pi64 value
2803 * @param pi64 Pointer to the 64-bit variable to read.
2804 * The memory pointed to must be writable.
2805 *
2806 * @remarks This will fault if the memory is read-only!
2807 * @remarks x86: Requires a Pentium or later.
2808 */
2809DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2810{
2811 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
2812}
2813
2814
2815/**
2816 * Atomically reads a size_t value, ordered.
2817 *
2818 * @returns Current *pcb value
2819 * @param pcb Pointer to the size_t variable to read.
2820 */
2821DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2822{
2823#if ARCH_BITS == 64
2824 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
2825#elif ARCH_BITS == 32
2826 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
2827#elif ARCH_BITS == 16
2828 AssertCompileSize(size_t, 2);
2829 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2830#else
2831# error "Unsupported ARCH_BITS value"
2832#endif
2833}
2834
2835
2836/**
2837 * Atomically reads a size_t value, unordered.
2838 *
2839 * @returns Current *pcb value
2840 * @param pcb Pointer to the size_t variable to read.
2841 */
2842DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2843{
2844 #if ARCH_BITS == 64
2845 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2846#elif ARCH_BITS == 32
2847 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2848#elif ARCH_BITS == 16
2849 AssertCompileSize(size_t, 2);
2850 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2851#else
2852# error "Unsupported ARCH_BITS value"
2853#endif
2854}
2855
2856
2857/**
2858 * Atomically reads a pointer value, ordered.
2859 *
2860 * @returns Current *pv value
2861 * @param ppv Pointer to the pointer variable to read.
2862 *
2863 * @remarks Please use ASMAtomicReadPtrT; it provides better type safety and
2864 * requires less typing (no casts).
2865 */
2866DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2867{
2868#if ARCH_BITS == 32 || ARCH_BITS == 16
2869 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2870#elif ARCH_BITS == 64
2871 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2872#else
2873# error "ARCH_BITS is bogus"
2874#endif
2875}
2876
2877/**
2878 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2879 *
2880 * @returns Current *pv value
2881 * @param ppv Pointer to the pointer variable to read.
2882 * @param Type The type of *ppv, sans volatile.
2883 */
2884#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2885# define ASMAtomicReadPtrT(ppv, Type) \
2886 __extension__ \
2887 ({\
2888 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2889 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2890 pvTypeChecked; \
2891 })
2892#else
2893# define ASMAtomicReadPtrT(ppv, Type) \
2894 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2895#endif
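
/* A minimal usage sketch (MYSTATE, PMYSTATE, g_pState and mystateDoWork are
 * hypothetical): the typed macro keeps the call site free of casts while still
 * going through the ordered pointer read above.
 *
 * @code
 *      typedef struct MYSTATE *PMYSTATE;
 *      extern PMYSTATE volatile g_pState;
 *
 *      PMYSTATE pState = ASMAtomicReadPtrT(&g_pState, PMYSTATE);
 *      if (pState)
 *          mystateDoWork(pState);              // hypothetical consumer
 * @endcode
 */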
2896
2897
2898/**
2899 * Atomically reads a pointer value, unordered.
2900 *
2901 * @returns Current *pv value
2902 * @param ppv Pointer to the pointer variable to read.
2903 *
2904 * @remarks Please use ASMAtomicUoReadPtrT; it provides better type safety and
2905 * requires less typing (no casts).
2906 */
2907DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2908{
2909#if ARCH_BITS == 32 || ARCH_BITS == 16
2910 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2911#elif ARCH_BITS == 64
2912 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2913#else
2914# error "ARCH_BITS is bogus"
2915#endif
2916}
2917
2918
2919/**
2920 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2921 *
2922 * @returns Current *pv value
2923 * @param ppv Pointer to the pointer variable to read.
2924 * @param Type The type of *ppv, sans volatile.
2925 */
2926#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2927# define ASMAtomicUoReadPtrT(ppv, Type) \
2928 __extension__ \
2929 ({\
2930 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2931 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2932 pvTypeChecked; \
2933 })
2934#else
2935# define ASMAtomicUoReadPtrT(ppv, Type) \
2936 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2937#endif
2938
2939
2940/**
2941 * Atomically reads a boolean value, ordered.
2942 *
2943 * @returns Current *pf value
2944 * @param pf Pointer to the boolean variable to read.
2945 */
2946DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2947{
2948 ASMMemoryFence();
2949 return *pf; /* byte reads are atomic on x86 */
2950}
2951
2952
2953/**
2954 * Atomically reads a boolean value, unordered.
2955 *
2956 * @returns Current *pf value
2957 * @param pf Pointer to the boolean variable to read.
2958 */
2959DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2960{
2961 return *pf; /* byte reads are atomic on x86 */
2962}
2963
2964
2965/**
2966 * Atomically read a typical IPRT handle value, ordered.
2967 *
2968 * @param ph Pointer to the handle variable to read.
2969 * @param phRes Where to store the result.
2970 *
2971 * @remarks This doesn't currently work for all handles (like RTFILE).
2972 */
2973#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
2974# define ASMAtomicReadHandle(ph, phRes) \
2975 do { \
2976 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2977 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2978 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
2979 } while (0)
2980#elif HC_ARCH_BITS == 64
2981# define ASMAtomicReadHandle(ph, phRes) \
2982 do { \
2983 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2984 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2985 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
2986 } while (0)
2987#else
2988# error HC_ARCH_BITS
2989#endif
2990
2991
2992/**
2993 * Atomically read a typical IPRT handle value, unordered.
2994 *
2995 * @param ph Pointer to the handle variable to read.
2996 * @param phRes Where to store the result.
2997 *
2998 * @remarks This doesn't currently work for all handles (like RTFILE).
2999 */
3000#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3001# define ASMAtomicUoReadHandle(ph, phRes) \
3002 do { \
3003 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3004 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3005 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3006 } while (0)
3007#elif HC_ARCH_BITS == 64
3008# define ASMAtomicUoReadHandle(ph, phRes) \
3009 do { \
3010 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3011 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3012 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3013 } while (0)
3014#else
3015# error HC_ARCH_BITS
3016#endif
3017
3018
3019/**
3020 * Atomically read a value whose size might differ
3021 * between platforms or compilers, ordered.
3022 *
3023 * @param pu Pointer to the variable to read.
3024 * @param puRes Where to store the result.
3025 */
3026#define ASMAtomicReadSize(pu, puRes) \
3027 do { \
3028 switch (sizeof(*(pu))) { \
3029 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3030 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3031 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3032 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3033 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3034 } \
3035 } while (0)
3036
3037
3038/**
3039 * Atomically read a value whose size might differ
3040 * between platforms or compilers, unordered.
3041 *
3042 * @param pu Pointer to the variable to read.
3043 * @param puRes Where to store the result.
3044 */
3045#define ASMAtomicUoReadSize(pu, puRes) \
3046 do { \
3047 switch (sizeof(*(pu))) { \
3048 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3049 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3050 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3051 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3052             default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3053 } \
3054 } while (0)
3055
3056
3057/**
3058 * Atomically writes an unsigned 8-bit value, ordered.
3059 *
3060 * @param pu8 Pointer to the 8-bit variable.
3061 * @param u8 The 8-bit value to assign to *pu8.
3062 */
3063DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3064{
3065 /** @todo Any possible ARM32/ARM64 optimizations here? */
3066 ASMAtomicXchgU8(pu8, u8);
3067}
3068
3069
3070/**
3071 * Atomically writes an unsigned 8-bit value, unordered.
3072 *
3073 * @param pu8 Pointer to the 8-bit variable.
3074 * @param u8 The 8-bit value to assign to *pu8.
3075 */
3076DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3077{
3078 /** @todo Any possible ARM32/ARM64 improvements here? */
3079 *pu8 = u8; /* byte writes are atomic on x86 */
3080}
3081
3082
3083/**
3084 * Atomically writes a signed 8-bit value, ordered.
3085 *
3086 * @param pi8 Pointer to the 8-bit variable to read.
3087 * @param i8 The 8-bit value to assign to *pi8.
3088 */
3089DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3090{
3091 /** @todo Any possible ARM32/ARM64 optimizations here? */
3092 ASMAtomicXchgS8(pi8, i8);
3093}
3094
3095
3096/**
3097 * Atomically writes a signed 8-bit value, unordered.
3098 *
3099 * @param pi8 Pointer to the 8-bit variable to write.
3100 * @param i8 The 8-bit value to assign to *pi8.
3101 */
3102DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3103{
3104 *pi8 = i8; /* byte writes are atomic on x86 */
3105}
3106
3107
3108/**
3109 * Atomically writes an unsigned 16-bit value, ordered.
3110 *
3111 * @param pu16 Pointer to the 16-bit variable to write.
3112 * @param u16 The 16-bit value to assign to *pu16.
3113 */
3114DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3115{
3116 /** @todo Any possible ARM32/ARM64 optimizations here? */
3117 ASMAtomicXchgU16(pu16, u16);
3118}
3119
3120
3121/**
3122 * Atomically writes an unsigned 16-bit value, unordered.
3123 *
3124 * @param pu16 Pointer to the 16-bit variable to write.
3125 * @param u16 The 16-bit value to assign to *pu16.
3126 */
3127DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3128{
3129 Assert(!((uintptr_t)pu16 & 1));
3130 *pu16 = u16;
3131}
3132
3133
3134/**
3135 * Atomically writes a signed 16-bit value, ordered.
3136 *
3137 * @param pi16 Pointer to the 16-bit variable to write.
3138 * @param i16 The 16-bit value to assign to *pi16.
3139 */
3140DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3141{
3142 /** @todo Any possible ARM32/ARM64 optimizations here? */
3143 ASMAtomicXchgS16(pi16, i16);
3144}
3145
3146
3147/**
3148 * Atomically writes a signed 16-bit value, unordered.
3149 *
3150 * @param pi16 Pointer to the 16-bit variable to write.
3151 * @param i16 The 16-bit value to assign to *pi16.
3152 */
3153DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3154{
3155 Assert(!((uintptr_t)pi16 & 1));
3156 *pi16 = i16;
3157}
3158
3159
3160/**
3161 * Atomically writes an unsigned 32-bit value, ordered.
3162 *
3163 * @param pu32 Pointer to the 32-bit variable to write.
3164 * @param u32 The 32-bit value to assign to *pu32.
3165 */
3166DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3167{
3168 /** @todo Any possible ARM32/ARM64 optimizations here? */
3169 ASMAtomicXchgU32(pu32, u32);
3170}
3171
3172
3173/**
3174 * Atomically writes an unsigned 32-bit value, unordered.
3175 *
3176 * @param pu32 Pointer to the 32-bit variable to write.
3177 * @param u32 The 32-bit value to assign to *pu32.
3178 */
3179DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3180{
3181 Assert(!((uintptr_t)pu32 & 3));
3182#if ARCH_BITS >= 32
3183 *pu32 = u32;
3184#else
3185 ASMAtomicXchgU32(pu32, u32);
3186#endif
3187}
3188
3189
3190/**
3191 * Atomically writes a signed 32-bit value, ordered.
3192 *
3193 * @param pi32 Pointer to the 32-bit variable to write.
3194 * @param i32 The 32-bit value to assign to *pi32.
3195 */
3196DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3197{
3198 ASMAtomicXchgS32(pi32, i32);
3199}
3200
3201
3202/**
3203 * Atomically writes a signed 32-bit value, unordered.
3204 *
3205 * @param pi32 Pointer to the 32-bit variable to write.
3206 * @param i32 The 32-bit value to assign to *pi32.
3207 */
3208DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3209{
3210 Assert(!((uintptr_t)pi32 & 3));
3211#if ARCH_BITS >= 32
3212 *pi32 = i32;
3213#else
3214 ASMAtomicXchgS32(pi32, i32);
3215#endif
3216}
3217
3218
3219/**
3220 * Atomically writes an unsigned 64-bit value, ordered.
3221 *
3222 * @param pu64 Pointer to the 64-bit variable to write.
3223 * @param u64 The 64-bit value to assign to *pu64.
3224 */
3225DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3226{
3227 /** @todo Any possible ARM32/ARM64 optimizations here? */
3228 ASMAtomicXchgU64(pu64, u64);
3229}
3230
3231
3232/**
3233 * Atomically writes an unsigned 64-bit value, unordered.
3234 *
3235 * @param pu64 Pointer to the 64-bit variable to write.
3236 * @param u64 The 64-bit value to assign to *pu64.
3237 */
3238DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3239{
3240 Assert(!((uintptr_t)pu64 & 7));
3241#if ARCH_BITS == 64
3242 *pu64 = u64;
3243#else
3244 ASMAtomicXchgU64(pu64, u64);
3245#endif
3246}
3247
3248
3249/**
3250 * Atomically writes a signed 64-bit value, ordered.
3251 *
3252 * @param pi64 Pointer to the 64-bit variable to write.
3253 * @param i64 The 64-bit value to assign to *pi64.
3254 */
3255DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3256{
3257 /** @todo Any possible ARM32/ARM64 optimizations here? */
3258 ASMAtomicXchgS64(pi64, i64);
3259}
3260
3261
3262/**
3263 * Atomically writes a signed 64-bit value, unordered.
3264 *
3265 * @param pi64 Pointer to the 64-bit variable to write.
3266 * @param i64 The 64-bit value to assign to *pi64.
3267 */
3268DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3269{
3270 Assert(!((uintptr_t)pi64 & 7));
3271#if ARCH_BITS == 64
3272 *pi64 = i64;
3273#else
3274 ASMAtomicXchgS64(pi64, i64);
3275#endif
3276}
3277
3278
3279/**
3280 * Atomically writes a size_t value, ordered.
3281 *
3282 * @returns nothing.
3283 * @param pcb Pointer to the size_t variable to write.
3284 * @param cb The value to assign to *pcb.
3285 */
3286DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3287{
3288#if ARCH_BITS == 64
3289 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3290#elif ARCH_BITS == 32
3291 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3292#elif ARCH_BITS == 16
3293 AssertCompileSize(size_t, 2);
3294 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3295#else
3296# error "Unsupported ARCH_BITS value"
3297#endif
3298}
3299
3300
3301/**
3302 * Atomically writes a size_t value, unordered.
3303 *
3304 * @returns nothing.
3305 * @param pcb Pointer to the size_t variable to write.
3306 * @param cb The value to assign to *pcb.
3307 */
3308DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3309{
3310#if ARCH_BITS == 64
3311 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3312#elif ARCH_BITS == 32
3313 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3314#elif ARCH_BITS == 16
3315 AssertCompileSize(size_t, 2);
3316 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3317#else
3318# error "Unsupported ARCH_BITS value"
3319#endif
3320}
3321
3322
3323/**
3324 * Atomically writes a boolean value, ordered.
3325 *
3326 * @param pf Pointer to the boolean variable to write.
3327 * @param f The boolean value to assign to *pf.
3328 */
3329DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3330{
3331 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3332}
3333
3334
3335/**
3336 * Atomically writes a boolean value, unordered.
3337 *
3338 * @param pf Pointer to the boolean variable to write.
3339 * @param f The boolean value to assign to *pf.
3340 */
3341DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3342{
3343 *pf = f; /* byte writes are atomic on x86 */
3344}
3345
3346
3347/**
3348 * Atomically writes a pointer value, ordered.
3349 *
3350 * @param ppv Pointer to the pointer variable to write.
3351 * @param pv The pointer value to assign to *ppv.
3352 */
3353DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3354{
3355#if ARCH_BITS == 32 || ARCH_BITS == 16
3356 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3357#elif ARCH_BITS == 64
3358 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3359#else
3360# error "ARCH_BITS is bogus"
3361#endif
3362}
3363
3364
3365/**
3366 * Atomically writes a pointer value, unordered.
3367 *
3368 * @param ppv Pointer to the pointer variable to write.
3369 * @param pv The pointer value to assign to *ppv.
3370 */
3371DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3372{
3373#if ARCH_BITS == 32 || ARCH_BITS == 16
3374 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3375#elif ARCH_BITS == 64
3376 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3377#else
3378# error "ARCH_BITS is bogus"
3379#endif
3380}
3381
3382
3383/**
3384 * Atomically writes a pointer value, ordered.
3385 *
3386 * @param ppv Pointer to the pointer variable to write.
3387 * @param pv The pointer value to assign to *ppv. If NULL use
3388 * ASMAtomicWriteNullPtr or you'll land in trouble.
3389 *
3390 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3391 * NULL.
3392 */
3393#ifdef __GNUC__
3394# define ASMAtomicWritePtr(ppv, pv) \
3395 do \
3396 { \
3397 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3398 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3399 \
3400 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3401 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3402 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3403 \
3404 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3405 } while (0)
3406#else
3407# define ASMAtomicWritePtr(ppv, pv) \
3408 do \
3409 { \
3410 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3411 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3412 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3413 \
3414 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3415 } while (0)
3416#endif
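
/* A minimal usage sketch (continuing the hypothetical PMYSTATE / g_pState example;
 * mystateInit is made up as well): publish a fully constructed object with an
 * ordered pointer write so that readers using ASMAtomicReadPtrT never see a
 * half-initialized one.
 *
 * @code
 *      PMYSTATE pNew = (PMYSTATE)RTMemAllocZ(sizeof(*pNew));
 *      if (pNew)
 *      {
 *          mystateInit(pNew);                  // hypothetical initializer
 *          ASMAtomicWritePtr(&g_pState, pNew); // init is visible before the pointer is
 *      }
 * @endcode
 */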
3417
3418
3419/**
3420 * Atomically sets a pointer to NULL, ordered.
3421 *
3422 * @param ppv Pointer to the pointer variable that should be set to NULL.
3423 *
3424 * @remarks This is relatively type safe on GCC platforms.
3425 */
3426#if RT_GNUC_PREREQ(4, 2)
3427# define ASMAtomicWriteNullPtr(ppv) \
3428 do \
3429 { \
3430 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3431 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3432 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3433 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3434 } while (0)
3435#else
3436# define ASMAtomicWriteNullPtr(ppv) \
3437 do \
3438 { \
3439 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3440 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3441 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3442 } while (0)
3443#endif
3444
3445
3446/**
3447 * Atomically writes a pointer value, unordered.
3448 *
3449 * @returns Current *pv value
3450 * @param ppv Pointer to the pointer variable.
3451 * @param pv The pointer value to assign to *ppv. If NULL use
3452 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3453 *
3454 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3455 * NULL.
3456 */
3457#if RT_GNUC_PREREQ(4, 2)
3458# define ASMAtomicUoWritePtr(ppv, pv) \
3459 do \
3460 { \
3461 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3462 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3463 \
3464 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3465 AssertCompile(sizeof(pv) == sizeof(void *)); \
3466 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3467 \
3468 *(ppvTypeChecked) = pvTypeChecked; \
3469 } while (0)
3470#else
3471# define ASMAtomicUoWritePtr(ppv, pv) \
3472 do \
3473 { \
3474 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3475 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3476 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3477 *(ppv) = pv; \
3478 } while (0)
3479#endif
3480
3481
3482/**
3483 * Atomically sets a pointer to NULL, unordered.
3484 *
3485 * @param ppv Pointer to the pointer variable that should be set to NULL.
3486 *
3487 * @remarks This is relatively type safe on GCC platforms.
3488 */
3489#ifdef __GNUC__
3490# define ASMAtomicUoWriteNullPtr(ppv) \
3491 do \
3492 { \
3493 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3494 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3495 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3496 *(ppvTypeChecked) = NULL; \
3497 } while (0)
3498#else
3499# define ASMAtomicUoWriteNullPtr(ppv) \
3500 do \
3501 { \
3502 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3503 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3504 *(ppv) = NULL; \
3505 } while (0)
3506#endif
3507
3508
3509/**
3510 * Atomically write a typical IPRT handle value, ordered.
3511 *
3512 * @param ph Pointer to the variable to update.
3513 * @param hNew The value to assign to *ph.
3514 *
3515 * @remarks This doesn't currently work for all handles (like RTFILE).
3516 */
3517#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3518# define ASMAtomicWriteHandle(ph, hNew) \
3519 do { \
3520 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3521 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3522 } while (0)
3523#elif HC_ARCH_BITS == 64
3524# define ASMAtomicWriteHandle(ph, hNew) \
3525 do { \
3526 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3527 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3528 } while (0)
3529#else
3530# error HC_ARCH_BITS
3531#endif
3532
3533
3534/**
3535 * Atomically write a typical IPRT handle value, unordered.
3536 *
3537 * @param ph Pointer to the variable to update.
3538 * @param hNew The value to assign to *ph.
3539 *
3540 * @remarks This doesn't currently work for all handles (like RTFILE).
3541 */
3542#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3543# define ASMAtomicUoWriteHandle(ph, hNew) \
3544 do { \
3545 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3546         ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3547 } while (0)
3548#elif HC_ARCH_BITS == 64
3549# define ASMAtomicUoWriteHandle(ph, hNew) \
3550 do { \
3551 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3552         ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3553 } while (0)
3554#else
3555# error HC_ARCH_BITS
3556#endif
3557
3558
3559/**
3560 * Atomically write a value whose size might differ
3561 * between platforms or compilers, ordered.
3562 *
3563 * @param pu Pointer to the variable to update.
3564 * @param uNew The value to assign to *pu.
3565 */
3566#define ASMAtomicWriteSize(pu, uNew) \
3567 do { \
3568 switch (sizeof(*(pu))) { \
3569 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3570 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3571 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3572 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3573 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3574 } \
3575 } while (0)
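
/* Example (illustrative sketch; the variable is made up): uintptr_t is 4 or 8 bytes
 * depending on the target, so the macro picks the U32 or U64 write at compile time:
 *      uintptr_t volatile uShared;
 *      ASMAtomicWriteSize(&uShared, (uintptr_t)42);
 */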
3576
3577/**
3578 * Atomically write a value whose size might differ
3579 * between platforms or compilers, unordered.
3580 *
3581 * @param pu Pointer to the variable to update.
3582 * @param uNew The value to assign to *pu.
3583 */
3584#define ASMAtomicUoWriteSize(pu, uNew) \
3585 do { \
3586 switch (sizeof(*(pu))) { \
3587 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3588 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3589 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3590 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3591 default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3592 } \
3593 } while (0)
3594
3595
3596
3597/**
3598 * Atomically exchanges and adds to a 16-bit value, ordered.
3599 *
3600 * @returns The old value.
3601 * @param pu16 Pointer to the value.
3602 * @param u16 Number to add.
3603 *
3604 * @remarks Currently not implemented, just to make 16-bit code happy.
3605 * @remarks x86: Requires a 486 or later.
3606 */
3607RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
3608
3609
3610/**
3611 * Atomically exchanges and adds to a 32-bit value, ordered.
3612 *
3613 * @returns The old value.
3614 * @param pu32 Pointer to the value.
3615 * @param u32 Number to add.
3616 *
3617 * @remarks x86: Requires a 486 or later.
3618 */
3619#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3620RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3621#else
3622DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3623{
3624# if RT_INLINE_ASM_USES_INTRIN
3625 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
3626 return u32;
3627
3628# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3629# if RT_INLINE_ASM_GNU_STYLE
3630 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3631 : "=r" (u32)
3632 , "=m" (*pu32)
3633 : "0" (u32)
3634 , "m" (*pu32)
3635 : "memory"
3636 , "cc");
3637 return u32;
3638# else
3639 __asm
3640 {
3641 mov eax, [u32]
3642# ifdef RT_ARCH_AMD64
3643 mov rdx, [pu32]
3644 lock xadd [rdx], eax
3645# else
3646 mov edx, [pu32]
3647 lock xadd [edx], eax
3648# endif
3649 mov [u32], eax
3650 }
3651 return u32;
3652# endif
3653
3654# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3655 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
3656 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
3657 "add %[uNew], %[uOld], %[uVal]\n\t",
3658 [uVal] "r" (u32));
3659 return u32OldRet;
3660
3661# else
3662# error "Port me"
3663# endif
3664}
3665#endif
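
/* Example (illustrative sketch; the counter is made up). Note that the return
 * value is the content *before* the addition:
 *      static uint32_t volatile g_cbUsed = 0;
 *      uint32_t const cbOld = ASMAtomicAddU32(&g_cbUsed, 16);
 *      uint32_t const cbNew = cbOld + 16;
 */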
3666
3667
3668/**
3669 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3670 *
3671 * @returns The old value.
3672 * @param pi32 Pointer to the value.
3673 * @param i32 Number to add.
3674 *
3675 * @remarks x86: Requires a 486 or later.
3676 */
3677DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3678{
3679 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3680}
3681
3682
3683/**
3684 * Atomically exchanges and adds to a 64-bit value, ordered.
3685 *
3686 * @returns The old value.
3687 * @param pu64 Pointer to the value.
3688 * @param u64 Number to add.
3689 *
3690 * @remarks x86: Requires a Pentium or later.
3691 */
3692#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3693DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3694#else
3695DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3696{
3697# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3698 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
3699 return u64;
3700
3701# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3702 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3703 : "=r" (u64)
3704 , "=m" (*pu64)
3705 : "0" (u64)
3706 , "m" (*pu64)
3707 : "memory"
3708 , "cc");
3709 return u64;
3710
3711# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3712 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
3713 "add %[uNew], %[uOld], %[uVal]\n\t"
3714 ,
3715 "add %[uNew], %[uOld], %[uVal]\n\t"
3716 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
3717 [uVal] "r" (u64));
3718 return u64OldRet;
3719
3720# else
3721 uint64_t u64Old;
3722 for (;;)
3723 {
3724 uint64_t u64New;
3725 u64Old = ASMAtomicUoReadU64(pu64);
3726 u64New = u64Old + u64;
3727 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3728 break;
3729 ASMNopPause();
3730 }
3731 return u64Old;
3732# endif
3733}
3734#endif
3735
3736
3737/**
3738 * Atomically exchanges and adds to a signed 64-bit value, ordered.
3739 *
3740 * @returns The old value.
3741 * @param pi64 Pointer to the value.
3742 * @param i64 Number to add.
3743 *
3744 * @remarks x86: Requires a Pentium or later.
3745 */
3746DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3747{
3748 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3749}
3750
3751
3752/**
3753 * Atomically exchanges and adds to a size_t value, ordered.
3754 *
3755 * @returns The old value.
3756 * @param pcb Pointer to the size_t value.
3757 * @param cb Number to add.
3758 */
3759DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3760{
3761#if ARCH_BITS == 64
3762 AssertCompileSize(size_t, 8);
3763 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
3764#elif ARCH_BITS == 32
3765 AssertCompileSize(size_t, 4);
3766 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
3767#elif ARCH_BITS == 16
3768 AssertCompileSize(size_t, 2);
3769 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
3770#else
3771# error "Unsupported ARCH_BITS value"
3772#endif
3773}
3774
3775
3776/**
3777 * Atomically exchanges and adds a value whose size might differ between
3778 * platforms or compilers, ordered.
3779 *
3780 * @param pu Pointer to the variable to update.
3781 * @param uNew The value to add to *pu.
3782 * @param puOld Where to store the old value.
3783 */
3784#define ASMAtomicAddSize(pu, uNew, puOld) \
3785 do { \
3786 switch (sizeof(*(pu))) { \
3787 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3788 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3789 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
3790 } \
3791 } while (0)
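
/* Example (illustrative sketch; the names are made up): size_t is 4 or 8 bytes
 * depending on the target, so the macro dispatches to the U32 or U64 worker:
 *      size_t volatile cbPool = 0;
 *      size_t          cbOld;
 *      ASMAtomicAddSize(&cbPool, 4096, &cbOld);    // cbOld receives the pre-add value
 */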
3792
3793
3794
3795/**
3796 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
3797 *
3798 * @returns The old value.
3799 * @param pu16 Pointer to the value.
3800 * @param u16 Number to subtract.
3801 *
3802 * @remarks x86: Requires a 486 or later.
3803 */
3804DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
3805{
3806 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
3807}
3808
3809
3810/**
3811 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
3812 *
3813 * @returns The old value.
3814 * @param pi16 Pointer to the value.
3815 * @param i16 Number to subtract.
3816 *
3817 * @remarks x86: Requires a 486 or later.
3818 */
3819DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3820{
3821 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
3822}
3823
3824
3825/**
3826 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3827 *
3828 * @returns The old value.
3829 * @param pu32 Pointer to the value.
3830 * @param u32 Number to subtract.
3831 *
3832 * @remarks x86: Requires a 486 or later.
3833 */
3834DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3835{
3836 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3837}
3838
3839
3840/**
3841 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3842 *
3843 * @returns The old value.
3844 * @param pi32 Pointer to the value.
3845 * @param i32 Number to subtract.
3846 *
3847 * @remarks x86: Requires a 486 or later.
3848 */
3849DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3850{
3851 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
3852}
3853
3854
3855/**
3856 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
3857 *
3858 * @returns The old value.
3859 * @param pu64 Pointer to the value.
3860 * @param u64 Number to subtract.
3861 *
3862 * @remarks x86: Requires a Pentium or later.
3863 */
3864DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3865{
3866 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
3867}
3868
3869
3870/**
3871 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
3872 *
3873 * @returns The old value.
3874 * @param pi64 Pointer to the value.
3875 * @param i64 Number to subtract.
3876 *
3877 * @remarks x86: Requires a Pentium or later.
3878 */
3879DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3880{
3881 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
3882}
3883
3884
3885/**
3886 * Atomically exchanges and subtracts from a size_t value, ordered.
3887 *
3888 * @returns The old value.
3889 * @param pcb Pointer to the size_t value.
3890 * @param cb Number to subtract.
3891 *
3892 * @remarks x86: Requires a 486 or later.
3893 */
3894DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3895{
3896#if ARCH_BITS == 64
3897 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
3898#elif ARCH_BITS == 32
3899 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3900#elif ARCH_BITS == 16
3901 AssertCompileSize(size_t, 2);
3902 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3903#else
3904# error "Unsupported ARCH_BITS value"
3905#endif
3906}
3907
3908
3909/**
3910 * Atomically exchanges and subtracts a value whose size might differ between
3911 * platforms or compilers, ordered.
3912 *
3913 * @param pu Pointer to the variable to update.
3914 * @param uNew The value to subtract from *pu.
3915 * @param puOld Where to store the old value.
3916 *
3917 * @remarks x86: Requires a 486 or later.
3918 */
3919#define ASMAtomicSubSize(pu, uNew, puOld) \
3920 do { \
3921 switch (sizeof(*(pu))) { \
3922 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3923 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3924 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3925 } \
3926 } while (0)
3927
3928
3929
3930/**
3931 * Atomically increment a 16-bit value, ordered.
3932 *
3933 * @returns The new value.
3934 * @param pu16 Pointer to the value to increment.
3935 * @remarks Not implemented. Just to make 16-bit code happy.
3936 *
3937 * @remarks x86: Requires a 486 or later.
3938 */
3939RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3940
3941
3942/**
3943 * Atomically increment a 32-bit value, ordered.
3944 *
3945 * @returns The new value.
3946 * @param pu32 Pointer to the value to increment.
3947 *
3948 * @remarks x86: Requires a 486 or later.
3949 */
3950#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3951RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3952#else
3953DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3954{
3955# if RT_INLINE_ASM_USES_INTRIN
3956 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
3957
3958# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3959# if RT_INLINE_ASM_GNU_STYLE
3960 uint32_t u32;
3961 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3962 : "=r" (u32)
3963 , "=m" (*pu32)
3964 : "0" (1)
3965 , "m" (*pu32)
3966 : "memory"
3967 , "cc");
3968 return u32+1;
3969# else
3970 __asm
3971 {
3972 mov eax, 1
3973# ifdef RT_ARCH_AMD64
3974 mov rdx, [pu32]
3975 lock xadd [rdx], eax
3976# else
3977 mov edx, [pu32]
3978 lock xadd [edx], eax
3979# endif
3980 mov u32, eax
3981 }
3982 return u32+1;
3983# endif
3984
3985# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3986 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
3987 "add %w[uNew], %w[uNew], #1\n\t",
3988 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
3989 "X" (0) /* dummy */);
3990 return u32NewRet;
3991
3992# else
3993 return ASMAtomicAddU32(pu32, 1) + 1;
3994# endif
3995}
3996#endif
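
/* Example (illustrative retain sketch; pObj and its cRefs member are made up):
 *      uint32_t const cRefs = ASMAtomicIncU32(&pObj->cRefs);   // returns the new count
 *      Assert(cRefs > 1);
 */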
3997
3998
3999/**
4000 * Atomically increment a signed 32-bit value, ordered.
4001 *
4002 * @returns The new value.
4003 * @param pi32 Pointer to the value to increment.
4004 *
4005 * @remarks x86: Requires a 486 or later.
4006 */
4007DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4008{
4009 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4010}
4011
4012
4013/**
4014 * Atomically increment a 64-bit value, ordered.
4015 *
4016 * @returns The new value.
4017 * @param pu64 Pointer to the value to increment.
4018 *
4019 * @remarks x86: Requires a Pentium or later.
4020 */
4021#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4022DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4023#else
4024DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4025{
4026# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4027 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4028
4029# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4030 uint64_t u64;
4031 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4032 : "=r" (u64)
4033 , "=m" (*pu64)
4034 : "0" (1)
4035 , "m" (*pu64)
4036 : "memory"
4037 , "cc");
4038 return u64 + 1;
4039
4040# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4041 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4042 "add %[uNew], %[uNew], #1\n\t"
4043 ,
4044 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4045 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4046 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4047 return u64NewRet;
4048
4049# else
4050 return ASMAtomicAddU64(pu64, 1) + 1;
4051# endif
4052}
4053#endif
4054
4055
4056/**
4057 * Atomically increment a signed 64-bit value, ordered.
4058 *
4059 * @returns The new value.
4060 * @param pi64 Pointer to the value to increment.
4061 *
4062 * @remarks x86: Requires a Pentium or later.
4063 */
4064DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4065{
4066 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4067}
4068
4069
4070/**
4071 * Atomically increment a size_t value, ordered.
4072 *
4073 * @returns The new value.
4074 * @param pcb Pointer to the value to increment.
4075 *
4076 * @remarks x86: Requires a 486 or later.
4077 */
4078DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4079{
4080#if ARCH_BITS == 64
4081 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4082#elif ARCH_BITS == 32
4083 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4084#elif ARCH_BITS == 16
4085 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4086#else
4087# error "Unsupported ARCH_BITS value"
4088#endif
4089}
4090
4091
4092
4093/**
4094 * Atomically decrement an unsigned 16-bit value, ordered.
4095 *
4096 * @returns The new value.
4097 * @param pu16 Pointer to the value to decrement.
4098 * @remarks Not implemented. Just to make 16-bit code happy.
4099 *
4100 * @remarks x86: Requires a 486 or later.
4101 */
4102RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4103
4104
4105/**
4106 * Atomically decrement an unsigned 32-bit value, ordered.
4107 *
4108 * @returns The new value.
4109 * @param pu32 Pointer to the value to decrement.
4110 *
4111 * @remarks x86: Requires a 486 or later.
4112 */
4113#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4114RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4115#else
4116DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4117{
4118# if RT_INLINE_ASM_USES_INTRIN
4119 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4120
4121# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4122# if RT_INLINE_ASM_GNU_STYLE
4123 uint32_t u32;
4124 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4125 : "=r" (u32)
4126 , "=m" (*pu32)
4127 : "0" (-1)
4128 , "m" (*pu32)
4129 : "memory"
4130 , "cc");
4131 return u32-1;
4132# else
4133 uint32_t u32;
4134 __asm
4135 {
4136 mov eax, -1
4137# ifdef RT_ARCH_AMD64
4138 mov rdx, [pu32]
4139 lock xadd [rdx], eax
4140# else
4141 mov edx, [pu32]
4142 lock xadd [edx], eax
4143# endif
4144 mov u32, eax
4145 }
4146 return u32-1;
4147# endif
4148
4149# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4150 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4151 "sub %w[uNew], %w[uNew], #1\n\t",
4152 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4153 "X" (0) /* dummy */);
4154 return u32NewRet;
4155
4156# else
4157 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4158# endif
4159}
4160#endif
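
/* Example (illustrative release sketch matching the retain example further up; the
 * object and destructor names are made up):
 *      uint32_t const cRefs = ASMAtomicDecU32(&pObj->cRefs);   // returns the new count
 *      if (cRefs == 0)
 *          myExampleObjDestroy(pObj);
 */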
4161
4162
4163/**
4164 * Atomically decrement a signed 32-bit value, ordered.
4165 *
4166 * @returns The new value.
4167 * @param pi32 Pointer to the value to decrement.
4168 *
4169 * @remarks x86: Requires a 486 or later.
4170 */
4171DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4172{
4173 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4174}
4175
4176
4177/**
4178 * Atomically decrement an unsigned 64-bit value, ordered.
4179 *
4180 * @returns The new value.
4181 * @param pu64 Pointer to the value to decrement.
4182 *
4183 * @remarks x86: Requires a Pentium or later.
4184 */
4185#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4186RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4187#else
4188DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4189{
4190# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4191 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4192
4193# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4194 uint64_t u64;
4195 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4196 : "=r" (u64)
4197 , "=m" (*pu64)
4198 : "0" (~(uint64_t)0)
4199 , "m" (*pu64)
4200 : "memory"
4201 , "cc");
4202 return u64-1;
4203
4204# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4205 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4206 "sub %[uNew], %[uNew], #1\n\t"
4207 ,
4208 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4209 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4210 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4211 return u64NewRet;
4212
4213# else
4214 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4215# endif
4216}
4217#endif
4218
4219
4220/**
4221 * Atomically decrement a signed 64-bit value, ordered.
4222 *
4223 * @returns The new value.
4224 * @param pi64 Pointer to the value to decrement.
4225 *
4226 * @remarks x86: Requires a Pentium or later.
4227 */
4228DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4229{
4230 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4231}
4232
4233
4234/**
4235 * Atomically decrement a size_t value, ordered.
4236 *
4237 * @returns The new value.
4238 * @param pcb Pointer to the value to decrement.
4239 *
4240 * @remarks x86: Requires a 486 or later.
4241 */
4242DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4243{
4244#if ARCH_BITS == 64
4245 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4246#elif ARCH_BITS == 32
4247 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4248#elif ARCH_BITS == 16
4249 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4250#else
4251# error "Unsupported ARCH_BITS value"
4252#endif
4253}
4254
4255
4256/**
4257 * Atomically Or an unsigned 32-bit value, ordered.
4258 *
4259 * @param pu32 Pointer to the variable to OR u32 with.
4260 * @param u32 The value to OR *pu32 with.
4261 *
4262 * @remarks x86: Requires a 386 or later.
4263 */
4264#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4265RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4266#else
4267DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4268{
4269# if RT_INLINE_ASM_USES_INTRIN
4270 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4271
4272# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4273# if RT_INLINE_ASM_GNU_STYLE
4274 __asm__ __volatile__("lock; orl %1, %0\n\t"
4275 : "=m" (*pu32)
4276 : "ir" (u32)
4277 , "m" (*pu32)
4278 : "cc");
4279# else
4280 __asm
4281 {
4282 mov eax, [u32]
4283# ifdef RT_ARCH_AMD64
4284 mov rdx, [pu32]
4285 lock or [rdx], eax
4286# else
4287 mov edx, [pu32]
4288 lock or [edx], eax
4289# endif
4290 }
4291# endif
4292
4293# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4294 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4295 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4296 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4297 "orr %[uNew], %[uNew], %[uVal]\n\t",
4298 [uVal] "r" (u32));
4299
4300# else
4301# error "Port me"
4302# endif
4303}
4304#endif
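
/* Example (illustrative sketch; the state structure and flag are made up):
 *      #define MY_F_SHUTDOWN   RT_BIT_32(0)
 *      ASMAtomicOrU32(&pState->fFlags, MY_F_SHUTDOWN);     // atomically sets the bit
 */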
4305
4306
4307/**
4308 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4309 * fallback).
4310 *
4311 * @returns Old value.
4312 * @param pu32 Pointer to the variable to OR @a u32 with.
4313 * @param u32 The value to OR @a *pu32 with.
4314 */
4315DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4316{
4317#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4318 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4319 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4320 "orr %[uNew], %[uOld], %[uVal]\n\t",
4321 [uVal] "r" (u32));
4322 return u32OldRet;
4323
4324#else
4325 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4326 uint32_t u32New;
4327 do
4328 u32New = u32RetOld | u32;
4329 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4330 return u32RetOld;
4331#endif
4332}
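
/* Example (illustrative sketch, reusing the made-up flag from above): the returned
 * old value tells whether the flag was already set by someone else:
 *      uint32_t const fOld    = ASMAtomicOrExU32(&pState->fFlags, MY_F_SHUTDOWN);
 *      bool     const fWasSet = RT_BOOL(fOld & MY_F_SHUTDOWN);
 */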
4333
4334
4335/**
4336 * Atomically Or a signed 32-bit value, ordered.
4337 *
4338 * @param pi32 Pointer to the variable to OR i32 with.
4339 * @param i32 The value to OR *pi32 with.
4340 *
4341 * @remarks x86: Requires a 386 or later.
4342 */
4343DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4344{
4345 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4346}
4347
4348
4349/**
4350 * Atomically Or an unsigned 64-bit value, ordered.
4351 *
4352 * @param pu64 Pointer to the variable to OR u64 with.
4353 * @param u64 The value to OR *pu64 with.
4354 *
4355 * @remarks x86: Requires a Pentium or later.
4356 */
4357#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4358DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4359#else
4360DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4361{
4362# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4363 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4364
4365# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4366 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4367 : "=m" (*pu64)
4368 : "r" (u64)
4369 , "m" (*pu64)
4370 : "cc");
4371
4372# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4373 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4374 "orr %[uNew], %[uNew], %[uVal]\n\t"
4375 ,
4376 "orr %[uNew], %[uNew], %[uVal]\n\t"
4377 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4378 [uVal] "r" (u64));
4379
4380# else
4381 for (;;)
4382 {
4383 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4384 uint64_t u64New = u64Old | u64;
4385 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4386 break;
4387 ASMNopPause();
4388 }
4389# endif
4390}
4391#endif
4392
4393
4394/**
4395 * Atomically Or a signed 64-bit value, ordered.
4396 *
4397 * @param pi64 Pointer to the variable to OR i64 with.
4398 * @param i64 The value to OR *pi64 with.
4399 *
4400 * @remarks x86: Requires a Pentium or later.
4401 */
4402DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4403{
4404 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4405}
4406
4407
4408/**
4409 * Atomically And an unsigned 32-bit value, ordered.
4410 *
4411 * @param pu32 Pointer to the variable to AND u32 with.
4412 * @param u32 The value to AND *pu32 with.
4413 *
4414 * @remarks x86: Requires a 386 or later.
4415 */
4416#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4417RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4418#else
4419DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4420{
4421# if RT_INLINE_ASM_USES_INTRIN
4422 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4423
4424# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4425# if RT_INLINE_ASM_GNU_STYLE
4426 __asm__ __volatile__("lock; andl %1, %0\n\t"
4427 : "=m" (*pu32)
4428 : "ir" (u32)
4429 , "m" (*pu32)
4430 : "cc");
4431# else
4432 __asm
4433 {
4434 mov eax, [u32]
4435# ifdef RT_ARCH_AMD64
4436 mov rdx, [pu32]
4437 lock and [rdx], eax
4438# else
4439 mov edx, [pu32]
4440 lock and [edx], eax
4441# endif
4442 }
4443# endif
4444
4445# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4446 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4447 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4448 "and %[uNew], %[uNew], %[uVal]\n\t",
4449 [uVal] "r" (u32));
4450
4451# else
4452# error "Port me"
4453# endif
4454}
4455#endif
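
/* Example (illustrative sketch, clearing the made-up flag set in the OR example above):
 *      ASMAtomicAndU32(&pState->fFlags, ~MY_F_SHUTDOWN);   // atomically clears the bit
 */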
4456
4457
4458/**
4459 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4460 *
4461 * @returns Old value.
4462 * @param pu32 Pointer to the variable to AND @a u32 with.
4463 * @param u32 The value to AND @a *pu32 with.
4464 */
4465DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4466{
4467#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4468 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4469 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4470 "and %[uNew], %[uOld], %[uVal]\n\t",
4471 [uVal] "r" (u32));
4472 return u32OldRet;
4473
4474#else
4475 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4476 uint32_t u32New;
4477 do
4478 u32New = u32RetOld & u32;
4479 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4480 return u32RetOld;
4481#endif
4482}
4483
4484
4485/**
4486 * Atomically And a signed 32-bit value, ordered.
4487 *
4488 * @param pi32 Pointer to the variable to AND i32 with.
4489 * @param i32 The value to AND *pi32 with.
4490 *
4491 * @remarks x86: Requires a 386 or later.
4492 */
4493DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4494{
4495 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4496}
4497
4498
4499/**
4500 * Atomically And an unsigned 64-bit value, ordered.
4501 *
4502 * @param pu64 Pointer to the variable to AND u64 with.
4503 * @param u64 The value to AND *pu64 with.
4504 *
4505 * @remarks x86: Requires a Pentium or later.
4506 */
4507#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4508DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4509#else
4510DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4511{
4512# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4513 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4514
4515# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4516 __asm__ __volatile__("lock; andq %1, %0\n\t"
4517 : "=m" (*pu64)
4518 : "r" (u64)
4519 , "m" (*pu64)
4520 : "cc");
4521
4522# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4523 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4524 "and %[uNew], %[uNew], %[uVal]\n\t"
4525 ,
4526 "and %[uNew], %[uNew], %[uVal]\n\t"
4527 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4528 [uVal] "r" (u64));
4529
4530# else
4531 for (;;)
4532 {
4533 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4534 uint64_t u64New = u64Old & u64;
4535 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4536 break;
4537 ASMNopPause();
4538 }
4539# endif
4540}
4541#endif
4542
4543
4544/**
4545 * Atomically And a signed 64-bit value, ordered.
4546 *
4547 * @param pi64 Pointer to the variable to AND i64 with.
4548 * @param i64 The value to AND *pi64 with.
4549 *
4550 * @remarks x86: Requires a Pentium or later.
4551 */
4552DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4553{
4554 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4555}
4556
4557
4558/**
4559 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
4560 *
4561 * @param pu32 Pointer to the variable to XOR @a u32 with.
4562 * @param u32 The value to XOR @a *pu32 with.
4563 *
4564 * @remarks x86: Requires a 386 or later.
4565 */
4566#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4567RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4568#else
4569DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4570{
4571# if RT_INLINE_ASM_USES_INTRIN
4572 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
4573
4574# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4575# if RT_INLINE_ASM_GNU_STYLE
4576 __asm__ __volatile__("lock; xorl %1, %0\n\t"
4577 : "=m" (*pu32)
4578 : "ir" (u32)
4579 , "m" (*pu32)
4580 : "cc");
4581# else
4582 __asm
4583 {
4584 mov eax, [u32]
4585# ifdef RT_ARCH_AMD64
4586 mov rdx, [pu32]
4587 lock xor [rdx], eax
4588# else
4589 mov edx, [pu32]
4590 lock xor [edx], eax
4591# endif
4592 }
4593# endif
4594
4595# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4596 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
4597 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4598 "eor %[uNew], %[uNew], %[uVal]\n\t",
4599 [uVal] "r" (u32));
4600
4601# else
4602# error "Port me"
4603# endif
4604}
4605#endif
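
/* Example (illustrative sketch; the variable and bit number are made up):
 *      ASMAtomicXorU32(&pState->fFlags, RT_BIT_32(7));     // atomically toggles bit 7
 */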
4606
4607
4608/**
4609 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
4610 * extended version (for bitmaps).
4611 *
4612 * @returns Old value.
4613 * @param pu32 Pointer to the variable to XOR @a u32 with.
4614 * @param u32 The value to XOR @a *pu32 with.
4615 */
4616DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4617{
4618#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4619 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
4620 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
4621 "eor %[uNew], %[uOld], %[uVal]\n\t",
4622 [uVal] "r" (u32));
4623 return u32OldRet;
4624
4625#else
4626 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4627 uint32_t u32New;
4628 do
4629 u32New = u32RetOld ^ u32;
4630 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4631 return u32RetOld;
4632#endif
4633}
4634
4635
4636/**
4637 * Atomically XOR a signed 32-bit value, ordered.
4638 *
4639 * @param pi32 Pointer to the variable to XOR i32 with.
4640 * @param i32 The value to XOR *pi32 with.
4641 *
4642 * @remarks x86: Requires a 386 or later.
4643 */
4644DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4645{
4646 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4647}
4648
4649
4650/**
4651 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
4652 *
4653 * @param pu32 Pointer to the variable to OR u32 with.
4654 * @param u32 The value to OR *pu32 with.
4655 *
4656 * @remarks x86: Requires a 386 or later.
4657 */
4658#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4659RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4660#else
4661DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4662{
4663# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4664# if RT_INLINE_ASM_GNU_STYLE
4665 __asm__ __volatile__("orl %1, %0\n\t"
4666 : "=m" (*pu32)
4667 : "ir" (u32)
4668 , "m" (*pu32)
4669 : "cc");
4670# else
4671 __asm
4672 {
4673 mov eax, [u32]
4674# ifdef RT_ARCH_AMD64
4675 mov rdx, [pu32]
4676 or [rdx], eax
4677# else
4678 mov edx, [pu32]
4679 or [edx], eax
4680# endif
4681 }
4682# endif
4683
4684# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4685 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
4686 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4687 "orr %[uNew], %[uNew], %[uVal]\n\t",
4688 [uVal] "r" (u32));
4689
4690# else
4691# error "Port me"
4692# endif
4693}
4694#endif
4695
4696
4697/**
4698 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
4699 * extended version (for bitmap fallback).
4700 *
4701 * @returns Old value.
4702 * @param pu32 Pointer to the variable to OR @a u32 with.
4703 * @param u32 The value to OR @a *pu32 with.
4704 */
4705DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4706{
4707#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4708 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
4709 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4710 "orr %[uNew], %[uOld], %[uVal]\n\t",
4711 [uVal] "r" (u32));
4712 return u32OldRet;
4713
4714#else
4715 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4716#endif
4717}
4718
4719
4720/**
4721 * Atomically OR a signed 32-bit value, unordered.
4722 *
4723 * @param pi32 Pointer to the variable to OR i32 with.
4724 * @param i32 The value to OR *pi32 with.
4725 *
4726 * @remarks x86: Requires a 386 or later.
4727 */
4728DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4729{
4730 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4731}
4732
4733
4734/**
4735 * Atomically OR an unsigned 64-bit value, unordered.
4736 *
4737 * @param pu64 Pointer to the variable to OR u64 with.
4738 * @param u64 The value to OR *pu64 with.
4739 *
4740 * @remarks x86: Requires a Pentium or later.
4741 */
4742#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4743DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4744#else
4745DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4746{
4747# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4748 __asm__ __volatile__("orq %1, %q0\n\t"
4749 : "=m" (*pu64)
4750 : "r" (u64)
4751 , "m" (*pu64)
4752 : "cc");
4753
4754# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4755 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
4756 "orr %[uNew], %[uNew], %[uVal]\n\t"
4757 ,
4758 "orr %[uNew], %[uNew], %[uVal]\n\t"
4759 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4760 [uVal] "r" (u64));
4761
4762# else
4763 for (;;)
4764 {
4765 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4766 uint64_t u64New = u64Old | u64;
4767 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4768 break;
4769 ASMNopPause();
4770 }
4771# endif
4772}
4773#endif
4774
4775
4776/**
4777 * Atomically Or a signed 64-bit value, unordered.
4778 *
4779 * @param pi64 Pointer to the variable to OR i64 with.
4780 * @param i64 The value to OR *pi64 with.
4781 *
4782 * @remarks x86: Requires a Pentium or later.
4783 */
4784DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4785{
4786 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4787}
4788
4789
4790/**
4791 * Atomically And an unsigned 32-bit value, unordered.
4792 *
4793 * @param pu32 Pointer to the variable to AND u32 with.
4794 * @param u32 The value to AND *pu32 with.
4795 *
4796 * @remarks x86: Requires a 386 or later.
4797 */
4798#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4799RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4800#else
4801DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4802{
4803# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4804# if RT_INLINE_ASM_GNU_STYLE
4805 __asm__ __volatile__("andl %1, %0\n\t"
4806 : "=m" (*pu32)
4807 : "ir" (u32)
4808 , "m" (*pu32)
4809 : "cc");
4810# else
4811 __asm
4812 {
4813 mov eax, [u32]
4814# ifdef RT_ARCH_AMD64
4815 mov rdx, [pu32]
4816 and [rdx], eax
4817# else
4818 mov edx, [pu32]
4819 and [edx], eax
4820# endif
4821 }
4822# endif
4823
4824# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4825 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
4826 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4827 "and %[uNew], %[uNew], %[uVal]\n\t",
4828 [uVal] "r" (u32));
4829
4830# else
4831# error "Port me"
4832# endif
4833}
4834#endif
4835
4836
4837/**
4838 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
4839 * bitmap fallback).
4840 *
4841 * @returns Old value.
4842 * @param pu32 Pointer to the variable to AND @a u32 with.
4843 * @param u32 The value to AND @a *pu32 with.
4844 */
4845DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4846{
4847#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4848 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
4849 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4850 "and %[uNew], %[uOld], %[uVal]\n\t",
4851 [uVal] "r" (u32));
4852 return u32OldRet;
4853
4854#else
4855 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4856#endif
4857}
4858
4859
4860/**
4861 * Atomically And a signed 32-bit value, unordered.
4862 *
4863 * @param pi32 Pointer to the variable to AND i32 with.
4864 * @param i32 The value to AND *pi32 with.
4865 *
4866 * @remarks x86: Requires a 386 or later.
4867 */
4868DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4869{
4870 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4871}
4872
4873
4874/**
4875 * Atomically And an unsigned 64-bit value, unordered.
4876 *
4877 * @param pu64 Pointer to the variable to AND u64 with.
4878 * @param u64 The value to AND *pu64 with.
4879 *
4880 * @remarks x86: Requires a Pentium or later.
4881 */
4882#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4883DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4884#else
4885DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4886{
4887# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4888 __asm__ __volatile__("andq %1, %0\n\t"
4889 : "=m" (*pu64)
4890 : "r" (u64)
4891 , "m" (*pu64)
4892 : "cc");
4893
4894# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4895 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
4896 "and %[uNew], %[uNew], %[uVal]\n\t"
4897 ,
4898 "and %[uNew], %[uNew], %[uVal]\n\t"
4899 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4900 [uVal] "r" (u64));
4901
4902# else
4903 for (;;)
4904 {
4905 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4906 uint64_t u64New = u64Old & u64;
4907 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4908 break;
4909 ASMNopPause();
4910 }
4911# endif
4912}
4913#endif
4914
4915
4916/**
4917 * Atomically And a signed 64-bit value, unordered.
4918 *
4919 * @param pi64 Pointer to the variable to AND i64 with.
4920 * @param i64 The value to AND *pi64 with.
4921 *
4922 * @remarks x86: Requires a Pentium or later.
4923 */
4924DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4925{
4926 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4927}
4928
4929
4930/**
4931 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
4932 *
4933 * @param pu32 Pointer to the variable to XOR @a u32 with.
4934 * @param u32 The value to XOR @a *pu32 with.
4935 *
4936 * @remarks x86: Requires a 386 or later.
4937 */
4938#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4939RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4940#else
4941DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4942{
4943# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4944# if RT_INLINE_ASM_GNU_STYLE
4945 __asm__ __volatile__("xorl %1, %0\n\t"
4946 : "=m" (*pu32)
4947 : "ir" (u32)
4948 , "m" (*pu32)
4949 : "cc");
4950# else
4951 __asm
4952 {
4953 mov eax, [u32]
4954# ifdef RT_ARCH_AMD64
4955 mov rdx, [pu32]
4956 xor [rdx], eax
4957# else
4958 mov edx, [pu32]
4959 xor [edx], eax
4960# endif
4961 }
4962# endif
4963
4964# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4965 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
4966 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4967 "eor %[uNew], %[uNew], %[uVal]\n\t",
4968 [uVal] "r" (u32));
4969
4970# else
4971# error "Port me"
4972# endif
4973}
4974#endif
4975
4976
4977/**
4978 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
4979 * extended version (for bitmap fallback).
4980 *
4981 * @returns Old value.
4982 * @param pu32 Pointer to the variable to XOR @a u32 with.
4983 * @param u32 The value to XOR @a *pu32 with.
4984 */
4985DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4986{
4987#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4988 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
4989 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
4990 "eor %[uNew], %[uOld], %[uVal]\n\t",
4991 [uVal] "r" (u32));
4992 return u32OldRet;
4993
4994#else
4995 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4996#endif
4997}
4998
4999
5000/**
5001 * Atomically XOR a signed 32-bit value, unordered.
5002 *
5003 * @param pi32 Pointer to the variable to XOR @a i32 with.
5004 * @param i32 The value to XOR @a *pi32 with.
5005 *
5006 * @remarks x86: Requires a 386 or later.
5007 */
5008DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5009{
5010 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5011}
5012
5013
5014/**
5015 * Atomically increment an unsigned 32-bit value, unordered.
5016 *
5017 * @returns the new value.
5018 * @param pu32 Pointer to the variable to increment.
5019 *
5020 * @remarks x86: Requires a 486 or later.
5021 */
5022#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5023RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5024#else
5025DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5026{
5027# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5028 uint32_t u32;
5029# if RT_INLINE_ASM_GNU_STYLE
5030 __asm__ __volatile__("xaddl %0, %1\n\t"
5031 : "=r" (u32)
5032 , "=m" (*pu32)
5033 : "0" (1)
5034 , "m" (*pu32)
5035 : "memory" /** @todo why 'memory'? */
5036 , "cc");
5037 return u32 + 1;
5038# else
5039 __asm
5040 {
5041 mov eax, 1
5042# ifdef RT_ARCH_AMD64
5043 mov rdx, [pu32]
5044 xadd [rdx], eax
5045# else
5046 mov edx, [pu32]
5047 xadd [edx], eax
5048# endif
5049 mov u32, eax
5050 }
5051 return u32 + 1;
5052# endif
5053
5054# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5055 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5056 "add %w[uNew], %w[uNew], #1\n\t",
5057 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5058 "X" (0) /* dummy */);
5059 return u32NewRet;
5060
5061# else
5062# error "Port me"
5063# endif
5064}
5065#endif
5066
5067
5068/**
5069 * Atomically decrement an unsigned 32-bit value, unordered.
5070 *
5071 * @returns the new value.
5072 * @param pu32 Pointer to the variable to decrement.
5073 *
5074 * @remarks x86: Requires a 486 or later.
5075 */
5076#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5077RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5078#else
5079DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5080{
5081# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5082 uint32_t u32;
5083# if RT_INLINE_ASM_GNU_STYLE
5084 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5085 : "=r" (u32)
5086 , "=m" (*pu32)
5087 : "0" (-1)
5088 , "m" (*pu32)
5089 : "memory"
5090 , "cc");
5091 return u32 - 1;
5092# else
5093 __asm
5094 {
5095 mov eax, -1
5096# ifdef RT_ARCH_AMD64
5097 mov rdx, [pu32]
5098 xadd [rdx], eax
5099# else
5100 mov edx, [pu32]
5101 xadd [edx], eax
5102# endif
5103 mov u32, eax
5104 }
5105 return u32 - 1;
5106# endif
5107
5108# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5109 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5110 "sub %w[uNew], %w[uNew], #1\n\t",
5111 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5112 "X" (0) /* dummy */);
5113 return u32NewRet;
5114
5115# else
5116# error "Port me"
5117# endif
5118}
5119#endif
5120
5121
5122/** @def RT_ASM_PAGE_SIZE
5123 * We try to avoid dragging in iprt/param.h here.
5124 * @internal
5125 */
5126#if defined(RT_ARCH_SPARC64)
5127# define RT_ASM_PAGE_SIZE 0x2000
5128# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5129# if PAGE_SIZE != 0x2000
5130# error "PAGE_SIZE is not 0x2000!"
5131# endif
5132# endif
5133#elif defined(RT_ARCH_ARM64)
5134# define RT_ASM_PAGE_SIZE 0x4000
5135# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5136# if PAGE_SIZE != 0x4000
5137# error "PAGE_SIZE is not 0x4000!"
5138# endif
5139# endif
5140#else
5141# define RT_ASM_PAGE_SIZE 0x1000
5142# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5143# if PAGE_SIZE != 0x1000
5144# error "PAGE_SIZE is not 0x1000!"
5145# endif
5146# endif
5147#endif
5148
5149/**
5150 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
5151 *
5152 * @param pv Pointer to the memory block. This must be page aligned.
5153 */
5154#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5155RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5156# else
5157DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5158{
5159# if RT_INLINE_ASM_USES_INTRIN
5160# ifdef RT_ARCH_AMD64
5161 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5162# else
5163 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5164# endif
5165
5166# elif RT_INLINE_ASM_GNU_STYLE
5167 RTCCUINTREG uDummy;
5168# ifdef RT_ARCH_AMD64
5169 __asm__ __volatile__("rep stosq"
5170 : "=D" (pv),
5171 "=c" (uDummy)
5172 : "0" (pv),
5173 "c" (RT_ASM_PAGE_SIZE >> 3),
5174 "a" (0)
5175 : "memory");
5176# else
5177 __asm__ __volatile__("rep stosl"
5178 : "=D" (pv),
5179 "=c" (uDummy)
5180 : "0" (pv),
5181 "c" (RT_ASM_PAGE_SIZE >> 2),
5182 "a" (0)
5183 : "memory");
5184# endif
5185# else
5186 __asm
5187 {
5188# ifdef RT_ARCH_AMD64
5189 xor rax, rax
5190 mov ecx, 0200h
5191 mov rdi, [pv]
5192 rep stosq
5193# else
5194 xor eax, eax
5195 mov ecx, 0400h
5196 mov edi, [pv]
5197 rep stosd
5198# endif
5199 }
5200# endif
5201}
5202# endif
5203
5204
5205/**
5206 * Zeros a memory block with a 32-bit aligned size.
5207 *
5208 * @param pv Pointer to the memory block.
5209 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5210 */
5211#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5212RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5213#else
5214DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5215{
5216# if RT_INLINE_ASM_USES_INTRIN
5217# ifdef RT_ARCH_AMD64
5218 if (!(cb & 7))
5219 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5220 else
5221# endif
5222 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5223
5224# elif RT_INLINE_ASM_GNU_STYLE
5225 __asm__ __volatile__("rep stosl"
5226 : "=D" (pv),
5227 "=c" (cb)
5228 : "0" (pv),
5229 "1" (cb >> 2),
5230 "a" (0)
5231 : "memory");
5232# else
5233 __asm
5234 {
5235 xor eax, eax
5236# ifdef RT_ARCH_AMD64
5237 mov rcx, [cb]
5238 shr rcx, 2
5239 mov rdi, [pv]
5240# else
5241 mov ecx, [cb]
5242 shr ecx, 2
5243 mov edi, [pv]
5244# endif
5245 rep stosd
5246 }
5247# endif
5248}
5249#endif
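
/* Example (illustrative sketch; MYREQUEST is a made-up structure whose size is a
 * multiple of 4 bytes, as the cb parameter requires):
 *      MYREQUEST Req;
 *      ASMMemZero32(&Req, sizeof(Req));
 */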
5250
5251
5252/**
5253 * Fills a memory block with a 32-bit aligned size.
5254 *
5255 * @param pv Pointer to the memory block.
5256 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5257 * @param u32 The value to fill with.
5258 */
5259#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5260RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5261#else
5262DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5263{
5264# if RT_INLINE_ASM_USES_INTRIN
5265# ifdef RT_ARCH_AMD64
5266 if (!(cb & 7))
5267 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5268 else
5269# endif
5270 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5271
5272# elif RT_INLINE_ASM_GNU_STYLE
5273 __asm__ __volatile__("rep stosl"
5274 : "=D" (pv),
5275 "=c" (cb)
5276 : "0" (pv),
5277 "1" (cb >> 2),
5278 "a" (u32)
5279 : "memory");
5280# else
5281 __asm
5282 {
5283# ifdef RT_ARCH_AMD64
5284 mov rcx, [cb]
5285 shr rcx, 2
5286 mov rdi, [pv]
5287# else
5288 mov ecx, [cb]
5289 shr ecx, 2
5290 mov edi, [pv]
5291# endif
5292 mov eax, [u32]
5293 rep stosd
5294 }
5295# endif
5296}
5297#endif
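
/* Example (illustrative sketch): poisoning a scratch buffer with a recognizable pattern:
 *      uint32_t au32Scratch[64];
 *      ASMMemFill32(&au32Scratch[0], sizeof(au32Scratch), UINT32_C(0xdeadbeef));
 */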
5298
5299
5300/**
5301 * Checks if a memory block is all zeros.
5302 *
5303 * @returns Pointer to the first non-zero byte.
5304 * @returns NULL if all zero.
5305 *
5306 * @param pv Pointer to the memory block.
5307 * @param cb Number of bytes in the block.
5308 */
5309#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5310DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5311#else
5312DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5313{
5314/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5315 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5316 for (; cb; cb--, pb++)
5317 if (RT_LIKELY(*pb == 0))
5318 { /* likely */ }
5319 else
5320 return (void RT_FAR *)pb;
5321 return NULL;
5322}
5323#endif
5324
5325
5326/**
5327 * Checks if a memory block is all zeros.
5328 *
5329 * @returns true if zero, false if not.
5330 *
5331 * @param pv Pointer to the memory block.
5332 * @param cb Number of bytes in the block.
5333 *
5334 * @sa ASMMemFirstNonZero
5335 */
5336DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5337{
5338 return ASMMemFirstNonZero(pv, cb) == NULL;
5339}
5340
5341
5342/**
5343 * Checks if a memory page is all zeros.
5344 *
5345 * @returns true / false.
5346 *
5347 * @param pvPage Pointer to the page. Must be aligned on a 16-byte
5348 * boundary.
5349 */
5350DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5351{
5352# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5353 union { RTCCUINTREG r; bool f; } uAX;
5354 RTCCUINTREG xCX, xDI;
5355 Assert(!((uintptr_t)pvPage & 15));
5356 __asm__ __volatile__("repe; "
5357# ifdef RT_ARCH_AMD64
5358 "scasq\n\t"
5359# else
5360 "scasl\n\t"
5361# endif
5362 "setnc %%al\n\t"
5363 : "=&c" (xCX)
5364 , "=&D" (xDI)
5365 , "=&a" (uAX.r)
5366 : "mr" (pvPage)
5367# ifdef RT_ARCH_AMD64
5368 , "0" (RT_ASM_PAGE_SIZE/8)
5369# else
5370 , "0" (RT_ASM_PAGE_SIZE/4)
5371# endif
5372 , "1" (pvPage)
5373 , "2" (0)
5374 : "cc");
5375 return uAX.f;
5376# else
5377 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5378 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5379 Assert(!((uintptr_t)pvPage & 15));
5380 for (;;)
5381 {
5382 if (puPtr[0]) return false;
5383 if (puPtr[4]) return false;
5384
5385 if (puPtr[2]) return false;
5386 if (puPtr[6]) return false;
5387
5388 if (puPtr[1]) return false;
5389 if (puPtr[5]) return false;
5390
5391 if (puPtr[3]) return false;
5392 if (puPtr[7]) return false;
5393
5394 if (!--cLeft)
5395 return true;
5396 puPtr += 8;
5397 }
5398# endif
5399}
5400
5401
5402/**
5403 * Checks if a memory block is filled with the specified byte, returning the
5404 * first mismatch.
5405 *
5406 * This is sort of an inverted memchr.
5407 *
5408 * @returns Pointer to the byte which doesn't equal u8.
5409 * @returns NULL if all equal to u8.
5410 *
5411 * @param pv Pointer to the memory block.
5412 * @param cb Number of bytes in the block.
5413 * @param u8 The value it's supposed to be filled with.
5414 *
5415 * @remarks No alignment requirements.
5416 */
5417#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5418 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5419DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5420#else
5421DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5422{
5423/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5424 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5425 for (; cb; cb--, pb++)
5426 if (RT_LIKELY(*pb == u8))
5427 { /* likely */ }
5428 else
5429 return (void *)pb;
5430 return NULL;
5431}
5432#endif
5433
5434
5435/**
5436 * Checks if a memory block is filled with the specified byte.
5437 *
5438 * @returns true if all matching, false if not.
5439 *
5440 * @param pv Pointer to the memory block.
5441 * @param cb Number of bytes in the block.
5442 * @param u8 The value it's supposed to be filled with.
5443 *
5444 * @remarks No alignment requirements.
5445 */
5446DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5447{
5448 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5449}
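
/* Example (illustrative sketch; the block pointer, size and 0xcc fill pattern belong
 * to a hypothetical debug allocator, not to IPRT):
 *      Assert(ASMMemIsAllU8(pvBlock, cbBlock, 0xcc));
 */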
5450
5451
5452/**
5453 * Checks if a memory block is filled with the specified 32-bit value.
5454 *
5455 * This is a sort of inverted memchr.
5456 *
5457 * @returns Pointer to the first value which doesn't equal u32.
5458 * @returns NULL if all equal to u32.
5459 *
5460 * @param pv Pointer to the memory block.
5461 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5462 * @param u32 The value it's supposed to be filled with.
5463 */
5464DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5465{
5466/** @todo rewrite this in inline assembly? */
5467 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5468 for (; cb; cb -= 4, pu32++)
5469 if (RT_LIKELY(*pu32 == u32))
5470 { /* likely */ }
5471 else
5472 return (uint32_t RT_FAR *)pu32;
5473 return NULL;
5474}
5475
5476
5477/**
5478 * Probes a byte pointer for read access.
5479 *
5480 * While the function will fault if the byte is not read accessible,
5481 * the idea is to do this in a safe place like before acquiring locks
5482 * and such like.
5483 *
5484 * Also, this function guarantees that an eager compiler is not going
5485 * to optimize the probing away.
5486 *
5487 * @param pvByte Pointer to the byte.
5488 */
5489#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5490RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5491#else
5492DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5493{
5494# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5495 uint8_t u8;
5496# if RT_INLINE_ASM_GNU_STYLE
5497 __asm__ __volatile__("movb %1, %0\n\t"
5498 : "=q" (u8)
5499 : "m" (*(const uint8_t *)pvByte));
5500# else
5501 __asm
5502 {
5503# ifdef RT_ARCH_AMD64
5504 mov rax, [pvByte]
5505 mov al, [rax]
5506# else
5507 mov eax, [pvByte]
5508 mov al, [eax]
5509# endif
5510 mov [u8], al
5511 }
5512# endif
5513 return u8;
5514
5515# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5516 uint32_t u32;
5517 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5518# if defined(RT_ARCH_ARM64)
5519 "ldxrb %w[uDst], %[pMem]\n\t"
5520# else
5521 "ldrexb %[uDst], %[pMem]\n\t"
5522# endif
5523 : [uDst] "=&r" (u32)
5524 : [pMem] "m" (*(uint8_t const *)pvByte));
5525 return (uint8_t)u32;
5526
5527# else
5528# error "Port me"
5529# endif
5530}
5531#endif
5532
5533/**
5534 * Probes a buffer for read access page by page.
5535 *
5536 * While the function will fault if the buffer is not fully read
5537 * accessible, the idea is to do this in a safe place like before
5538 * acquiring locks and such like.
5539 *
5540 * Also, this function guarantees that an eager compiler is not going
5541 * to optimize the probing away.
5542 *
5543 * @param pvBuf Pointer to the buffer.
5544 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5545 */
5546DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5547{
5548 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5549 /* the first byte */
5550 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5551 ASMProbeReadByte(pu8);
5552
5553    /* the pages in between. */
5554 while (cbBuf > RT_ASM_PAGE_SIZE)
5555 {
5556 ASMProbeReadByte(pu8);
5557 cbBuf -= RT_ASM_PAGE_SIZE;
5558 pu8 += RT_ASM_PAGE_SIZE;
5559 }
5560
5561 /* the last byte */
5562 ASMProbeReadByte(pu8 + cbBuf - 1);
5563}
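
/* Editor's sketch (not part of the original header): the probe helpers are
 * meant to be called on unverified input while it is still safe to take a
 * page fault, e.g. before entering a critical section.  The request structure
 * and the critical section below are hypothetical; RTCritSectEnter/Leave are
 * the regular IPRT critical section APIs.
 *
 *     ASMProbeReadBuffer(pReq->pvData, pReq->cbData); // any fault happens here, outside the lock
 *     RTCritSectEnter(&pThis->CritSect);
 *     memcpy(pThis->abScratch, pReq->pvData, pReq->cbData);
 *     RTCritSectLeave(&pThis->CritSect);
 */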
5564
5565
5566
5567/** @defgroup grp_inline_bits Bit Operations
5568 * @{
5569 */
5570
5571
5572/**
5573 * Sets a bit in a bitmap.
5574 *
5575 * @param pvBitmap Pointer to the bitmap (little endian). This should be
5576 * 32-bit aligned.
5577 * @param iBit The bit to set.
5578 *
5579 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5580 * However, doing so will yield better performance as well as avoiding
5581 * traps accessing the last bits in the bitmap.
5582 */
5583#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5584RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5585#else
5586DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5587{
5588# if RT_INLINE_ASM_USES_INTRIN
5589 _bittestandset((long RT_FAR *)pvBitmap, iBit);
5590
5591# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5592# if RT_INLINE_ASM_GNU_STYLE
5593 __asm__ __volatile__("btsl %1, %0"
5594 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5595 : "Ir" (iBit)
5596 , "m" (*(volatile long RT_FAR *)pvBitmap)
5597 : "memory"
5598 , "cc");
5599# else
5600 __asm
5601 {
5602# ifdef RT_ARCH_AMD64
5603 mov rax, [pvBitmap]
5604 mov edx, [iBit]
5605 bts [rax], edx
5606# else
5607 mov eax, [pvBitmap]
5608 mov edx, [iBit]
5609 bts [eax], edx
5610# endif
5611 }
5612# endif
5613
5614# else
5615 int32_t offBitmap = iBit / 32;
5616 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5617 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5618# endif
5619}
5620#endif
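
/* Editor's sketch (not part of the original header): the bitmap helpers treat
 * the buffer as an array of little-endian 32-bit words, so a uint32_t array
 * is the natural backing store.  The names below are made up; ASMBitTest and
 * ASMBitClear are declared further down in this file.
 *
 *     uint32_t au32Bitmap[256 / 32];          // 256-bit bitmap, 32-bit aligned
 *     RT_ZERO(au32Bitmap);
 *     ASMBitSet(&au32Bitmap[0], 42);          // mark bit 42
 *     Assert(ASMBitTest(&au32Bitmap[0], 42));
 *     ASMBitClear(&au32Bitmap[0], 42);        // and release it again
 */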
5621
5622
5623/**
5624 * Atomically sets a bit in a bitmap, ordered.
5625 *
5626 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5627 * aligned, otherwise the memory access isn't atomic!
5628 * @param iBit The bit to set.
5629 *
5630 * @remarks x86: Requires a 386 or later.
5631 */
5632#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5633RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5634#else
5635DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5636{
5637 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5638# if RT_INLINE_ASM_USES_INTRIN
5639 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5640# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5641# if RT_INLINE_ASM_GNU_STYLE
5642 __asm__ __volatile__("lock; btsl %1, %0"
5643 : "=m" (*(volatile long *)pvBitmap)
5644 : "Ir" (iBit)
5645 , "m" (*(volatile long *)pvBitmap)
5646 : "memory"
5647 , "cc");
5648# else
5649 __asm
5650 {
5651# ifdef RT_ARCH_AMD64
5652 mov rax, [pvBitmap]
5653 mov edx, [iBit]
5654 lock bts [rax], edx
5655# else
5656 mov eax, [pvBitmap]
5657 mov edx, [iBit]
5658 lock bts [eax], edx
5659# endif
5660 }
5661# endif
5662
5663# else
5664 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5665# endif
5666}
5667#endif
5668
5669
5670/**
5671 * Clears a bit in a bitmap.
5672 *
5673 * @param pvBitmap Pointer to the bitmap (little endian).
5674 * @param iBit The bit to clear.
5675 *
5676 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5677 * However, doing so will yield better performance as well as avoiding
5678 * traps accessing the last bits in the bitmap.
5679 */
5680#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5681RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5682#else
5683DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5684{
5685# if RT_INLINE_ASM_USES_INTRIN
5686 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5687
5688# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5689# if RT_INLINE_ASM_GNU_STYLE
5690 __asm__ __volatile__("btrl %1, %0"
5691 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5692 : "Ir" (iBit)
5693 , "m" (*(volatile long RT_FAR *)pvBitmap)
5694 : "memory"
5695 , "cc");
5696# else
5697 __asm
5698 {
5699# ifdef RT_ARCH_AMD64
5700 mov rax, [pvBitmap]
5701 mov edx, [iBit]
5702 btr [rax], edx
5703# else
5704 mov eax, [pvBitmap]
5705 mov edx, [iBit]
5706 btr [eax], edx
5707# endif
5708 }
5709# endif
5710
5711# else
5712 int32_t offBitmap = iBit / 32;
5713 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5714 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
5715# endif
5716}
5717#endif
5718
5719
5720/**
5721 * Atomically clears a bit in a bitmap, ordered.
5722 *
5723 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5724 * aligned, otherwise the memory access isn't atomic!
5725 * @param   iBit            The bit to clear.
5726 *
5727 * @remarks No memory barrier, take care on smp.
5728 * @remarks x86: Requires a 386 or later.
5729 */
5730#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5731RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5732#else
5733DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5734{
5735 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5736# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5737# if RT_INLINE_ASM_GNU_STYLE
5738 __asm__ __volatile__("lock; btrl %1, %0"
5739 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5740 : "Ir" (iBit)
5741 , "m" (*(volatile long RT_FAR *)pvBitmap)
5742 : "memory"
5743 , "cc");
5744# else
5745 __asm
5746 {
5747# ifdef RT_ARCH_AMD64
5748 mov rax, [pvBitmap]
5749 mov edx, [iBit]
5750 lock btr [rax], edx
5751# else
5752 mov eax, [pvBitmap]
5753 mov edx, [iBit]
5754 lock btr [eax], edx
5755# endif
5756 }
5757# endif
5758# else
5759 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
5760# endif
5761}
5762#endif
5763
5764
5765/**
5766 * Toggles a bit in a bitmap.
5767 *
5768 * @param pvBitmap Pointer to the bitmap (little endian).
5769 * @param iBit The bit to toggle.
5770 *
5771 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5772 * However, doing so will yield better performance as well as avoiding
5773 * traps accessing the last bits in the bitmap.
5774 */
5775#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5776RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5777#else
5778DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5779{
5780# if RT_INLINE_ASM_USES_INTRIN
5781 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
5782# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5783# if RT_INLINE_ASM_GNU_STYLE
5784 __asm__ __volatile__("btcl %1, %0"
5785 : "=m" (*(volatile long *)pvBitmap)
5786 : "Ir" (iBit)
5787 , "m" (*(volatile long *)pvBitmap)
5788 : "memory"
5789 , "cc");
5790# else
5791 __asm
5792 {
5793# ifdef RT_ARCH_AMD64
5794 mov rax, [pvBitmap]
5795 mov edx, [iBit]
5796 btc [rax], edx
5797# else
5798 mov eax, [pvBitmap]
5799 mov edx, [iBit]
5800 btc [eax], edx
5801# endif
5802 }
5803# endif
5804# else
5805 int32_t offBitmap = iBit / 32;
5806 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5807 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5808# endif
5809}
5810#endif
5811
5812
5813/**
5814 * Atomically toggles a bit in a bitmap, ordered.
5815 *
5816 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5817 * aligned, otherwise the memory access isn't atomic!
5818 * @param   iBit            The bit to toggle.
5819 *
5820 * @remarks x86: Requires a 386 or later.
5821 */
5822#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5823RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5824#else
5825DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5826{
5827 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5828# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5829# if RT_INLINE_ASM_GNU_STYLE
5830 __asm__ __volatile__("lock; btcl %1, %0"
5831 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5832 : "Ir" (iBit)
5833 , "m" (*(volatile long RT_FAR *)pvBitmap)
5834 : "memory"
5835 , "cc");
5836# else
5837 __asm
5838 {
5839# ifdef RT_ARCH_AMD64
5840 mov rax, [pvBitmap]
5841 mov edx, [iBit]
5842 lock btc [rax], edx
5843# else
5844 mov eax, [pvBitmap]
5845 mov edx, [iBit]
5846 lock btc [eax], edx
5847# endif
5848 }
5849# endif
5850# else
5851 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5852# endif
5853}
5854#endif
5855
5856
5857/**
5858 * Tests and sets a bit in a bitmap.
5859 *
5860 * @returns true if the bit was set.
5861 * @returns false if the bit was clear.
5862 *
5863 * @param pvBitmap Pointer to the bitmap (little endian).
5864 * @param iBit The bit to test and set.
5865 *
5866 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5867 * However, doing so will yield better performance as well as avoiding
5868 * traps accessing the last bits in the bitmap.
5869 */
5870#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5871RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5872#else
5873DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5874{
5875 union { bool f; uint32_t u32; uint8_t u8; } rc;
5876# if RT_INLINE_ASM_USES_INTRIN
5877 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
5878
5879# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5880# if RT_INLINE_ASM_GNU_STYLE
5881 __asm__ __volatile__("btsl %2, %1\n\t"
5882 "setc %b0\n\t"
5883 "andl $1, %0\n\t"
5884 : "=q" (rc.u32)
5885 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5886 : "Ir" (iBit)
5887 , "m" (*(volatile long RT_FAR *)pvBitmap)
5888 : "memory"
5889 , "cc");
5890# else
5891 __asm
5892 {
5893 mov edx, [iBit]
5894# ifdef RT_ARCH_AMD64
5895 mov rax, [pvBitmap]
5896 bts [rax], edx
5897# else
5898 mov eax, [pvBitmap]
5899 bts [eax], edx
5900# endif
5901 setc al
5902 and eax, 1
5903 mov [rc.u32], eax
5904 }
5905# endif
5906
5907# else
5908 int32_t offBitmap = iBit / 32;
5909 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5910 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
5911 >> (iBit & 31);
5912 rc.u32 &= 1;
5913# endif
5914 return rc.f;
5915}
5916#endif
5917
5918
5919/**
5920 * Atomically tests and sets a bit in a bitmap, ordered.
5921 *
5922 * @returns true if the bit was set.
5923 * @returns false if the bit was clear.
5924 *
5925 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5926 * aligned, otherwise the memory access isn't atomic!
5927 * @param   iBit            The bit to test and set.
5928 *
5929 * @remarks x86: Requires a 386 or later.
5930 */
5931#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5932RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5933#else
5934DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5935{
5936 union { bool f; uint32_t u32; uint8_t u8; } rc;
5937 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5938# if RT_INLINE_ASM_USES_INTRIN
5939 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5940# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5941# if RT_INLINE_ASM_GNU_STYLE
5942 __asm__ __volatile__("lock; btsl %2, %1\n\t"
5943 "setc %b0\n\t"
5944 "andl $1, %0\n\t"
5945 : "=q" (rc.u32)
5946 , "=m" (*(volatile long RT_FAR *)pvBitmap)
5947 : "Ir" (iBit)
5948 , "m" (*(volatile long RT_FAR *)pvBitmap)
5949 : "memory"
5950 , "cc");
5951# else
5952 __asm
5953 {
5954 mov edx, [iBit]
5955# ifdef RT_ARCH_AMD64
5956 mov rax, [pvBitmap]
5957 lock bts [rax], edx
5958# else
5959 mov eax, [pvBitmap]
5960 lock bts [eax], edx
5961# endif
5962 setc al
5963 and eax, 1
5964 mov [rc.u32], eax
5965 }
5966# endif
5967
5968# else
5969 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
5970 >> (iBit & 31);
5971 rc.u32 &= 1;
5972# endif
5973 return rc.f;
5974}
5975#endif
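
/* Editor's sketch (not part of the original header): the atomic test-and-set
 * form can claim a slot in a shared allocation bitmap without a lock - the
 * caller that sees 'false' returned is the one that flipped the bit and thus
 * owns the slot.  The structure and index names are hypothetical.
 *
 *     if (!ASMAtomicBitTestAndSet(&pShared->au32AllocBitmap[0], iSlot))
 *     {
 *         // the bit was clear and we set it: slot iSlot is ours now
 *     }
 *     else
 *     {
 *         // somebody else beat us to it
 *     }
 */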
5976
5977
5978/**
5979 * Tests and clears a bit in a bitmap.
5980 *
5981 * @returns true if the bit was set.
5982 * @returns false if the bit was clear.
5983 *
5984 * @param pvBitmap Pointer to the bitmap (little endian).
5985 * @param iBit The bit to test and clear.
5986 *
5987 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
5988 * However, doing so will yield better performance as well as avoiding
5989 * traps accessing the last bits in the bitmap.
5990 */
5991#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5992RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5993#else
5994DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5995{
5996 union { bool f; uint32_t u32; uint8_t u8; } rc;
5997# if RT_INLINE_ASM_USES_INTRIN
5998 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5999
6000# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6001# if RT_INLINE_ASM_GNU_STYLE
6002 __asm__ __volatile__("btrl %2, %1\n\t"
6003 "setc %b0\n\t"
6004 "andl $1, %0\n\t"
6005 : "=q" (rc.u32)
6006 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6007 : "Ir" (iBit)
6008 , "m" (*(volatile long RT_FAR *)pvBitmap)
6009 : "memory"
6010 , "cc");
6011# else
6012 __asm
6013 {
6014 mov edx, [iBit]
6015# ifdef RT_ARCH_AMD64
6016 mov rax, [pvBitmap]
6017 btr [rax], edx
6018# else
6019 mov eax, [pvBitmap]
6020 btr [eax], edx
6021# endif
6022 setc al
6023 and eax, 1
6024 mov [rc.u32], eax
6025 }
6026# endif
6027
6028# else
6029 int32_t offBitmap = iBit / 32;
6030 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6031 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6032 >> (iBit & 31);
6033 rc.u32 &= 1;
6034# endif
6035 return rc.f;
6036}
6037#endif
6038
6039
6040/**
6041 * Atomically tests and clears a bit in a bitmap, ordered.
6042 *
6043 * @returns true if the bit was set.
6044 * @returns false if the bit was clear.
6045 *
6046 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6047 * aligned, otherwise the memory access isn't atomic!
6048 * @param iBit The bit to test and clear.
6049 *
6050 * @remarks No memory barrier, take care on smp.
6051 * @remarks x86: Requires a 386 or later.
6052 */
6053#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6054RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6055#else
6056DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6057{
6058 union { bool f; uint32_t u32; uint8_t u8; } rc;
6059 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6060# if RT_INLINE_ASM_USES_INTRIN
6061 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6062
6063# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6064# if RT_INLINE_ASM_GNU_STYLE
6065 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6066 "setc %b0\n\t"
6067 "andl $1, %0\n\t"
6068 : "=q" (rc.u32)
6069 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6070 : "Ir" (iBit)
6071 , "m" (*(volatile long RT_FAR *)pvBitmap)
6072 : "memory"
6073 , "cc");
6074# else
6075 __asm
6076 {
6077 mov edx, [iBit]
6078# ifdef RT_ARCH_AMD64
6079 mov rax, [pvBitmap]
6080 lock btr [rax], edx
6081# else
6082 mov eax, [pvBitmap]
6083 lock btr [eax], edx
6084# endif
6085 setc al
6086 and eax, 1
6087 mov [rc.u32], eax
6088 }
6089# endif
6090
6091# else
6092 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6093 >> (iBit & 31);
6094 rc.u32 &= 1;
6095# endif
6096 return rc.f;
6097}
6098#endif
6099
6100
6101/**
6102 * Tests and toggles a bit in a bitmap.
6103 *
6104 * @returns true if the bit was set.
6105 * @returns false if the bit was clear.
6106 *
6107 * @param pvBitmap Pointer to the bitmap (little endian).
6108 * @param iBit The bit to test and toggle.
6109 *
6110 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6111 * However, doing so will yield better performance as well as avoiding
6112 * traps accessing the last bits in the bitmap.
6113 */
6114#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6115RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6116#else
6117DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6118{
6119 union { bool f; uint32_t u32; uint8_t u8; } rc;
6120# if RT_INLINE_ASM_USES_INTRIN
6121 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6122
6123# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6124# if RT_INLINE_ASM_GNU_STYLE
6125 __asm__ __volatile__("btcl %2, %1\n\t"
6126 "setc %b0\n\t"
6127 "andl $1, %0\n\t"
6128 : "=q" (rc.u32)
6129 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6130 : "Ir" (iBit)
6131 , "m" (*(volatile long RT_FAR *)pvBitmap)
6132 : "memory"
6133 , "cc");
6134# else
6135 __asm
6136 {
6137 mov edx, [iBit]
6138# ifdef RT_ARCH_AMD64
6139 mov rax, [pvBitmap]
6140 btc [rax], edx
6141# else
6142 mov eax, [pvBitmap]
6143 btc [eax], edx
6144# endif
6145 setc al
6146 and eax, 1
6147 mov [rc.u32], eax
6148 }
6149# endif
6150
6151# else
6152 int32_t offBitmap = iBit / 32;
6153 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6154 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6155 >> (iBit & 31);
6156 rc.u32 &= 1;
6157# endif
6158 return rc.f;
6159}
6160#endif
6161
6162
6163/**
6164 * Atomically tests and toggles a bit in a bitmap, ordered.
6165 *
6166 * @returns true if the bit was set.
6167 * @returns false if the bit was clear.
6168 *
6169 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6170 * aligned, otherwise the memory access isn't atomic!
6171 * @param iBit The bit to test and toggle.
6172 *
6173 * @remarks x86: Requires a 386 or later.
6174 */
6175#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6176RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6177#else
6178DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6179{
6180 union { bool f; uint32_t u32; uint8_t u8; } rc;
6181 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6182# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6183# if RT_INLINE_ASM_GNU_STYLE
6184 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6185 "setc %b0\n\t"
6186 "andl $1, %0\n\t"
6187 : "=q" (rc.u32)
6188 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6189 : "Ir" (iBit)
6190 , "m" (*(volatile long RT_FAR *)pvBitmap)
6191 : "memory"
6192 , "cc");
6193# else
6194 __asm
6195 {
6196 mov edx, [iBit]
6197# ifdef RT_ARCH_AMD64
6198 mov rax, [pvBitmap]
6199 lock btc [rax], edx
6200# else
6201 mov eax, [pvBitmap]
6202 lock btc [eax], edx
6203# endif
6204 setc al
6205 and eax, 1
6206 mov [rc.u32], eax
6207 }
6208# endif
6209
6210# else
6211 rc.u32 = RT_H2LE_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_LE2H_U32(RT_BIT_32(iBit & 31))))
6212 >> (iBit & 31);
6213 rc.u32 &= 1;
6214# endif
6215 return rc.f;
6216}
6217#endif
6218
6219
6220/**
6221 * Tests if a bit in a bitmap is set.
6222 *
6223 * @returns true if the bit is set.
6224 * @returns false if the bit is clear.
6225 *
6226 * @param pvBitmap Pointer to the bitmap (little endian).
6227 * @param iBit The bit to test.
6228 *
6229 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
6230 * However, doing so will yield better performance as well as avoiding
6231 * traps accessing the last bits in the bitmap.
6232 */
6233#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6234RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6235#else
6236DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6237{
6238 union { bool f; uint32_t u32; uint8_t u8; } rc;
6239# if RT_INLINE_ASM_USES_INTRIN
6240 rc.u32 = _bittest((long *)pvBitmap, iBit);
6241
6242# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6243# if RT_INLINE_ASM_GNU_STYLE
6244
6245 __asm__ __volatile__("btl %2, %1\n\t"
6246 "setc %b0\n\t"
6247 "andl $1, %0\n\t"
6248 : "=q" (rc.u32)
6249 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6250 , "Ir" (iBit)
6251 : "memory"
6252 , "cc");
6253# else
6254 __asm
6255 {
6256 mov edx, [iBit]
6257# ifdef RT_ARCH_AMD64
6258 mov rax, [pvBitmap]
6259 bt [rax], edx
6260# else
6261 mov eax, [pvBitmap]
6262 bt [eax], edx
6263# endif
6264 setc al
6265 and eax, 1
6266 mov [rc.u32], eax
6267 }
6268# endif
6269
6270# else
6271 int32_t offBitmap = iBit / 32;
6272 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6273 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6274 rc.u32 &= 1;
6275# endif
6276 return rc.f;
6277}
6278#endif
6279
6280
6281/**
6282 * Clears a bit range within a bitmap.
6283 *
6284 * @param pvBitmap Pointer to the bitmap (little endian).
6285 * @param   iBitStart       The first bit to clear.
6286 * @param iBitEnd The first bit not to clear.
6287 */
6288DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6289{
6290 if (iBitStart < iBitEnd)
6291 {
6292 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6293 int32_t iStart = iBitStart & ~31;
6294 int32_t iEnd = iBitEnd & ~31;
6295 if (iStart == iEnd)
6296 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6297 else
6298 {
6299 /* bits in first dword. */
6300 if (iBitStart & 31)
6301 {
6302 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6303 pu32++;
6304 iBitStart = iStart + 32;
6305 }
6306
6307 /* whole dwords. */
6308 if (iBitStart != iEnd)
6309 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
6310
6311 /* bits in last dword. */
6312 if (iBitEnd & 31)
6313 {
6314 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
6315 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6316 }
6317 }
6318 }
6319}
6320
6321
6322/**
6323 * Sets a bit range within a bitmap.
6324 *
6325 * @param pvBitmap Pointer to the bitmap (little endian).
6326 * @param   iBitStart       The first bit to set.
6327 * @param iBitEnd The first bit not to set.
6328 */
6329DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6330{
6331 if (iBitStart < iBitEnd)
6332 {
6333 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6334 int32_t iStart = iBitStart & ~31;
6335 int32_t iEnd = iBitEnd & ~31;
6336 if (iStart == iEnd)
6337 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6338 else
6339 {
6340 /* bits in first dword. */
6341 if (iBitStart & 31)
6342 {
6343 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6344 pu32++;
6345 iBitStart = iStart + 32;
6346 }
6347
6348            /* whole dwords. */
6349 if (iBitStart != iEnd)
6350 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
6351
6352 /* bits in last dword. */
6353 if (iBitEnd & 31)
6354 {
6355            pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6356            *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6357 }
6358 }
6359 }
6360}
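
/* Editor's sketch (not part of the original header): both range functions take
 * a half-open interval - iBitStart is the first bit modified and iBitEnd the
 * first bit left untouched.  A small illustrative example:
 *
 *     uint32_t au32Bitmap[4] = { 0, 0, 0, 0 };    // 128 bits
 *     ASMBitSetRange(&au32Bitmap[0], 8, 40);      // sets bits 8..39
 *     ASMBitClearRange(&au32Bitmap[0], 16, 24);   // clears bits 16..23 again
 *     // au32Bitmap now has bits 8..15 and 24..39 set.
 */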
6361
6362
6363/**
6364 * Finds the first clear bit in a bitmap.
6365 *
6366 * @returns Index of the first zero bit.
6367 * @returns -1 if no clear bit was found.
6368 * @param pvBitmap Pointer to the bitmap (little endian).
6369 * @param cBits The number of bits in the bitmap. Multiple of 32.
6370 */
6371#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6372DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6373#else
6374DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6375{
6376 if (cBits)
6377 {
6378 int32_t iBit;
6379# if RT_INLINE_ASM_GNU_STYLE
6380 RTCCUINTREG uEAX, uECX, uEDI;
6381 cBits = RT_ALIGN_32(cBits, 32);
6382 __asm__ __volatile__("repe; scasl\n\t"
6383 "je 1f\n\t"
6384# ifdef RT_ARCH_AMD64
6385 "lea -4(%%rdi), %%rdi\n\t"
6386 "xorl (%%rdi), %%eax\n\t"
6387 "subq %5, %%rdi\n\t"
6388# else
6389 "lea -4(%%edi), %%edi\n\t"
6390 "xorl (%%edi), %%eax\n\t"
6391 "subl %5, %%edi\n\t"
6392# endif
6393 "shll $3, %%edi\n\t"
6394 "bsfl %%eax, %%edx\n\t"
6395 "addl %%edi, %%edx\n\t"
6396 "1:\t\n"
6397 : "=d" (iBit)
6398 , "=&c" (uECX)
6399 , "=&D" (uEDI)
6400 , "=&a" (uEAX)
6401 : "0" (0xffffffff)
6402 , "mr" (pvBitmap)
6403 , "1" (cBits >> 5)
6404 , "2" (pvBitmap)
6405 , "3" (0xffffffff)
6406 : "cc");
6407# else
6408 cBits = RT_ALIGN_32(cBits, 32);
6409 __asm
6410 {
6411# ifdef RT_ARCH_AMD64
6412 mov rdi, [pvBitmap]
6413 mov rbx, rdi
6414# else
6415 mov edi, [pvBitmap]
6416 mov ebx, edi
6417# endif
6418 mov edx, 0ffffffffh
6419 mov eax, edx
6420 mov ecx, [cBits]
6421 shr ecx, 5
6422 repe scasd
6423 je done
6424
6425# ifdef RT_ARCH_AMD64
6426 lea rdi, [rdi - 4]
6427 xor eax, [rdi]
6428 sub rdi, rbx
6429# else
6430 lea edi, [edi - 4]
6431 xor eax, [edi]
6432 sub edi, ebx
6433# endif
6434 shl edi, 3
6435 bsf edx, eax
6436 add edx, edi
6437 done:
6438 mov [iBit], edx
6439 }
6440# endif
6441 return iBit;
6442 }
6443 return -1;
6444}
6445#endif
6446
6447
6448/**
6449 * Finds the next clear bit in a bitmap.
6450 *
6451 * @returns Index of the next clear bit.
6452 * @returns -1 if no clear bit was found.
6453 * @param pvBitmap Pointer to the bitmap (little endian).
6454 * @param cBits The number of bits in the bitmap. Multiple of 32.
6455 * @param iBitPrev The bit returned from the last search.
6456 * The search will start at iBitPrev + 1.
6457 */
6458#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6459DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6460#else
6461DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6462{
6463 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6464 int iBit = ++iBitPrev & 31;
6465 if (iBit)
6466 {
6467 /*
6468 * Inspect the 32-bit word containing the unaligned bit.
6469 */
6470 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6471
6472# if RT_INLINE_ASM_USES_INTRIN
6473 unsigned long ulBit = 0;
6474 if (_BitScanForward(&ulBit, u32))
6475 return ulBit + iBitPrev;
6476# else
6477# if RT_INLINE_ASM_GNU_STYLE
6478 __asm__ __volatile__("bsf %1, %0\n\t"
6479 "jnz 1f\n\t"
6480 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
6481 "1:\n\t"
6482 : "=r" (iBit)
6483 : "r" (u32)
6484 : "cc");
6485# else
6486 __asm
6487 {
6488 mov edx, [u32]
6489 bsf eax, edx
6490 jnz done
6491 mov eax, 0ffffffffh
6492 done:
6493 mov [iBit], eax
6494 }
6495# endif
6496 if (iBit >= 0)
6497 return iBit + (int)iBitPrev;
6498# endif
6499
6500 /*
6501 * Skip ahead and see if there is anything left to search.
6502 */
6503 iBitPrev |= 31;
6504 iBitPrev++;
6505 if (cBits <= (uint32_t)iBitPrev)
6506 return -1;
6507 }
6508
6509 /*
6510 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6511 */
6512 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6513 if (iBit >= 0)
6514 iBit += iBitPrev;
6515 return iBit;
6516}
6517#endif
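
/* Editor's sketch (not part of the original header): walking every clear bit
 * with the first/next pair.  Remember that cBits must be a multiple of 32;
 * the bitmap and its size below are illustrative assumptions.
 *
 *     uint32_t const cBits = sizeof(au32Bitmap) * 8;
 *     int32_t iBit = ASMBitFirstClear(&au32Bitmap[0], cBits);
 *     while (iBit >= 0)
 *     {
 *         // bit iBit is currently clear
 *         iBit = ASMBitNextClear(&au32Bitmap[0], cBits, (uint32_t)iBit);
 *     }
 */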
6518
6519
6520/**
6521 * Finds the first set bit in a bitmap.
6522 *
6523 * @returns Index of the first set bit.
6524 * @returns -1 if no set bit was found.
6525 * @param pvBitmap Pointer to the bitmap (little endian).
6526 * @param cBits The number of bits in the bitmap. Multiple of 32.
6527 */
6528#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6529DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6530#else
6531DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6532{
6533 if (cBits)
6534 {
6535 int32_t iBit;
6536# if RT_INLINE_ASM_GNU_STYLE
6537 RTCCUINTREG uEAX, uECX, uEDI;
6538 cBits = RT_ALIGN_32(cBits, 32);
6539 __asm__ __volatile__("repe; scasl\n\t"
6540 "je 1f\n\t"
6541# ifdef RT_ARCH_AMD64
6542 "lea -4(%%rdi), %%rdi\n\t"
6543 "movl (%%rdi), %%eax\n\t"
6544 "subq %5, %%rdi\n\t"
6545# else
6546 "lea -4(%%edi), %%edi\n\t"
6547 "movl (%%edi), %%eax\n\t"
6548 "subl %5, %%edi\n\t"
6549# endif
6550 "shll $3, %%edi\n\t"
6551 "bsfl %%eax, %%edx\n\t"
6552 "addl %%edi, %%edx\n\t"
6553 "1:\t\n"
6554 : "=d" (iBit)
6555 , "=&c" (uECX)
6556 , "=&D" (uEDI)
6557 , "=&a" (uEAX)
6558 : "0" (0xffffffff)
6559 , "mr" (pvBitmap)
6560 , "1" (cBits >> 5)
6561 , "2" (pvBitmap)
6562 , "3" (0)
6563 : "cc");
6564# else
6565 cBits = RT_ALIGN_32(cBits, 32);
6566 __asm
6567 {
6568# ifdef RT_ARCH_AMD64
6569 mov rdi, [pvBitmap]
6570 mov rbx, rdi
6571# else
6572 mov edi, [pvBitmap]
6573 mov ebx, edi
6574# endif
6575 mov edx, 0ffffffffh
6576 xor eax, eax
6577 mov ecx, [cBits]
6578 shr ecx, 5
6579 repe scasd
6580 je done
6581# ifdef RT_ARCH_AMD64
6582 lea rdi, [rdi - 4]
6583 mov eax, [rdi]
6584 sub rdi, rbx
6585# else
6586 lea edi, [edi - 4]
6587 mov eax, [edi]
6588 sub edi, ebx
6589# endif
6590 shl edi, 3
6591 bsf edx, eax
6592 add edx, edi
6593 done:
6594 mov [iBit], edx
6595 }
6596# endif
6597 return iBit;
6598 }
6599 return -1;
6600}
6601#endif
6602
6603
6604/**
6605 * Finds the next set bit in a bitmap.
6606 *
6607 * @returns Index of the next set bit.
6608 * @returns -1 if no set bit was found.
6609 * @param pvBitmap Pointer to the bitmap (little endian).
6610 * @param cBits The number of bits in the bitmap. Multiple of 32.
6611 * @param iBitPrev The bit returned from the last search.
6612 * The search will start at iBitPrev + 1.
6613 */
6614#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6615DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6616#else
6617DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6618{
6619 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6620 int iBit = ++iBitPrev & 31;
6621 if (iBit)
6622 {
6623 /*
6624 * Inspect the 32-bit word containing the unaligned bit.
6625 */
6626 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6627
6628# if RT_INLINE_ASM_USES_INTRIN
6629 unsigned long ulBit = 0;
6630 if (_BitScanForward(&ulBit, u32))
6631 return ulBit + iBitPrev;
6632# else
6633# if RT_INLINE_ASM_GNU_STYLE
6634 __asm__ __volatile__("bsf %1, %0\n\t"
6635 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
6636 "movl $-1, %0\n\t"
6637 "1:\n\t"
6638 : "=r" (iBit)
6639 : "r" (u32)
6640 : "cc");
6641# else
6642 __asm
6643 {
6644 mov edx, [u32]
6645 bsf eax, edx
6646 jnz done
6647 mov eax, 0ffffffffh
6648 done:
6649 mov [iBit], eax
6650 }
6651# endif
6652 if (iBit >= 0)
6653 return iBit + (int)iBitPrev;
6654# endif
6655
6656 /*
6657 * Skip ahead and see if there is anything left to search.
6658 */
6659 iBitPrev |= 31;
6660 iBitPrev++;
6661 if (cBits <= (uint32_t)iBitPrev)
6662 return -1;
6663 }
6664
6665 /*
6666     * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6667 */
6668 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6669 if (iBit >= 0)
6670 iBit += iBitPrev;
6671 return iBit;
6672}
6673#endif
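
/* Editor's sketch (not part of the original header): a common pattern is to
 * treat a bitmap as a set of pending work items - enumerate the set bits and
 * atomically clear each one before handling it.  The structure, bit count and
 * worker function are hypothetical; the ASM* calls come from this file.
 *
 *     int iBit = ASMBitFirstSet(&pThis->au32Pending[0], cPendingBits);
 *     while (iBit >= 0)
 *     {
 *         if (ASMAtomicBitTestAndClear(&pThis->au32Pending[0], iBit))
 *             ProcessPendingItem(pThis, iBit);
 *         iBit = ASMBitNextSet(&pThis->au32Pending[0], cPendingBits, (uint32_t)iBit);
 *     }
 */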
6674
6675
6676/**
6677 * Finds the first bit which is set in the given 32-bit integer.
6678 * Bits are numbered from 1 (least significant) to 32.
6679 *
6680 * @returns index [1..32] of the first set bit.
6681 * @returns 0 if all bits are cleared.
6682 * @param u32 Integer to search for set bits.
6683 * @remarks Similar to ffs() in BSD.
6684 */
6685#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6686RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6687#else
6688DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
6689{
6690# if RT_INLINE_ASM_USES_INTRIN
6691 unsigned long iBit;
6692 if (_BitScanForward(&iBit, u32))
6693 iBit++;
6694 else
6695 iBit = 0;
6696
6697# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6698# if RT_INLINE_ASM_GNU_STYLE
6699 uint32_t iBit;
6700 __asm__ __volatile__("bsf %1, %0\n\t"
6701 "jnz 1f\n\t"
6702 "xorl %0, %0\n\t"
6703 "jmp 2f\n"
6704 "1:\n\t"
6705 "incl %0\n"
6706 "2:\n\t"
6707 : "=r" (iBit)
6708 : "rm" (u32)
6709 : "cc");
6710# else
6711 uint32_t iBit;
6712 _asm
6713 {
6714 bsf eax, [u32]
6715 jnz found
6716 xor eax, eax
6717 jmp done
6718 found:
6719 inc eax
6720 done:
6721 mov [iBit], eax
6722 }
6723# endif
6724
6725# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6726 /*
6727 * Using the "count leading zeros (clz)" instruction here because there
6728 * is no dedicated instruction to get the first set bit.
6729 * Need to reverse the bits in the value with "rbit" first because
6730 * "clz" starts counting from the most significant bit.
6731 */
6732 uint32_t iBit;
6733 __asm__ __volatile__(
6734# if defined(RT_ARCH_ARM64)
6735 "rbit %w[uVal], %w[uVal]\n\t"
6736 "clz %w[iBit], %w[uVal]\n\t"
6737# else
6738 "rbit %[uVal], %[uVal]\n\t"
6739 "clz %[iBit], %[uVal]\n\t"
6740# endif
6741 : [uVal] "=r" (u32)
6742 , [iBit] "=r" (iBit)
6743 : "[uVal]" (u32));
6744 if (iBit != 32)
6745 iBit++;
6746 else
6747 iBit = 0; /* No bit set. */
6748
6749# else
6750# error "Port me"
6751# endif
6752 return iBit;
6753}
6754#endif
6755
6756
6757/**
6758 * Finds the first bit which is set in the given 32-bit integer.
6759 * Bits are numbered from 1 (least significant) to 32.
6760 *
6761 * @returns index [1..32] of the first set bit.
6762 * @returns 0 if all bits are cleared.
6763 * @param i32 Integer to search for set bits.
6764 * @remark Similar to ffs() in BSD.
6765 */
6766DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
6767{
6768 return ASMBitFirstSetU32((uint32_t)i32);
6769}
6770
6771
6772/**
6773 * Finds the first bit which is set in the given 64-bit integer.
6774 *
6775 * Bits are numbered from 1 (least significant) to 64.
6776 *
6777 * @returns index [1..64] of the first set bit.
6778 * @returns 0 if all bits are cleared.
6779 * @param u64 Integer to search for set bits.
6780 * @remarks Similar to ffs() in BSD.
6781 */
6782#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6783RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6784#else
6785DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
6786{
6787# if RT_INLINE_ASM_USES_INTRIN
6788 unsigned long iBit;
6789# if ARCH_BITS == 64
6790 if (_BitScanForward64(&iBit, u64))
6791 iBit++;
6792 else
6793 iBit = 0;
6794# else
6795 if (_BitScanForward(&iBit, (uint32_t)u64))
6796 iBit++;
6797 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
6798 iBit += 33;
6799 else
6800 iBit = 0;
6801# endif
6802
6803# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6804 uint64_t iBit;
6805 __asm__ __volatile__("bsfq %1, %0\n\t"
6806 "jnz 1f\n\t"
6807 "xorl %k0, %k0\n\t"
6808 "jmp 2f\n"
6809 "1:\n\t"
6810 "incl %k0\n"
6811 "2:\n\t"
6812 : "=r" (iBit)
6813 : "rm" (u64)
6814 : "cc");
6815
6816# elif defined(RT_ARCH_ARM64)
6817 uint64_t iBit;
6818 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
6819 "clz %[iBit], %[uVal]\n\t"
6820 : [uVal] "=r" (u64)
6821 , [iBit] "=r" (iBit)
6822 : "[uVal]" (u64));
6823 if (iBit != 64)
6824 iBit++;
6825 else
6826 iBit = 0; /* No bit set. */
6827
6828# else
6829 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
6830 if (!iBit)
6831 {
6832 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
6833 if (iBit)
6834 iBit += 32;
6835 }
6836# endif
6837 return (unsigned)iBit;
6838}
6839#endif
6840
6841
6842/**
6843 * Finds the first bit which is set in the given 16-bit integer.
6844 *
6845 * Bits are numbered from 1 (least significant) to 16.
6846 *
6847 * @returns index [1..16] of the first set bit.
6848 * @returns 0 if all bits are cleared.
6849 * @param u16 Integer to search for set bits.
6850 * @remarks For 16-bit bs3kit code.
6851 */
6852#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6853RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
6854#else
6855DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
6856{
6857 return ASMBitFirstSetU32((uint32_t)u16);
6858}
6859#endif
6860
6861
6862/**
6863 * Finds the last bit which is set in the given 32-bit integer.
6864 * Bits are numbered from 1 (least significant) to 32.
6865 *
6866 * @returns index [1..32] of the last set bit.
6867 * @returns 0 if all bits are cleared.
6868 * @param u32 Integer to search for set bits.
6869 * @remark Similar to fls() in BSD.
6870 */
6871#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6872RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6873#else
6874DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
6875{
6876# if RT_INLINE_ASM_USES_INTRIN
6877 unsigned long iBit;
6878 if (_BitScanReverse(&iBit, u32))
6879 iBit++;
6880 else
6881 iBit = 0;
6882
6883# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6884# if RT_INLINE_ASM_GNU_STYLE
6885 uint32_t iBit;
6886 __asm__ __volatile__("bsrl %1, %0\n\t"
6887 "jnz 1f\n\t"
6888 "xorl %0, %0\n\t"
6889 "jmp 2f\n"
6890 "1:\n\t"
6891 "incl %0\n"
6892 "2:\n\t"
6893 : "=r" (iBit)
6894 : "rm" (u32)
6895 : "cc");
6896# else
6897 uint32_t iBit;
6898 _asm
6899 {
6900 bsr eax, [u32]
6901 jnz found
6902 xor eax, eax
6903 jmp done
6904 found:
6905 inc eax
6906 done:
6907 mov [iBit], eax
6908 }
6909# endif
6910
6911# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6912 uint32_t iBit;
6913 __asm__ __volatile__(
6914# if defined(RT_ARCH_ARM64)
6915 "clz %w[iBit], %w[uVal]\n\t"
6916# else
6917 "clz %[iBit], %[uVal]\n\t"
6918# endif
6919 : [iBit] "=r" (iBit)
6920 : [uVal] "r" (u32));
6921 iBit = 32 - iBit;
6922
6923# else
6924# error "Port me"
6925# endif
6926 return iBit;
6927}
6928#endif
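
/* Editor's sketch (not part of the original header): for a non-zero value u,
 * ASMBitLastSetU32(u) - 1 is floor(log2(u)) and ASMBitFirstSetU32(u) - 1 is
 * the number of trailing zero bits, since both indices are 1-based.  A couple
 * of identities that follow from the definitions above:
 *
 *     Assert(ASMBitLastSetU32(1) == 1);
 *     Assert(ASMBitLastSetU32(UINT32_C(0x80000000)) == 32);
 *     unsigned const iLog2 = ASMBitLastSetU32(u32) - 1;   // valid for u32 != 0
 */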
6929
6930
6931/**
6932 * Finds the last bit which is set in the given 32-bit integer.
6933 * Bits are numbered from 1 (least significant) to 32.
6934 *
6935 * @returns index [1..32] of the last set bit.
6936 * @returns 0 if all bits are cleared.
6937 * @param i32 Integer to search for set bits.
6938 * @remark Similar to fls() in BSD.
6939 */
6940DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
6941{
6942 return ASMBitLastSetU32((uint32_t)i32);
6943}
6944
6945
6946/**
6947 * Finds the last bit which is set in the given 64-bit integer.
6948 *
6949 * Bits are numbered from 1 (least significant) to 64.
6950 *
6951 * @returns index [1..64] of the last set bit.
6952 * @returns 0 if all bits are cleared.
6953 * @param u64 Integer to search for set bits.
6954 * @remark Similar to fls() in BSD.
6955 */
6956#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6957RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6958#else
6959DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
6960{
6961# if RT_INLINE_ASM_USES_INTRIN
6962 unsigned long iBit;
6963# if ARCH_BITS == 64
6964 if (_BitScanReverse64(&iBit, u64))
6965 iBit++;
6966 else
6967 iBit = 0;
6968# else
6969 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
6970 iBit += 33;
6971 else if (_BitScanReverse(&iBit, (uint32_t)u64))
6972 iBit++;
6973 else
6974 iBit = 0;
6975# endif
6976
6977# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6978 uint64_t iBit;
6979 __asm__ __volatile__("bsrq %1, %0\n\t"
6980 "jnz 1f\n\t"
6981 "xorl %k0, %k0\n\t"
6982 "jmp 2f\n"
6983 "1:\n\t"
6984 "incl %k0\n"
6985 "2:\n\t"
6986 : "=r" (iBit)
6987 : "rm" (u64)
6988 : "cc");
6989
6990# elif defined(RT_ARCH_ARM64)
6991 uint64_t iBit;
6992 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
6993 : [iBit] "=r" (iBit)
6994 : [uVal] "r" (u64));
6995 iBit = 64 - iBit;
6996
6997# else
6998 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
6999 if (iBit)
7000 iBit += 32;
7001 else
7002 iBit = ASMBitLastSetU32((uint32_t)u64);
7003# endif
7004 return (unsigned)iBit;
7005}
7006#endif
7007
7008
7009/**
7010 * Finds the last bit which is set in the given 16-bit integer.
7011 *
7012 * Bits are numbered from 1 (least significant) to 16.
7013 *
7014 * @returns index [1..16] of the last set bit.
7015 * @returns 0 if all bits are cleared.
7016 * @param u16 Integer to search for set bits.
7017 * @remarks For 16-bit bs3kit code.
7018 */
7019#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7020RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7021#else
7022DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7023{
7024 return ASMBitLastSetU32((uint32_t)u16);
7025}
7026#endif
7027
7028
7029/**
7030 * Reverse the byte order of the given 16-bit integer.
7031 *
7032 * @returns The byte-swapped value.
7033 * @param u16 16-bit integer value.
7034 */
7035#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7036RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
7037#else
7038DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
7039{
7040# if RT_INLINE_ASM_USES_INTRIN
7041 return _byteswap_ushort(u16);
7042
7043# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7044# if RT_INLINE_ASM_GNU_STYLE
7045 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
7046# else
7047 _asm
7048 {
7049 mov ax, [u16]
7050 ror ax, 8
7051 mov [u16], ax
7052 }
7053# endif
7054 return u16;
7055
7056# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7057 uint32_t u32Ret;
7058 __asm__ __volatile__(
7059# if defined(RT_ARCH_ARM64)
7060 "rev16 %w[uRet], %w[uVal]\n\t"
7061# else
7062 "rev16 %[uRet], %[uVal]\n\t"
7063# endif
7064 : [uRet] "=r" (u32Ret)
7065 : [uVal] "r" (u16));
7066 return (uint16_t)u32Ret;
7067
7068# else
7069# error "Port me"
7070# endif
7071}
7072#endif
7073
7074
7075/**
7076 * Reverse the byte order of the given 32-bit integer.
7077 *
7078 * @returns The byte-swapped value.
7079 * @param u32 32-bit integer value.
7080 */
7081#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7082RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
7083#else
7084DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
7085{
7086# if RT_INLINE_ASM_USES_INTRIN
7087 return _byteswap_ulong(u32);
7088
7089# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7090# if RT_INLINE_ASM_GNU_STYLE
7091 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
7092# else
7093 _asm
7094 {
7095 mov eax, [u32]
7096 bswap eax
7097 mov [u32], eax
7098 }
7099# endif
7100 return u32;
7101
7102# elif defined(RT_ARCH_ARM64)
7103 uint64_t u64Ret;
7104 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
7105 : [uRet] "=r" (u64Ret)
7106 : [uVal] "r" ((uint64_t)u32));
7107 return (uint32_t)u64Ret;
7108
7109# elif defined(RT_ARCH_ARM32)
7110 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
7111 : [uRet] "=r" (u32)
7112 : [uVal] "[uRet]" (u32));
7113 return u32;
7114
7115# else
7116# error "Port me"
7117# endif
7118}
7119#endif
7120
7121
7122/**
7123 * Reverse the byte order of the given 64-bit integer.
7124 *
7125 * @returns The byte-swapped value.
7126 * @param u64 64-bit integer value.
7127 */
7128DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
7129{
7130#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
7131 return _byteswap_uint64(u64);
7132
7133# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7134 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
7135 return u64;
7136
7137# elif defined(RT_ARCH_ARM64)
7138 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
7139 : [uRet] "=r" (u64)
7140 : [uVal] "[uRet]" (u64));
7141 return u64;
7142
7143#else
7144 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
7145 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
7146#endif
7147}
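
/* Editor's sketch (not part of the original header): the byte-swap routines
 * are what host/network order conversions boil down to on a little-endian
 * host.  Reading a big-endian 64-bit field from a packet could look like
 * this; the packet pointer and field offset are hypothetical.
 *
 *     uint64_t uWire;
 *     memcpy(&uWire, pbPacket + offField, sizeof(uWire)); // raw big-endian bytes
 *     uint64_t const uHost = ASMByteSwapU64(uWire);       // little-endian host assumed
 */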
7148
7149
7150/**
7151 * Rotate 32-bit unsigned value to the left by @a cShift.
7152 *
7153 * @returns Rotated value.
7154 * @param u32 The value to rotate.
7155 * @param cShift How many bits to rotate by.
7156 */
7157#ifdef __WATCOMC__
7158RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7159#else
7160DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7161{
7162# if RT_INLINE_ASM_USES_INTRIN
7163 return _rotl(u32, cShift);
7164
7165# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7166 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7167 return u32;
7168
7169# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7170 __asm__ __volatile__(
7171# if defined(RT_ARCH_ARM64)
7172 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7173# else
7174 "ror %[uRet], %[uVal], %[cShift]\n\t"
7175# endif
7176 : [uRet] "=r" (u32)
7177 : [uVal] "[uRet]" (u32)
7178 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7179 return u32;
7180
7181# else
7182 cShift &= 31;
7183 return (u32 << cShift) | (u32 >> (32 - cShift));
7184# endif
7185}
7186#endif
7187
7188
7189/**
7190 * Rotate 32-bit unsigned value to the right by @a cShift.
7191 *
7192 * @returns Rotated value.
7193 * @param u32 The value to rotate.
7194 * @param cShift How many bits to rotate by.
7195 */
7196#ifdef __WATCOMC__
7197RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7198#else
7199DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7200{
7201# if RT_INLINE_ASM_USES_INTRIN
7202 return _rotr(u32, cShift);
7203
7204# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7205 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7206 return u32;
7207
7208# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7209 __asm__ __volatile__(
7210# if defined(RT_ARCH_ARM64)
7211 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7212# else
7213 "ror %[uRet], %[uVal], %[cShift]\n\t"
7214# endif
7215 : [uRet] "=r" (u32)
7216 : [uVal] "[uRet]" (u32)
7217 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
7218 return u32;
7219
7220# else
7221 cShift &= 31;
7222 return (u32 >> cShift) | (u32 << (32 - cShift));
7223# endif
7224}
7225#endif
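
/* Editor's sketch (not part of the original header): rotating left by n bits
 * is the same as rotating right by (32 - n) & 31, which makes for a cheap
 * sanity check of the two helpers above.
 *
 *     uint32_t const u = UINT32_C(0x12345678);
 *     Assert(ASMRotateLeftU32(u, 8) == UINT32_C(0x34567812));
 *     Assert(ASMRotateLeftU32(u, 8) == ASMRotateRightU32(u, 24));
 */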
7226
7227
7228/**
7229 * Rotate 64-bit unsigned value to the left by @a cShift.
7230 *
7231 * @returns Rotated value.
7232 * @param u64 The value to rotate.
7233 * @param cShift How many bits to rotate by.
7234 */
7235DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7236{
7237#if RT_INLINE_ASM_USES_INTRIN
7238 return _rotl64(u64, cShift);
7239
7240#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7241 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7242 return u64;
7243
7244#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7245 uint32_t uSpill;
7246    __asm__ __volatile__("testb $0x20, %%cl\n\t"        /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
7247 "jz 1f\n\t"
7248 "xchgl %%eax, %%edx\n\t"
7249 "1:\n\t"
7250 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7251 "jz 2f\n\t"
7252                         "movl %%edx, %2\n\t"            /* save the hi value in %2. */
7253 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
7254 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
7255 "2:\n\t" /* } */
7256 : "=A" (u64)
7257 , "=c" (cShift)
7258 , "=r" (uSpill)
7259 : "0" (u64)
7260 , "1" (cShift)
7261 : "cc");
7262 return u64;
7263
7264# elif defined(RT_ARCH_ARM64)
7265 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7266 : [uRet] "=r" (u64)
7267 : [uVal] "[uRet]" (u64)
7268 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
7269 return u64;
7270
7271#else
7272 cShift &= 63;
7273 return (u64 << cShift) | (u64 >> (64 - cShift));
7274#endif
7275}
7276
7277
7278/**
7279 * Rotate 64-bit unsigned value to the right by @a cShift.
7280 *
7281 * @returns Rotated value.
7282 * @param u64 The value to rotate.
7283 * @param cShift How many bits to rotate by.
7284 */
7285DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7286{
7287#if RT_INLINE_ASM_USES_INTRIN
7288 return _rotr64(u64, cShift);
7289
7290#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7291 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7292 return u64;
7293
7294#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7295 uint32_t uSpill;
7296    __asm__ __volatile__("testb $0x20, %%cl\n\t"        /* if (cShift >= 0x20) { swap(u64.hi, u64.lo); cShift -= 0x20; } */
7297 "jz 1f\n\t"
7298 "xchgl %%eax, %%edx\n\t"
7299 "1:\n\t"
7300 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7301 "jz 2f\n\t"
7302                         "movl %%edx, %2\n\t"            /* save the hi value in %2. */
7303 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
7304 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
7305 "2:\n\t" /* } */
7306 : "=A" (u64)
7307 , "=c" (cShift)
7308 , "=r" (uSpill)
7309 : "0" (u64)
7310 , "1" (cShift)
7311 : "cc");
7312 return u64;
7313
7314# elif defined(RT_ARCH_ARM64)
7315 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7316 : [uRet] "=r" (u64)
7317 : [uVal] "[uRet]" (u64)
7318 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
7319 return u64;
7320
7321#else
7322 cShift &= 63;
7323 return (u64 >> cShift) | (u64 << (64 - cShift));
7324#endif
7325}
7326
7327/** @} */
7328
7329
7330/** @} */
7331
7332/*
7333 * Include #pragma aux definitions for Watcom C/C++.
7334 */
7335#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
7336# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
7337# undef IPRT_INCLUDED_asm_watcom_x86_16_h
7338# include "asm-watcom-x86-16.h"
7339#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
7340# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
7341# undef IPRT_INCLUDED_asm_watcom_x86_32_h
7342# include "asm-watcom-x86-32.h"
7343#endif
7344
7345#endif /* !IPRT_INCLUDED_asm_h */
7346