VirtualBox

source: vbox/trunk/include/iprt/asm.h@93577

Last change on this file since 93577 was 93280, checked in by vboxsync, 3 years ago

iprt/asm.h: Fix code ordering issue visible on big endian architecture only (ASMBitClearRange and ASMBitSetRange use ASMByteSwap inline functions which therefore need to be defined earlier). Also fix ASMBitSetRange on BE architecture and polish RT_FAR usage in ASMBitClearRange.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 233.8 KB
 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2022 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.virtualbox.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef IPRT_INCLUDED_asm_h
27#define IPRT_INCLUDED_asm_h
28#ifndef RT_WITHOUT_PRAGMA_ONCE
29# pragma once
30#endif
31
32#include <iprt/cdefs.h>
33#include <iprt/types.h>
34#include <iprt/assert.h>
35/** @def RT_INLINE_ASM_USES_INTRIN
36 * Defined as 1 if we're using a _MSC_VER >= 1400 compiler and its intrinsics.
37 * Otherwise defined as 0.
38 */
39
40/* Solaris 10 header ugliness */
41#ifdef u
42# undef u
43#endif
44
45#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
46/* Emit the intrinsics at all optimization levels. */
47# include <iprt/sanitized/intrin.h>
48# pragma intrinsic(_ReadWriteBarrier)
49# pragma intrinsic(__cpuid)
50# pragma intrinsic(__stosd)
51# pragma intrinsic(__stosw)
52# pragma intrinsic(__stosb)
53# pragma intrinsic(_BitScanForward)
54# pragma intrinsic(_BitScanReverse)
55# pragma intrinsic(_bittest)
56# pragma intrinsic(_bittestandset)
57# pragma intrinsic(_bittestandreset)
58# pragma intrinsic(_bittestandcomplement)
59# pragma intrinsic(_byteswap_ushort)
60# pragma intrinsic(_byteswap_ulong)
61# pragma intrinsic(_interlockedbittestandset)
62# pragma intrinsic(_interlockedbittestandreset)
63# pragma intrinsic(_InterlockedAnd)
64# pragma intrinsic(_InterlockedOr)
65# pragma intrinsic(_InterlockedXor)
66# pragma intrinsic(_InterlockedIncrement)
67# pragma intrinsic(_InterlockedDecrement)
68# pragma intrinsic(_InterlockedExchange)
69# pragma intrinsic(_InterlockedExchangeAdd)
70# pragma intrinsic(_InterlockedCompareExchange)
71# pragma intrinsic(_InterlockedCompareExchange64)
72# pragma intrinsic(_rotl)
73# pragma intrinsic(_rotr)
74# pragma intrinsic(_rotl64)
75# pragma intrinsic(_rotr64)
76# ifdef RT_ARCH_AMD64
77# pragma intrinsic(__stosq)
78# pragma intrinsic(_byteswap_uint64)
79# pragma intrinsic(_InterlockedCompareExchange128)
80# pragma intrinsic(_InterlockedExchange64)
81# pragma intrinsic(_InterlockedExchangeAdd64)
82# pragma intrinsic(_InterlockedAnd64)
83# pragma intrinsic(_InterlockedOr64)
84# pragma intrinsic(_InterlockedIncrement64)
85# pragma intrinsic(_InterlockedDecrement64)
86# endif
87#endif
88
89/*
90 * Undefine all symbols we have Watcom C/C++ #pragma aux'es for.
91 */
92#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
93# include "asm-watcom-x86-16.h"
94#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
95# include "asm-watcom-x86-32.h"
96#endif
97
98
99/** @defgroup grp_rt_asm ASM - Assembly Routines
100 * @ingroup grp_rt
101 *
102 * @remarks The difference between ordered and unordered atomic operations is
103 * that the former will complete outstanding reads and writes before
104 * continuing, while the latter doesn't make any promises about the
105 * order. Ordered operations don't, it seems, make any 100% promise
106 * wrt whether the operation will complete before any subsequent
107 * memory access. (Please correct if wrong.)
108 *
109 * ASMAtomicSomething operations are all ordered, while
110 * ASMAtomicUoSomething are unordered (note the Uo).
111 *
112 * Please note that ordered operations do not necessarily imply a
113 * compiler (memory) barrier. The user has to use the
114 * ASMCompilerBarrier() macro when that is deemed necessary.
115 *
116 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed
117 * to reorder or even optimize assembler instructions away. For
118 * instance, in the following code the second rdmsr instruction is
119 * optimized away because gcc treats that instruction as deterministic:
120 *
121 * @code
122 * static inline uint32_t rdmsr_low(int idx)
123 * {
124 * uint32_t low;
125 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx"); return low;
126 * }
127 * ...
128 * uint32_t msr1 = rdmsr_low(1);
129 * foo(msr1);
130 * msr1 = rdmsr_low(1);
131 * bar(msr1);
132 * @endcode
133 *
134 * The input parameter of rdmsr_low is the same for both calls and
135 * therefore gcc will use the result of the first call as input
136 * parameter for bar() as well. For rdmsr this is not acceptable as
137 * this instruction is _not_ deterministic. This applies to reading
138 * machine status information in general.
139 *
140 * @{
141 */
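
/* A small illustration of the ordered vs. unordered naming convention described
 * above. The g_uPayload/g_fReady globals and the publish() helper are made-up
 * names for this sketch, not IPRT APIs; the ASMAtomic*WriteU32 functions are
 * defined further down in this header.
 *
 * @code
 * static uint32_t volatile g_uPayload;
 * static uint32_t volatile g_fReady;
 *
 * static void publish(uint32_t uValue)
 * {
 *     ASMAtomicUoWriteU32(&g_uPayload, uValue); // unordered: atomic store, no ordering promise
 *     ASMAtomicWriteU32(&g_fReady, 1);          // ordered: outstanding payload store completes first
 * }
 * @endcode
 */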
142
143
144/** @def RT_INLINE_ASM_GCC_4_3_X_X86
145 * Used to work around some 4.3.x register allocation issues in this version of
146 * the compiler. So far this workaround is still required for 4.4 and 4.5 but
147 * definitely not for 5.x */
148#if (RT_GNUC_PREREQ(4, 3) && !RT_GNUC_PREREQ(5, 0) && defined(__i386__))
149# define RT_INLINE_ASM_GCC_4_3_X_X86 1
150#else
151# define RT_INLINE_ASM_GCC_4_3_X_X86 0
152#endif
153
154/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
155 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
156 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
157 * mode, x86.
158 *
159 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
160 * when in PIC mode on x86.
161 */
162#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
163# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
164# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
165# elif defined(_MSC_VER) /* Visual C++ has trouble too, but it'll only tell us when C4688 is enabled. */
166# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
167# elif ( (defined(PIC) || defined(__PIC__)) \
168 && defined(RT_ARCH_X86) \
169 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
170 || defined(RT_OS_DARWIN)) )
171# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
172# else
173# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 0
174# endif
175#endif
176
177
178/** @def RT_INLINE_ASM_EXTERNAL_TMP_ARM
179 * Temporary version of RT_INLINE_ASM_EXTERNAL that excludes ARM. */
180#if RT_INLINE_ASM_EXTERNAL && !(defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32))
181# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 1
182#else
183# define RT_INLINE_ASM_EXTERNAL_TMP_ARM 0
184#endif
185
186/*
187 * ARM is great fun.
188 */
189#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
190
191# define RTASM_ARM_NO_BARRIER
192# ifdef RT_ARCH_ARM64
193# define RTASM_ARM_NO_BARRIER_IN_REG
194# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
195# define RTASM_ARM_DSB_SY "dsb sy\n\t"
196# define RTASM_ARM_DSB_SY_IN_REG
197# define RTASM_ARM_DSB_SY_COMMA_IN_REG
198# define RTASM_ARM_DMB_SY "dmb sy\n\t"
199# define RTASM_ARM_DMB_SY_IN_REG
200# define RTASM_ARM_DMB_SY_COMMA_IN_REG
201# define RTASM_ARM_DMB_ST "dmb st\n\t"
202# define RTASM_ARM_DMB_ST_IN_REG
203# define RTASM_ARM_DMB_ST_COMMA_IN_REG
204# define RTASM_ARM_DMB_LD "dmb ld\n\t"
205# define RTASM_ARM_DMB_LD_IN_REG
206# define RTASM_ARM_DMB_LD_COMMA_IN_REG
207# define RTASM_ARM_PICK_6432(expr64, expr32) expr64
208# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
209 uint32_t rcSpill; \
210 uint32_t u32NewRet; \
211 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
212 RTASM_ARM_##barrier_type /* before label? */ \
213 "ldaxr %w[uNew], %[pMem]\n\t" \
214 modify64 \
215 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
216 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
217 : [pMem] "+m" (*a_pu32Mem) \
218 , [uNew] "=&r" (u32NewRet) \
219 , [rc] "=&r" (rcSpill) \
220 : in_reg \
221 : "cc")
222# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
223 uint32_t rcSpill; \
224 uint32_t u32OldRet; \
225 uint32_t u32NewSpill; \
226 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
227 RTASM_ARM_##barrier_type /* before label? */ \
228 "ldaxr %w[uOld], %[pMem]\n\t" \
229 modify64 \
230 "stlxr %w[rc], %w[uNew], %[pMem]\n\t" \
231 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
232 : [pMem] "+m" (*a_pu32Mem) \
233 , [uOld] "=&r" (u32OldRet) \
234 , [uNew] "=&r" (u32NewSpill) \
235 , [rc] "=&r" (rcSpill) \
236 : in_reg \
237 : "cc")
238# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
239 uint32_t rcSpill; \
240 uint64_t u64NewRet; \
241 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
242 RTASM_ARM_##barrier_type /* before label? */ \
243 "ldaxr %[uNew], %[pMem]\n\t" \
244 modify64 \
245 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
246 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
247 : [pMem] "+m" (*a_pu64Mem) \
248 , [uNew] "=&r" (u64NewRet) \
249 , [rc] "=&r" (rcSpill) \
250 : in_reg \
251 : "cc")
252# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
253 uint32_t rcSpill; \
254 uint64_t u64OldRet; \
255 uint64_t u64NewSpill; \
256 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
257 RTASM_ARM_##barrier_type /* before label? */ \
258 "ldaxr %[uOld], %[pMem]\n\t" \
259 modify64 \
260 "stlxr %w[rc], %[uNew], %[pMem]\n\t" \
261 "cbnz %w[rc], .Ltry_again_" #name "_%=\n\t" \
262 : [pMem] "+m" (*a_pu64Mem) \
263 , [uOld] "=&r" (u64OldRet) \
264 , [uNew] "=&r" (u64NewSpill) \
265 , [rc] "=&r" (rcSpill) \
266 : in_reg \
267 : "cc")
268
269# else /* RT_ARCH_ARM32 */
270# define RTASM_ARM_PICK_6432(expr64, expr32) expr32
271# if RT_ARCH_ARM32 >= 7
272# warning armv7
273# define RTASM_ARM_NO_BARRIER_IN_REG
274# define RTASM_ARM_NO_BARRIER_COMMA_IN_REG
275# define RTASM_ARM_DSB_SY "dsb sy\n\t"
276# define RTASM_ARM_DSB_SY_IN_REG "X" (0xfade)
277# define RTASM_ARM_DMB_SY "dmb sy\n\t"
278# define RTASM_ARM_DMB_SY_IN_REG "X" (0xfade)
279# define RTASM_ARM_DMB_ST "dmb st\n\t"
280# define RTASM_ARM_DMB_ST_IN_REG "X" (0xfade)
281# define RTASM_ARM_DMB_LD "dmb ld\n\t"
282# define RTASM_ARM_DMB_LD_IN_REG "X" (0xfade)
283
284# elif RT_ARCH_ARM32 >= 6
285# warning armv6
286# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
287# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
288# define RTASM_ARM_DMB_SY "mcr p15, 0, %[uZero], c7, c10, 5\n\t"
289# define RTASM_ARM_DMB_SY_IN_REG [uZero] "r" (0)
290# define RTASM_ARM_DMB_ST RTASM_ARM_DMB_SY
291# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DMB_SY_IN_REG
292# define RTASM_ARM_DMB_LD RTASM_ARM_DMB_SY
293# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DMB_SY_IN_REG
294# elif RT_ARCH_ARM32 >= 4
295# warning armv5 or older
296# define RTASM_ARM_DSB_SY "mcr p15, 0, %[uZero], c7, c10, 4\n\t"
297# define RTASM_ARM_DSB_SY_IN_REG [uZero] "r" (0)
298# define RTASM_ARM_DMB_SY RTASM_ARM_DSB_SY
299# define RTASM_ARM_DMB_SY_IN_REG RTASM_ARM_DSB_SY_IN_REG
300# define RTASM_ARM_DMB_ST RTASM_ARM_DSB_SY
301# define RTASM_ARM_DMB_ST_IN_REG RTASM_ARM_DSB_SY_IN_REG
302# define RTASM_ARM_DMB_LD RTASM_ARM_DSB_SY
303# define RTASM_ARM_DMB_LD_IN_REG RTASM_ARM_DSB_SY_IN_REG
304# else
305# error "huh? Odd RT_ARCH_ARM32 value!"
306# endif
307# define RTASM_ARM_DSB_SY_COMMA_IN_REG , RTASM_ARM_DSB_SY_IN_REG
308# define RTASM_ARM_DMB_SY_COMMA_IN_REG , RTASM_ARM_DMB_SY_IN_REG
309# define RTASM_ARM_DMB_ST_COMMA_IN_REG , RTASM_ARM_DMB_ST_IN_REG
310# define RTASM_ARM_DMB_LD_COMMA_IN_REG , RTASM_ARM_DMB_LD_IN_REG
311# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
312 uint32_t rcSpill; \
313 uint32_t u32NewRet; \
314 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
315 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
316 "ldrex %[uNew], %[pMem]\n\t" \
317 modify32 \
318 "strex %[rc], %[uNew], %[pMem]\n\t" \
319 "cmp %[rc], #0\n\t" \
320 "bne .Ltry_again_" #name "_%=\n\t" \
321 : [pMem] "+m" (*a_pu32Mem) \
322 , [uNew] "=&r" (u32NewRet) \
323 , [rc] "=&r" (rcSpill) \
324 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
325 , in_reg \
326 : "cc")
327# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(name, a_pu32Mem, barrier_type, modify64, modify32, in_reg) \
328 uint32_t rcSpill; \
329 uint32_t u32OldRet; \
330 uint32_t u32NewSpill; \
331 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
332 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
333 "ldrex %[uOld], %[pMem]\n\t" \
334 modify32 \
335 "strex %[rc], %[uNew], %[pMem]\n\t" \
336 "cmp %[rc], #0\n\t" \
337 "bne .Ltry_again_" #name "_%=\n\t" \
338 : [pMem] "+m" (*a_pu32Mem) \
339 , [uOld] "=&r" (u32OldRet) \
340 , [uNew] "=&r" (u32NewSpill) \
341 , [rc] "=&r" (rcSpill) \
342 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
343 , in_reg \
344 : "cc")
345# define RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
346 uint32_t rcSpill; \
347 uint64_t u64NewRet; \
348 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
349 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
350 "ldrexd %[uNew], %H[uNew], %[pMem]\n\t" \
351 modify32 \
352 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
353 "cmp %[rc], #0\n\t" \
354 "bne .Ltry_again_" #name "_%=\n\t" \
355 : [pMem] "+m" (*a_pu64Mem), \
356 [uNew] "=&r" (u64NewRet), \
357 [rc] "=&r" (rcSpill) \
358 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
359 , in_reg \
360 : "cc")
361# define RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(name, a_pu64Mem, barrier_type, modify64, modify32, in_reg) \
362 uint32_t rcSpill; \
363 uint64_t u64OldRet; \
364 uint64_t u64NewSpill; \
365 __asm__ __volatile__(".Ltry_again_" #name "_%=:\n\t" \
366 RT_CONCAT(RTASM_ARM_,barrier_type) /* before label? */ \
367 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" \
368 modify32 \
369 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t" \
370 "cmp %[rc], #0\n\t" \
371 "bne .Ltry_again_" #name "_%=\n\t" \
372 : [pMem] "+m" (*a_pu64Mem), \
373 [uOld] "=&r" (u64OldRet), \
374 [uNew] "=&r" (u64NewSpill), \
375 [rc] "=&r" (rcSpill) \
376 : RT_CONCAT3(RTASM_ARM_,barrier_type,_IN_REG) \
377 , in_reg \
378 : "cc")
379# endif /* RT_ARCH_ARM32 */
380#endif
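
/* Sketch of how the RTASM_ARM_LOAD_MODIFY_STORE_RET_* helpers above are meant to
 * be instantiated; this is roughly the pattern the atomic inc/dec/and/or
 * emulations later in this header follow. MyAtomicIncU32 and the dummy "X" (0)
 * input operand are illustrative choices, not the exact arguments the real
 * helpers pass.
 *
 * @code
 * DECLINLINE(uint32_t) MyAtomicIncU32(uint32_t volatile RT_FAR *pu32)
 * {
 *     RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(MyAtomicIncU32, pu32, DMB_SY,
 *                                            "add %w[uNew], %w[uNew], #1\n\t",  // ARM64 modify step
 *                                            "add %[uNew], %[uNew], #1\n\t",    // ARM32 modify step
 *                                            "X" (0));                          // no extra inputs needed
 *     return u32NewRet;                       // declared by the macro
 * }
 * @endcode
 */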
381
382
383/** @def ASMReturnAddress
384 * Gets the return address of the current (or calling if you like) function or method.
385 */
386#ifdef _MSC_VER
387# ifdef __cplusplus
388extern "C"
389# endif
390void * _ReturnAddress(void);
391# pragma intrinsic(_ReturnAddress)
392# define ASMReturnAddress() _ReturnAddress()
393#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
394# define ASMReturnAddress() __builtin_return_address(0)
395#elif defined(__WATCOMC__)
396# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
397#else
398# error "Unsupported compiler."
399#endif
400
401
402/**
403 * Compiler memory barrier.
404 *
405 * Ensure that the compiler does not use any cached (register/tmp stack) memory
406 * values or any outstanding writes when returning from this function.
407 *
408 * This function must be used if non-volatile data is modified by a
409 * device or the VMM. Typical cases are port access, MMIO access,
410 * trapping instruction, etc.
411 */
412#if RT_INLINE_ASM_GNU_STYLE
413# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
414#elif RT_INLINE_ASM_USES_INTRIN
415# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
416#elif defined(__WATCOMC__)
417void ASMCompilerBarrier(void);
418#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
419DECLINLINE(void) ASMCompilerBarrier(void) RT_NOTHROW_DEF
420{
421 __asm
422 {
423 }
424}
425#endif
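
/* Minimal usage sketch for ASMCompilerBarrier(). The deviceStatusChanged() helper
 * and its pu32MmioReg parameter are hypothetical; the point is only that the
 * barrier stops the compiler from reusing the first (register-cached) read.
 *
 * @code
 * static bool deviceStatusChanged(uint32_t *pu32MmioReg)  // note: not volatile
 * {
 *     uint32_t const uFirst = *pu32MmioReg;
 *     ASMCompilerBarrier();            // discard cached values, flush outstanding writes
 *     return *pu32MmioReg != uFirst;   // the second access really re-reads memory
 * }
 * @endcode
 */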
426
427
428/** @def ASMBreakpoint
429 * Debugger Breakpoint.
430 * @deprecated Use RT_BREAKPOINT instead.
431 * @internal
432 */
433#define ASMBreakpoint() RT_BREAKPOINT()
434
435
436/**
437 * Spinloop hint for platforms that have these, empty function on the other
438 * platforms.
439 *
440 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detect
441 * spin locks.
442 */
443#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
444RT_ASM_DECL_PRAGMA_WATCOM(void) ASMNopPause(void) RT_NOTHROW_PROTO;
445#else
446DECLINLINE(void) ASMNopPause(void) RT_NOTHROW_DEF
447{
448# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
449# if RT_INLINE_ASM_GNU_STYLE
450 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
451# else
452 __asm {
453 _emit 0f3h
454 _emit 090h
455 }
456# endif
457
458# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
459 __asm__ __volatile__("yield\n\t"); /* ARMv6K+ */
460
461# else
462 /* dummy */
463# endif
464}
465#endif
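
/* Typical spin-wait sketch for ASMNopPause(). The spinAcquire() helper and its
 * pfLock word are hypothetical; real code would normally use RTSpinlock or
 * RTCritSect rather than hand-rolling a lock. ASMAtomicCmpXchgU32() is defined
 * further down in this header.
 *
 * @code
 * static void spinAcquire(uint32_t volatile *pfLock)
 * {
 *     while (!ASMAtomicCmpXchgU32(pfLock, 1, 0))  // try to take the lock (0 -> 1)
 *         ASMNopPause();                          // hint to the CPU that we're busy-waiting
 * }
 * @endcode
 */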
466
467
468/**
469 * Atomically Exchange an unsigned 8-bit value, ordered.
470 *
471 * @returns Current *pu8 value
472 * @param pu8 Pointer to the 8-bit variable to update.
473 * @param u8 The 8-bit value to assign to *pu8.
474 */
475#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
476RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_PROTO;
477#else
478DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
479{
480# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
481# if RT_INLINE_ASM_GNU_STYLE
482 __asm__ __volatile__("xchgb %0, %1\n\t"
483 : "=m" (*pu8)
484 , "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
485 : "1" (u8)
486 , "m" (*pu8));
487# else
488 __asm
489 {
490# ifdef RT_ARCH_AMD64
491 mov rdx, [pu8]
492 mov al, [u8]
493 xchg [rdx], al
494 mov [u8], al
495# else
496 mov edx, [pu8]
497 mov al, [u8]
498 xchg [edx], al
499 mov [u8], al
500# endif
501 }
502# endif
503 return u8;
504
505# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
506 uint32_t uOld;
507 uint32_t rcSpill;
508 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU8_%=:\n\t"
509 RTASM_ARM_DMB_SY
510# if defined(RT_ARCH_ARM64)
511 "ldaxrb %w[uOld], %[pMem]\n\t"
512 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
513 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU8_%=\n\t"
514# else
515 "ldrexb %[uOld], %[pMem]\n\t" /* ARMv6+ */
516 "strexb %[rc], %[uNew], %[pMem]\n\t"
517 "cmp %[rc], #0\n\t"
518 "bne .Ltry_again_ASMAtomicXchgU8_%=\n\t"
519# endif
520 : [pMem] "+m" (*pu8)
521 , [uOld] "=&r" (uOld)
522 , [rc] "=&r" (rcSpill)
523 : [uNew] "r" ((uint32_t)u8)
524 RTASM_ARM_DMB_SY_COMMA_IN_REG
525 : "cc");
526 return (uint8_t)uOld;
527
528# else
529# error "Port me"
530# endif
531}
532#endif
533
534
535/**
536 * Atomically Exchange a signed 8-bit value, ordered.
537 *
538 * @returns Current *pi8 value
539 * @param pi8 Pointer to the 8-bit variable to update.
540 * @param i8 The 8-bit value to assign to *pi8.
541 */
542DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
543{
544 return (int8_t)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8);
545}
546
547
548/**
549 * Atomically Exchange a bool value, ordered.
550 *
551 * @returns Current *pf value
552 * @param pf Pointer to the 8-bit variable to update.
553 * @param f The 8-bit value to assign to *pf.
554 */
555DECLINLINE(bool) ASMAtomicXchgBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
556{
557#ifdef _MSC_VER
558 return !!ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
559#else
560 return (bool)ASMAtomicXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)f);
561#endif
562}
563
564
565/**
566 * Atomically Exchange an unsigned 16-bit value, ordered.
567 *
568 * @returns Current *pu16 value
569 * @param pu16 Pointer to the 16-bit variable to update.
570 * @param u16 The 16-bit value to assign to *pu16.
571 */
572#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
573RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_PROTO;
574#else
575DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
576{
577# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
578# if RT_INLINE_ASM_GNU_STYLE
579 __asm__ __volatile__("xchgw %0, %1\n\t"
580 : "=m" (*pu16)
581 , "=r" (u16)
582 : "1" (u16)
583 , "m" (*pu16));
584# else
585 __asm
586 {
587# ifdef RT_ARCH_AMD64
588 mov rdx, [pu16]
589 mov ax, [u16]
590 xchg [rdx], ax
591 mov [u16], ax
592# else
593 mov edx, [pu16]
594 mov ax, [u16]
595 xchg [edx], ax
596 mov [u16], ax
597# endif
598 }
599# endif
600 return u16;
601
602# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
603 uint32_t uOld;
604 uint32_t rcSpill;
605 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU16_%=:\n\t"
606 RTASM_ARM_DMB_SY
607# if defined(RT_ARCH_ARM64)
608 "ldaxrh %w[uOld], %[pMem]\n\t"
609 "stlxrh %w[rc], %w[uNew], %[pMem]\n\t"
610 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU16_%=\n\t"
611# else
612 "ldrexh %[uOld], %[pMem]\n\t" /* ARMv6+ */
613 "strexh %[rc], %[uNew], %[pMem]\n\t"
614 "cmp %[rc], #0\n\t"
615 "bne .Ltry_again_ASMAtomicXchgU16_%=\n\t"
616# endif
617 : [pMem] "+m" (*pu16)
618 , [uOld] "=&r" (uOld)
619 , [rc] "=&r" (rcSpill)
620 : [uNew] "r" ((uint32_t)u16)
621 RTASM_ARM_DMB_SY_COMMA_IN_REG
622 : "cc");
623 return (uint16_t)uOld;
624
625# else
626# error "Port me"
627# endif
628}
629#endif
630
631
632/**
633 * Atomically Exchange a signed 16-bit value, ordered.
634 *
635 * @returns Current *pi16 value
636 * @param pi16 Pointer to the 16-bit variable to update.
637 * @param i16 The 16-bit value to assign to *pi16.
638 */
639DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
640{
641 return (int16_t)ASMAtomicXchgU16((volatile uint16_t RT_FAR *)pi16, (uint16_t)i16);
642}
643
644
645/**
646 * Atomically Exchange an unsigned 32-bit value, ordered.
647 *
648 * @returns Current *pu32 value
649 * @param pu32 Pointer to the 32-bit variable to update.
650 * @param u32 The 32-bit value to assign to *pu32.
651 *
652 * @remarks Does not work on 286 and earlier.
653 */
654#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
655RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
656#else
657DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
658{
659# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
660# if RT_INLINE_ASM_GNU_STYLE
661 __asm__ __volatile__("xchgl %0, %1\n\t"
662 : "=m" (*pu32) /** @todo r=bird: +m rather than =m here? */
663 , "=r" (u32)
664 : "1" (u32)
665 , "m" (*pu32));
666
667# elif RT_INLINE_ASM_USES_INTRIN
668 u32 = _InterlockedExchange((long RT_FAR *)pu32, u32);
669
670# else
671 __asm
672 {
673# ifdef RT_ARCH_AMD64
674 mov rdx, [pu32]
675 mov eax, u32
676 xchg [rdx], eax
677 mov [u32], eax
678# else
679 mov edx, [pu32]
680 mov eax, u32
681 xchg [edx], eax
682 mov [u32], eax
683# endif
684 }
685# endif
686 return u32;
687
688# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
689 uint32_t uOld;
690 uint32_t rcSpill;
691 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU32_%=:\n\t"
692 RTASM_ARM_DMB_SY
693# if defined(RT_ARCH_ARM64)
694 "ldaxr %w[uOld], %[pMem]\n\t"
695 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
696 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU32_%=\n\t"
697# else
698 "ldrex %[uOld], %[pMem]\n\t" /* ARMv6+ */
699 "strex %[rc], %[uNew], %[pMem]\n\t"
700 "cmp %[rc], #0\n\t"
701 "bne .Ltry_again_ASMAtomicXchgU32_%=\n\t"
702# endif
703 : [pMem] "+m" (*pu32)
704 , [uOld] "=&r" (uOld)
705 , [rc] "=&r" (rcSpill)
706 : [uNew] "r" (u32)
707 RTASM_ARM_DMB_SY_COMMA_IN_REG
708 : "cc");
709 return uOld;
710
711# else
712# error "Port me"
713# endif
714}
715#endif
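
/* Usage sketch: ASMAtomicXchgU32() as a "consume once" operation on a flag word.
 * The consumePendingFlag() helper and pfPending are illustrative names only.
 *
 * @code
 * static bool consumePendingFlag(uint32_t volatile *pfPending)
 * {
 *     // Only one caller observes true for each time the flag was set.
 *     return ASMAtomicXchgU32(pfPending, 0) != 0;
 * }
 * @endcode
 */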
716
717
718/**
719 * Atomically Exchange a signed 32-bit value, ordered.
720 *
721 * @returns Current *pi32 value
722 * @param pi32 Pointer to the 32-bit variable to update.
723 * @param i32 The 32-bit value to assign to *pi32.
724 */
725DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
726{
727 return (int32_t)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32);
728}
729
730
731/**
732 * Atomically Exchange an unsigned 64-bit value, ordered.
733 *
734 * @returns Current *pu64 value
735 * @param pu64 Pointer to the 64-bit variable to update.
736 * @param u64 The 64-bit value to assign to *pu64.
737 *
738 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
739 */
740#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
741 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
742RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
743#else
744DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
745{
746# if defined(RT_ARCH_AMD64)
747# if RT_INLINE_ASM_USES_INTRIN
748 return _InterlockedExchange64((__int64 *)pu64, u64);
749
750# elif RT_INLINE_ASM_GNU_STYLE
751 __asm__ __volatile__("xchgq %0, %1\n\t"
752 : "=m" (*pu64)
753 , "=r" (u64)
754 : "1" (u64)
755 , "m" (*pu64));
756 return u64;
757# else
758 __asm
759 {
760 mov rdx, [pu64]
761 mov rax, [u64]
762 xchg [rdx], rax
763 mov [u64], rax
764 }
765 return u64;
766# endif
767
768# elif defined(RT_ARCH_X86)
769# if RT_INLINE_ASM_GNU_STYLE
770# if defined(PIC) || defined(__PIC__)
771 uint32_t u32EBX = (uint32_t)u64;
772 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
773 "xchgl %%ebx, %3\n\t"
774 "1:\n\t"
775 "lock; cmpxchg8b (%5)\n\t"
776 "jnz 1b\n\t"
777 "movl %3, %%ebx\n\t"
778 /*"xchgl %%esi, %5\n\t"*/
779 : "=A" (u64)
780 , "=m" (*pu64)
781 : "0" (*pu64)
782 , "m" ( u32EBX )
783 , "c" ( (uint32_t)(u64 >> 32) )
784 , "S" (pu64)
785 : "cc");
786# else /* !PIC */
787 __asm__ __volatile__("1:\n\t"
788 "lock; cmpxchg8b %1\n\t"
789 "jnz 1b\n\t"
790 : "=A" (u64)
791 , "=m" (*pu64)
792 : "0" (*pu64)
793 , "b" ( (uint32_t)u64 )
794 , "c" ( (uint32_t)(u64 >> 32) )
795 : "cc");
796# endif
797# else
798 __asm
799 {
800 mov ebx, dword ptr [u64]
801 mov ecx, dword ptr [u64 + 4]
802 mov edi, pu64
803 mov eax, dword ptr [edi]
804 mov edx, dword ptr [edi + 4]
805 retry:
806 lock cmpxchg8b [edi]
807 jnz retry
808 mov dword ptr [u64], eax
809 mov dword ptr [u64 + 4], edx
810 }
811# endif
812 return u64;
813
814# elif defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64)
815 uint32_t rcSpill;
816 uint64_t uOld;
817 __asm__ __volatile__(".Ltry_again_ASMAtomicXchgU64_%=:\n\t"
818 RTASM_ARM_DMB_SY
819# if defined(RT_ARCH_ARM64)
820 "ldaxr %[uOld], %[pMem]\n\t"
821 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
822 "cbnz %w[rc], .Ltry_again_ASMAtomicXchgU64_%=\n\t"
823# else
824 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t" /* ARMv6+ */
825 "strexd %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
826 "cmp %[rc], #0\n\t"
827 "bne .Ltry_again_ASMAtomicXchgU64_%=\n\t"
828# endif
829 : [pMem] "+m" (*pu64)
830 , [uOld] "=&r" (uOld)
831 , [rc] "=&r" (rcSpill)
832 : [uNew] "r" (u64)
833 RTASM_ARM_DMB_SY_COMMA_IN_REG
834 : "cc");
835 return uOld;
836
837# else
838# error "Port me"
839# endif
840}
841#endif
842
843
844/**
845 * Atomically Exchange a signed 64-bit value, ordered.
846 *
847 * @returns Current *pi64 value
848 * @param pi64 Pointer to the 64-bit variable to update.
849 * @param i64 The 64-bit value to assign to *pi64.
850 */
851DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
852{
853 return (int64_t)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64);
854}
855
856
857/**
858 * Atomically Exchange a size_t value, ordered.
859 *
860 * @returns Current *puDst value
861 * @param puDst Pointer to the size_t variable to update.
862 * @param uNew The new value to assign to *puDst.
863 */
864DECLINLINE(size_t) ASMAtomicXchgZ(size_t volatile RT_FAR *puDst, const size_t uNew) RT_NOTHROW_DEF
865{
866#if ARCH_BITS == 16
867 AssertCompile(sizeof(size_t) == 2);
868 return ASMAtomicXchgU16((volatile uint16_t RT_FAR *)puDst, uNew);
869#elif ARCH_BITS == 32
870 return ASMAtomicXchgU32((volatile uint32_t RT_FAR *)puDst, uNew);
871#elif ARCH_BITS == 64
872 return ASMAtomicXchgU64((volatile uint64_t RT_FAR *)puDst, uNew);
873#else
874# error "ARCH_BITS is bogus"
875#endif
876}
877
878
879/**
880 * Atomically Exchange a pointer value, ordered.
881 *
882 * @returns Current *ppv value
883 * @param ppv Pointer to the pointer variable to update.
884 * @param pv The pointer value to assign to *ppv.
885 */
886DECLINLINE(void RT_FAR *) ASMAtomicXchgPtr(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pv) RT_NOTHROW_DEF
887{
888#if ARCH_BITS == 32 || ARCH_BITS == 16
889 return (void RT_FAR *)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
890#elif ARCH_BITS == 64
891 return (void RT_FAR *)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
892#else
893# error "ARCH_BITS is bogus"
894#endif
895}
896
897
898/**
899 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
900 *
901 * @returns Current *pv value
902 * @param ppv Pointer to the pointer variable to update.
903 * @param pv The pointer value to assign to *ppv.
904 * @param Type The type of *ppv, sans volatile.
905 */
906#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
907# define ASMAtomicXchgPtrT(ppv, pv, Type) \
908 __extension__ \
909 ({\
910 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
911 Type const pvTypeChecked = (pv); \
912 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
913 pvTypeCheckedRet; \
914 })
915#else
916# define ASMAtomicXchgPtrT(ppv, pv, Type) \
917 (Type)ASMAtomicXchgPtr((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv))
918#endif
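
/* Usage sketch for ASMAtomicXchgPtrT(): exchanging a typed pointer without the
 * casting noise. The MYNODE/PMYNODE types and g_pHead global are hypothetical.
 *
 * @code
 * typedef struct MYNODE { struct MYNODE *pNext; } MYNODE;
 * typedef MYNODE *PMYNODE;
 * static PMYNODE volatile g_pHead;
 *
 * static PMYNODE swapHead(PMYNODE pNewHead)
 * {
 *     return ASMAtomicXchgPtrT(&g_pHead, pNewHead, PMYNODE);  // old head comes back correctly typed
 * }
 * @endcode
 */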
919
920
921/**
922 * Atomically Exchange a raw-mode context pointer value, ordered.
923 *
924 * @returns Current *ppvRC value
925 * @param ppvRC Pointer to the pointer variable to update.
926 * @param pvRC The pointer value to assign to *ppvRC.
927 */
928DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile RT_FAR *ppvRC, RTRCPTR pvRC) RT_NOTHROW_DEF
929{
930 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(void RT_FAR *)ppvRC, (uint32_t)pvRC);
931}
932
933
934/**
935 * Atomically Exchange a ring-0 pointer value, ordered.
936 *
937 * @returns Current *ppvR0 value
938 * @param ppvR0 Pointer to the pointer variable to update.
939 * @param pvR0 The pointer value to assign to *ppvR0.
940 */
941DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile RT_FAR *ppvR0, RTR0PTR pvR0) RT_NOTHROW_DEF
942{
943#if R0_ARCH_BITS == 32 || ARCH_BITS == 16
944 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR0, (uint32_t)pvR0);
945#elif R0_ARCH_BITS == 64
946 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR0, (uint64_t)pvR0);
947#else
948# error "R0_ARCH_BITS is bogus"
949#endif
950}
951
952
953/**
954 * Atomically Exchange a ring-3 pointer value, ordered.
955 *
956 * @returns Current *ppvR3 value
957 * @param ppvR3 Pointer to the pointer variable to update.
958 * @param pvR3 The pointer value to assign to *ppvR3.
959 */
960DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile RT_FAR *ppvR3, RTR3PTR pvR3) RT_NOTHROW_DEF
961{
962#if R3_ARCH_BITS == 32 || ARCH_BITS == 16
963 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppvR3, (uint32_t)pvR3);
964#elif R3_ARCH_BITS == 64
965 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppvR3, (uint64_t)pvR3);
966#else
967# error "R3_ARCH_BITS is bogus"
968#endif
969}
970
971
972/** @def ASMAtomicXchgHandle
973 * Atomically Exchange a typical IPRT handle value, ordered.
974 *
975 * @param ph Pointer to the value to update.
976 * @param hNew The new value to assign to *ph.
977 * @param phRes Where to store the current *ph value.
978 *
979 * @remarks This doesn't currently work for all handles (like RTFILE).
980 */
981#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
982# define ASMAtomicXchgHandle(ph, hNew, phRes) \
983 do { \
984 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
985 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
986 *(uint32_t RT_FAR *)(phRes) = ASMAtomicXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
987 } while (0)
988#elif HC_ARCH_BITS == 64
989# define ASMAtomicXchgHandle(ph, hNew, phRes) \
990 do { \
991 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
992 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
993 *(uint64_t RT_FAR *)(phRes) = ASMAtomicXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
994 } while (0)
995#else
996# error HC_ARCH_BITS
997#endif
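
/* Usage sketch for ASMAtomicXchgHandle(): atomically stealing a handle so it is
 * destroyed exactly once. The g_hEvt global is hypothetical and the example
 * additionally assumes iprt/semaphore.h for RTSemEventDestroy().
 *
 * @code
 * static RTSEMEVENT volatile g_hEvt;
 *
 * static void destroyEventOnce(void)
 * {
 *     RTSEMEVENT hEvt;
 *     ASMAtomicXchgHandle(&g_hEvt, NIL_RTSEMEVENT, &hEvt);
 *     if (hEvt != NIL_RTSEMEVENT)
 *         RTSemEventDestroy(hEvt);
 * }
 * @endcode
 */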
998
999
1000/**
1001 * Atomically Exchange a value whose size might differ
1002 * between platforms or compilers, ordered.
1003 *
1004 * @param pu Pointer to the variable to update.
1005 * @param uNew The value to assign to *pu.
1006 * @todo This is busted as it's missing the result argument.
1007 */
1008#define ASMAtomicXchgSize(pu, uNew) \
1009 do { \
1010 switch (sizeof(*(pu))) { \
1011 case 1: ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1012 case 2: ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1013 case 4: ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1014 case 8: ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1015 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1016 } \
1017 } while (0)
1018
1019/**
1020 * Atomically Exchange a value whose size might differ
1021 * between platforms or compilers, ordered.
1022 *
1023 * @param pu Pointer to the variable to update.
1024 * @param uNew The value to assign to *pu.
1025 * @param puRes Where to store the current *pu value.
1026 */
1027#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
1028 do { \
1029 switch (sizeof(*(pu))) { \
1030 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicXchgU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t)(uNew)); break; \
1031 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicXchgU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
1032 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
1033 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
1034 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1035 } \
1036 } while (0)
1037
1038
1039
1040/**
1041 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
1042 *
1043 * @returns true if xchg was done.
1044 * @returns false if xchg wasn't done.
1045 *
1046 * @param pu8 Pointer to the value to update.
1047 * @param u8New The new value to assign to *pu8.
1048 * @param u8Old The old value to compare *pu8 with.
1049 *
1050 * @remarks x86: Requires a 486 or later.
1051 * @todo Rename ASMAtomicCmpWriteU8
1052 */
1053#if RT_INLINE_ASM_EXTERNAL_TMP_ARM || !RT_INLINE_ASM_GNU_STYLE
1054RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, const uint8_t u8Old) RT_NOTHROW_PROTO;
1055#else
1056DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t RT_FAR *pu8, const uint8_t u8New, uint8_t u8Old) RT_NOTHROW_DEF
1057{
1058# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1059 uint8_t u8Ret;
1060 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
1061 "setz %1\n\t"
1062 : "=m" (*pu8)
1063 , "=qm" (u8Ret)
1064 , "=a" (u8Old)
1065 : "q" (u8New)
1066 , "2" (u8Old)
1067 , "m" (*pu8)
1068 : "cc");
1069 return (bool)u8Ret;
1070
1071# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1072 union { uint32_t u; bool f; } fXchg;
1073 uint32_t u32Spill;
1074 uint32_t rcSpill;
1075 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU8_%=:\n\t"
1076 RTASM_ARM_DMB_SY
1077# if defined(RT_ARCH_ARM64)
1078 "ldaxrb %w[uOld], %[pMem]\n\t"
1079 "cmp %w[uOld], %w[uCmp]\n\t"
1080 "bne 1f\n\t" /* stop here if not equal */
1081 "stlxrb %w[rc], %w[uNew], %[pMem]\n\t"
1082 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1083 "mov %w[fXchg], #1\n\t"
1084# else
1085 "ldrexb %[uOld], %[pMem]\n\t"
1086 "teq %[uOld], %[uCmp]\n\t"
1087 "strexbeq %[rc], %[uNew], %[pMem]\n\t"
1088 "bne 1f\n\t" /* stop here if not equal */
1089 "cmp %[rc], #0\n\t"
1090 "bne .Ltry_again_ASMAtomicCmpXchgU8_%=\n\t"
1091 "mov %[fXchg], #1\n\t"
1092# endif
1093 "1:\n\t"
1094 : [pMem] "+m" (*pu8)
1095 , [uOld] "=&r" (u32Spill)
1096 , [rc] "=&r" (rcSpill)
1097 , [fXchg] "=&r" (fXchg.u)
1098 : [uCmp] "r" ((uint32_t)u8Old)
1099 , [uNew] "r" ((uint32_t)u8New)
1100 , "[fXchg]" (0)
1101 RTASM_ARM_DMB_SY_COMMA_IN_REG
1102 : "cc");
1103 return fXchg.f;
1104
1105# else
1106# error "Port me"
1107# endif
1108}
1109#endif
1110
1111
1112/**
1113 * Atomically Compare and Exchange a signed 8-bit value, ordered.
1114 *
1115 * @returns true if xchg was done.
1116 * @returns false if xchg wasn't done.
1117 *
1118 * @param pi8 Pointer to the value to update.
1119 * @param i8New The new value to assign to *pi8.
1120 * @param i8Old The old value to compare *pi8 with.
1121 *
1122 * @remarks x86: Requires a 486 or later.
1123 * @todo Rename ASMAtomicCmpWriteS8
1124 */
1125DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t RT_FAR *pi8, const int8_t i8New, const int8_t i8Old) RT_NOTHROW_DEF
1126{
1127 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pi8, (uint8_t)i8New, (uint8_t)i8Old);
1128}
1129
1130
1131/**
1132 * Atomically Compare and Exchange a bool value, ordered.
1133 *
1134 * @returns true if xchg was done.
1135 * @returns false if xchg wasn't done.
1136 *
1137 * @param pf Pointer to the value to update.
1138 * @param fNew The new value to assign to *pf.
1139 * @param fOld The old value to compare *pf with.
1140 *
1141 * @remarks x86: Requires a 486 or later.
1142 * @todo Rename ASMAtomicCmpWriteBool
1143 */
1144DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool RT_FAR *pf, const bool fNew, const bool fOld) RT_NOTHROW_DEF
1145{
1146 return ASMAtomicCmpXchgU8((volatile uint8_t RT_FAR *)pf, (uint8_t)fNew, (uint8_t)fOld);
1147}
1148
1149
1150/**
1151 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
1152 *
1153 * @returns true if xchg was done.
1154 * @returns false if xchg wasn't done.
1155 *
1156 * @param pu32 Pointer to the value to update.
1157 * @param u32New The new value to assign to *pu32.
1158 * @param u32Old The old value to compare *pu32 with.
1159 *
1160 * @remarks x86: Requires a 486 or later.
1161 * @todo Rename ASMAtomicCmpWriteU32
1162 */
1163#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1164RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old) RT_NOTHROW_PROTO;
1165#else
1166DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, uint32_t u32Old) RT_NOTHROW_DEF
1167{
1168# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1169# if RT_INLINE_ASM_GNU_STYLE
1170 uint8_t u8Ret;
1171 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1172 "setz %1\n\t"
1173 : "=m" (*pu32)
1174 , "=qm" (u8Ret)
1175 , "=a" (u32Old)
1176 : "r" (u32New)
1177 , "2" (u32Old)
1178 , "m" (*pu32)
1179 : "cc");
1180 return (bool)u8Ret;
1181
1182# elif RT_INLINE_ASM_USES_INTRIN
1183 return (uint32_t)_InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old) == u32Old;
1184
1185# else
1186 uint32_t u32Ret;
1187 __asm
1188 {
1189# ifdef RT_ARCH_AMD64
1190 mov rdx, [pu32]
1191# else
1192 mov edx, [pu32]
1193# endif
1194 mov eax, [u32Old]
1195 mov ecx, [u32New]
1196# ifdef RT_ARCH_AMD64
1197 lock cmpxchg [rdx], ecx
1198# else
1199 lock cmpxchg [edx], ecx
1200# endif
1201 setz al
1202 movzx eax, al
1203 mov [u32Ret], eax
1204 }
1205 return !!u32Ret;
1206# endif
1207
1208# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1209 union { uint32_t u; bool f; } fXchg;
1210 uint32_t u32Spill;
1211 uint32_t rcSpill;
1212 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU32_%=:\n\t"
1213 RTASM_ARM_DMB_SY
1214# if defined(RT_ARCH_ARM64)
1215 "ldaxr %w[uOld], %[pMem]\n\t"
1216 "cmp %w[uOld], %w[uCmp]\n\t"
1217 "bne 1f\n\t" /* stop here if not equal */
1218 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1219 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1220 "mov %w[fXchg], #1\n\t"
1221# else
1222 "ldrex %[uOld], %[pMem]\n\t"
1223 "teq %[uOld], %[uCmp]\n\t"
1224 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1225 "bne 1f\n\t" /* stop here if not equal */
1226 "cmp %[rc], #0\n\t"
1227 "bne .Ltry_again_ASMAtomicCmpXchgU32_%=\n\t"
1228 "mov %[fXchg], #1\n\t"
1229# endif
1230 "1:\n\t"
1231 : [pMem] "+m" (*pu32)
1232 , [uOld] "=&r" (u32Spill)
1233 , [rc] "=&r" (rcSpill)
1234 , [fXchg] "=&r" (fXchg.u)
1235 : [uCmp] "r" (u32Old)
1236 , [uNew] "r" (u32New)
1237 , "[fXchg]" (0)
1238 RTASM_ARM_DMB_SY_COMMA_IN_REG
1239 : "cc");
1240 return fXchg.f;
1241
1242# else
1243# error "Port me"
1244# endif
1245}
1246#endif
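
/* Classic compare-and-exchange retry loop built on ASMAtomicCmpXchgU32(). The
 * atomicSetIfGreaterU32() helper is an illustrative sketch, not an IPRT API.
 *
 * @code
 * static uint32_t atomicSetIfGreaterU32(uint32_t volatile *pu32, uint32_t uNew)
 * {
 *     for (;;)
 *     {
 *         uint32_t const uOld = *pu32;                // snapshot
 *         if (uNew <= uOld)
 *             return uOld;                            // nothing to update
 *         if (ASMAtomicCmpXchgU32(pu32, uNew, uOld))
 *             return uOld;                            // we won the race
 *         // *pu32 changed under us - take a fresh snapshot and retry
 *     }
 * }
 * @endcode
 */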
1247
1248
1249/**
1250 * Atomically Compare and Exchange a signed 32-bit value, ordered.
1251 *
1252 * @returns true if xchg was done.
1253 * @returns false if xchg wasn't done.
1254 *
1255 * @param pi32 Pointer to the value to update.
1256 * @param i32New The new value to assign to *pi32.
1257 * @param i32Old The old value to compare *pi32 with.
1258 *
1259 * @remarks x86: Requires a 486 or later.
1260 * @todo Rename ASMAtomicCmpWriteS32
1261 */
1262DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old) RT_NOTHROW_DEF
1263{
1264 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
1265}
1266
1267
1268/**
1269 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
1270 *
1271 * @returns true if xchg was done.
1272 * @returns false if xchg wasn't done.
1273 *
1274 * @param pu64 Pointer to the 64-bit variable to update.
1275 * @param u64New The 64-bit value to assign to *pu64.
1276 * @param u64Old The value to compare with.
1277 *
1278 * @remarks x86: Requires a Pentium or later.
1279 * @todo Rename ASMAtomicCmpWriteU64
1280 */
1281#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1282 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1283RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old) RT_NOTHROW_PROTO;
1284#else
1285DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t RT_FAR *pu64, uint64_t u64New, uint64_t u64Old) RT_NOTHROW_DEF
1286{
1287# if RT_INLINE_ASM_USES_INTRIN
1288 return (uint64_t)_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old) == u64Old;
1289
1290# elif defined(RT_ARCH_AMD64)
1291# if RT_INLINE_ASM_GNU_STYLE
1292 uint8_t u8Ret;
1293 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1294 "setz %1\n\t"
1295 : "=m" (*pu64)
1296 , "=qm" (u8Ret)
1297 , "=a" (u64Old)
1298 : "r" (u64New)
1299 , "2" (u64Old)
1300 , "m" (*pu64)
1301 : "cc");
1302 return (bool)u8Ret;
1303# else
1304 bool fRet;
1305 __asm
1306 {
1307 mov rdx, [pu64]
1308 mov rax, [u64Old]
1309 mov rcx, [u64New]
1310 lock cmpxchg [rdx], rcx
1311 setz al
1312 mov [fRet], al
1313 }
1314 return fRet;
1315# endif
1316
1317# elif defined(RT_ARCH_X86)
1318 uint32_t u32Ret;
1319# if RT_INLINE_ASM_GNU_STYLE
1320# if defined(PIC) || defined(__PIC__)
1321 uint32_t u32EBX = (uint32_t)u64New;
1322 uint32_t u32Spill;
1323 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
1324 "lock; cmpxchg8b (%6)\n\t"
1325 "setz %%al\n\t"
1326 "movl %4, %%ebx\n\t"
1327 "movzbl %%al, %%eax\n\t"
1328 : "=a" (u32Ret)
1329 , "=d" (u32Spill)
1330# if RT_GNUC_PREREQ(4, 3)
1331 , "+m" (*pu64)
1332# else
1333 , "=m" (*pu64)
1334# endif
1335 : "A" (u64Old)
1336 , "m" ( u32EBX )
1337 , "c" ( (uint32_t)(u64New >> 32) )
1338 , "S" (pu64)
1339 : "cc");
1340# else /* !PIC */
1341 uint32_t u32Spill;
1342 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
1343 "setz %%al\n\t"
1344 "movzbl %%al, %%eax\n\t"
1345 : "=a" (u32Ret)
1346 , "=d" (u32Spill)
1347 , "+m" (*pu64)
1348 : "A" (u64Old)
1349 , "b" ( (uint32_t)u64New )
1350 , "c" ( (uint32_t)(u64New >> 32) )
1351 : "cc");
1352# endif
1353 return (bool)u32Ret;
1354# else
1355 __asm
1356 {
1357 mov ebx, dword ptr [u64New]
1358 mov ecx, dword ptr [u64New + 4]
1359 mov edi, [pu64]
1360 mov eax, dword ptr [u64Old]
1361 mov edx, dword ptr [u64Old + 4]
1362 lock cmpxchg8b [edi]
1363 setz al
1364 movzx eax, al
1365 mov dword ptr [u32Ret], eax
1366 }
1367 return !!u32Ret;
1368# endif
1369
1370# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1371 union { uint32_t u; bool f; } fXchg;
1372 uint64_t u64Spill;
1373 uint32_t rcSpill;
1374 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1375 RTASM_ARM_DMB_SY
1376# if defined(RT_ARCH_ARM64)
1377 "ldaxr %[uOld], %[pMem]\n\t"
1378 "cmp %[uOld], %[uCmp]\n\t"
1379 "bne 1f\n\t" /* stop here if not equal */
1380 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1381 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1382 "mov %w[fXchg], #1\n\t"
1383# else
1384 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1385 "teq %[uOld], %[uCmp]\n\t"
1386 "teqeq %H[uOld], %H[uCmp]\n\t"
1387 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1388 "bne 1f\n\t" /* stop here if not equal */
1389 "cmp %[rc], #0\n\t"
1390 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1391 "mov %[fXchg], #1\n\t"
1392# endif
1393 "1:\n\t"
1394 : [pMem] "+m" (*pu64)
1395 , [uOld] "=&r" (u64Spill)
1396 , [rc] "=&r" (rcSpill)
1397 , [fXchg] "=&r" (fXchg.u)
1398 : [uCmp] "r" (u64Old)
1399 , [uNew] "r" (u64New)
1400 , "[fXchg]" (0)
1401 RTASM_ARM_DMB_SY_COMMA_IN_REG
1402 : "cc");
1403 return fXchg.f;
1404
1405# else
1406# error "Port me"
1407# endif
1408}
1409#endif
1410
1411
1412/**
1413 * Atomically Compare and exchange a signed 64-bit value, ordered.
1414 *
1415 * @returns true if xchg was done.
1416 * @returns false if xchg wasn't done.
1417 *
1418 * @param pi64 Pointer to the 64-bit variable to update.
1419 * @param i64 The 64-bit value to assign to *pi64.
1420 * @param i64Old The value to compare with.
1421 *
1422 * @remarks x86: Requires a Pentium or later.
1423 * @todo Rename ASMAtomicCmpWriteS64
1424 */
1425DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old) RT_NOTHROW_DEF
1426{
1427 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old);
1428}
1429
1430#if defined(RT_ARCH_AMD64) || defined(DOXYGEN_RUNNING)
1431
1432/** @def RTASM_HAVE_CMP_WRITE_U128
1433 * Indicates that we've got ASMAtomicCmpWriteU128() available. */
1434# define RTASM_HAVE_CMP_WRITE_U128 1
1435
1436
1437/**
1438 * Atomically compare and write an unsigned 128-bit value, ordered.
1439 *
1440 * @returns true if write was done.
1441 * @returns false if write wasn't done.
1442 *
1443 * @param pu128 Pointer to the 128-bit variable to update.
1444 * @param u64NewHi The high 64 bits of the value to assign to *pu128.
1445 * @param u64NewLo The low 64 bits of the value to assign to *pu128.
1446 * @param u64OldHi The high 64-bit of the value to compare with.
1447 * @param u64OldLo The low 64-bit of the value to compare with.
1448 *
1449 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1450 */
1451# if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN)
1452DECLASM(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1453 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_PROTO;
1454# else
1455DECLINLINE(bool) ASMAtomicCmpWriteU128v2(volatile uint128_t *pu128, const uint64_t u64NewHi, const uint64_t u64NewLo,
1456 const uint64_t u64OldHi, const uint64_t u64OldLo) RT_NOTHROW_DEF
1457{
1458# if RT_INLINE_ASM_USES_INTRIN
1459 __int64 ai64Cmp[2];
1460 ai64Cmp[0] = u64OldLo;
1461 ai64Cmp[1] = u64OldHi;
1462 return _InterlockedCompareExchange128((__int64 volatile *)pu128, u64NewHi, u64NewLo, ai64Cmp) != 0;
1463
1464# elif defined(RT_ARCH_AMD64)
1465# if RT_INLINE_ASM_GNU_STYLE
1466 uint64_t u64Ret;
1467 uint64_t u64Spill;
1468 __asm__ __volatile__("lock; cmpxchg16b %2\n\t"
1469 "setz %%al\n\t"
1470 "movzbl %%al, %%eax\n\t"
1471 : "=a" (u64Ret)
1472 , "=d" (u64Spill)
1473 , "+m" (*pu128)
1474 : "a" (u64OldLo)
1475 , "d" (u64OldHi)
1476 , "b" (u64NewLo)
1477 , "c" (u64NewHi)
1478 : "cc");
1479
1480 return (bool)u64Ret;
1481# else
1482# error "Port me"
1483# endif
1484# else
1485# error "Port me"
1486# endif
1487}
1488# endif
1489
1490
1491/**
1492 * Atomically compare and write an unsigned 128-bit value, ordered.
1493 *
1494 * @returns true if write was done.
1495 * @returns false if write wasn't done.
1496 *
1497 * @param pu128 Pointer to the 128-bit variable to update.
1498 * @param u128New The 128-bit value to assign to *pu128.
1499 * @param u128Old The value to compare with.
1500 *
1501 * @remarks AMD64: Not present in the earliest CPUs, so check CPUID.
1502 */
1503DECLINLINE(bool) ASMAtomicCmpWriteU128(volatile uint128_t *pu128, const uint128_t u128New, const uint128_t u128Old) RT_NOTHROW_DEF
1504{
1505# ifdef RT_COMPILER_WITH_128BIT_INT_TYPES
1506 return ASMAtomicCmpWriteU128v2(pu128, (uint64_t)(u128New >> 64), (uint64_t)u128New,
1507 (uint64_t)(u128Old >> 64), (uint64_t)u128Old);
1508# else
1509 return ASMAtomicCmpWriteU128v2(pu128, u128New.Hi, u128New.Lo, u128Old.Hi, u128Old.Lo);
1510# endif
1511}
1512
1513
1514/**
1515 * RTUINT128U wrapper for ASMAtomicCmpWriteU128.
1516 */
1517DECLINLINE(bool) ASMAtomicCmpWriteU128U(volatile RTUINT128U *pu128, const RTUINT128U u128New,
1518 const RTUINT128U u128Old) RT_NOTHROW_DEF
1519{
1520 return ASMAtomicCmpWriteU128v2(&pu128->u, u128New.s.Hi, u128New.s.Lo, u128Old.s.Hi, u128Old.s.Lo);
1521}
1522
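/* Usage sketch for the 128-bit compare-and-write API above (only available when
 * RTASM_HAVE_CMP_WRITE_U128 is defined). The tryPublishPair() helper and its
 * parameter names are illustrative.
 *
 * @code
 * static bool tryPublishPair(volatile RTUINT128U *pu128Dst, RTUINT128U uExpected, RTUINT128U uNew)
 * {
 *     return ASMAtomicCmpWriteU128U(pu128Dst, uNew, uExpected);  // true if *pu128Dst held uExpected
 * }
 * @endcode
 */
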
1523#endif /* RT_ARCH_AMD64 */
1524
1525
1526/**
1527 * Atomically Compare and Exchange a pointer value, ordered.
1528 *
1529 * @returns true if xchg was done.
1530 * @returns false if xchg wasn't done.
1531 *
1532 * @param ppv Pointer to the value to update.
1533 * @param pvNew The new value to assign to *ppv.
1534 * @param pvOld The old value to compare *ppv with.
1535 *
1536 * @remarks x86: Requires a 486 or later.
1537 * @todo Rename ASMAtomicCmpWritePtrVoid
1538 */
1539DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld) RT_NOTHROW_DEF
1540{
1541#if ARCH_BITS == 32 || ARCH_BITS == 16
1542 return ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
1543#elif ARCH_BITS == 64
1544 return ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
1545#else
1546# error "ARCH_BITS is bogus"
1547#endif
1548}
1549
1550
1551/**
1552 * Atomically Compare and Exchange a pointer value, ordered.
1553 *
1554 * @returns true if xchg was done.
1555 * @returns false if xchg wasn't done.
1556 *
1557 * @param ppv Pointer to the value to update.
1558 * @param pvNew The new value to assign to *ppv.
1559 * @param pvOld The old value to compare *ppv with.
1560 *
1561 * @remarks This is relatively type safe on GCC platforms.
1562 * @remarks x86: Requires a 486 or later.
1563 * @todo Rename ASMAtomicCmpWritePtr
1564 */
1565#ifdef __GNUC__
1566# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1567 __extension__ \
1568 ({\
1569 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1570 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1571 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1572 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
1573 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
1574 fMacroRet; \
1575 })
1576#else
1577# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
1578 ASMAtomicCmpXchgPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld))
1579#endif
1580
1581
1582/** @def ASMAtomicCmpXchgHandle
1583 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1584 *
1585 * @param ph Pointer to the value to update.
1586 * @param hNew The new value to assign to *ph.
1587 * @param hOld The old value to compare *ph with.
1588 * @param fRc Where to store the result.
1589 *
1590 * @remarks This doesn't currently work for all handles (like RTFILE).
1591 * @remarks x86: Requires a 486 or later.
1592 * @todo Rename ASMAtomicCmpWriteHandle
1593 */
1594#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1595# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1596 do { \
1597 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1598 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1599 } while (0)
1600#elif HC_ARCH_BITS == 64
1601# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1602 do { \
1603 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1604 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1605 } while (0)
1606#else
1607# error HC_ARCH_BITS
1608#endif
1609
1610
1611/** @def ASMAtomicCmpXchgSize
1612 * Atomically Compare and Exchange a value whose size might differ
1613 * between platforms or compilers, ordered.
1614 *
1615 * @param pu Pointer to the value to update.
1616 * @param uNew The new value to assign to *pu.
1617 * @param uOld The old value to compare *pu with.
1618 * @param fRc Where to store the result.
1619 *
1620 * @remarks x86: Requires a 486 or later.
1621 * @todo Rename ASMAtomicCmpWriteSize
1622 */
1623#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1624 do { \
1625 switch (sizeof(*(pu))) { \
1626 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1627 break; \
1628 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1629 break; \
1630 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1631 (fRc) = false; \
1632 break; \
1633 } \
1634 } while (0)
1635
1636
1637/**
1638 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1639 * passes back old value, ordered.
1640 *
1641 * @returns true if xchg was done.
1642 * @returns false if xchg wasn't done.
1643 *
1644 * @param pu32 Pointer to the value to update.
1645 * @param u32New The new value to assign to *pu32.
1646 * @param u32Old The old value to compare *pu32 with.
1647 * @param pu32Old Pointer to store the old value at.
1648 *
1649 * @remarks x86: Requires a 486 or later.
1650 */
1651#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
1652RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_PROTO;
1653#else
1654DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t RT_FAR *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t RT_FAR *pu32Old) RT_NOTHROW_DEF
1655{
1656# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1657# if RT_INLINE_ASM_GNU_STYLE
1658 uint8_t u8Ret;
1659 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1660 "setz %1\n\t"
1661 : "=m" (*pu32)
1662 , "=qm" (u8Ret)
1663 , "=a" (*pu32Old)
1664 : "r" (u32New)
1665 , "a" (u32Old)
1666 , "m" (*pu32)
1667 : "cc");
1668 return (bool)u8Ret;
1669
1670# elif RT_INLINE_ASM_USES_INTRIN
1671 return (*pu32Old = _InterlockedCompareExchange((long RT_FAR *)pu32, u32New, u32Old)) == u32Old;
1672
1673# else
1674 uint32_t u32Ret;
1675 __asm
1676 {
1677# ifdef RT_ARCH_AMD64
1678 mov rdx, [pu32]
1679# else
1680 mov edx, [pu32]
1681# endif
1682 mov eax, [u32Old]
1683 mov ecx, [u32New]
1684# ifdef RT_ARCH_AMD64
1685 lock cmpxchg [rdx], ecx
1686 mov rdx, [pu32Old]
1687 mov [rdx], eax
1688# else
1689 lock cmpxchg [edx], ecx
1690 mov edx, [pu32Old]
1691 mov [edx], eax
1692# endif
1693 setz al
1694 movzx eax, al
1695 mov [u32Ret], eax
1696 }
1697 return !!u32Ret;
1698# endif
1699
1700# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1701 union { uint32_t u; bool f; } fXchg;
1702 uint32_t u32ActualOld;
1703 uint32_t rcSpill;
1704 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgExU32_%=:\n\t"
1705 RTASM_ARM_DMB_SY
1706# if defined(RT_ARCH_ARM64)
1707 "ldaxr %w[uOld], %[pMem]\n\t"
1708 "cmp %w[uOld], %w[uCmp]\n\t"
1709 "bne 1f\n\t" /* stop here if not equal */
1710 "stlxr %w[rc], %w[uNew], %[pMem]\n\t"
1711 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1712 "mov %w[fXchg], #1\n\t"
1713# else
1714 "ldrex %[uOld], %[pMem]\n\t"
1715 "teq %[uOld], %[uCmp]\n\t"
1716 "strexeq %[rc], %[uNew], %[pMem]\n\t"
1717 "bne 1f\n\t" /* stop here if not equal */
1718 "cmp %[rc], #0\n\t"
1719 "bne .Ltry_again_ASMAtomicCmpXchgExU32_%=\n\t"
1720 "mov %[fXchg], #1\n\t"
1721# endif
1722 "1:\n\t"
1723 : [pMem] "+m" (*pu32)
1724 , [uOld] "=&r" (u32ActualOld)
1725 , [rc] "=&r" (rcSpill)
1726 , [fXchg] "=&r" (fXchg.u)
1727 : [uCmp] "r" (u32Old)
1728 , [uNew] "r" (u32New)
1729 , "[fXchg]" (0)
1730 RTASM_ARM_DMB_SY_COMMA_IN_REG
1731 : "cc");
1732 *pu32Old = u32ActualOld;
1733 return fXchg.f;
1734
1735# else
1736# error "Port me"
1737# endif
1738}
1739#endif
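
/* Example (illustrative sketch; ExampleSetFlagBit and g_u32ExampleFlags are
 * hypothetical, not IPRT API): on a failed compare the actual old value is
 * handed back, so the retry loop does not need to re-read memory itself:
 *
 *      static uint32_t volatile g_u32ExampleFlags;
 *
 *      static void ExampleSetFlagBit(uint32_t fBit)
 *      {
 *          uint32_t u32Old = ASMAtomicReadU32(&g_u32ExampleFlags);
 *          uint32_t u32New;
 *          do
 *              u32New = u32Old | fBit;
 *          while (!ASMAtomicCmpXchgExU32(&g_u32ExampleFlags, u32New, u32Old, &u32Old));
 *      }
 */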
1740
1741
1742/**
1743 * Atomically Compare and Exchange a signed 32-bit value, additionally
1744 * passes back old value, ordered.
1745 *
1746 * @returns true if xchg was done.
1747 * @returns false if xchg wasn't done.
1748 *
1749 * @param pi32 Pointer to the value to update.
1750 * @param   i32New      The new value to assign to *pi32.
1751 * @param   i32Old      The old value to compare *pi32 with.
1752 * @param   pi32Old     Pointer to store the old value at.
1753 *
1754 * @remarks x86: Requires a 486 or later.
1755 */
1756DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t RT_FAR *pi32, const int32_t i32New, const int32_t i32Old, int32_t RT_FAR *pi32Old) RT_NOTHROW_DEF
1757{
1758 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t RT_FAR *)pi32Old);
1759}
1760
1761
1762/**
1763 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1764 * passing back old value, ordered.
1765 *
1766 * @returns true if xchg was done.
1767 * @returns false if xchg wasn't done.
1768 *
1769 * @param pu64 Pointer to the 64-bit variable to update.
1770 * @param u64New The 64-bit value to assign to *pu64.
1771 * @param u64Old The value to compare with.
1772 * @param   pu64Old     Pointer to store the old value at.
1773 *
1774 * @remarks x86: Requires a Pentium or later.
1775 */
1776#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
1777 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1778RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_PROTO;
1779#else
1780DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t RT_FAR *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t RT_FAR *pu64Old) RT_NOTHROW_DEF
1781{
1782# if RT_INLINE_ASM_USES_INTRIN
1783 return (*pu64Old =_InterlockedCompareExchange64((__int64 RT_FAR *)pu64, u64New, u64Old)) == u64Old;
1784
1785# elif defined(RT_ARCH_AMD64)
1786# if RT_INLINE_ASM_GNU_STYLE
1787 uint8_t u8Ret;
1788 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1789 "setz %1\n\t"
1790 : "=m" (*pu64)
1791 , "=qm" (u8Ret)
1792 , "=a" (*pu64Old)
1793 : "r" (u64New)
1794 , "a" (u64Old)
1795 , "m" (*pu64)
1796 : "cc");
1797 return (bool)u8Ret;
1798# else
1799 bool fRet;
1800 __asm
1801 {
1802            mov     rdx, [pu64]
1803 mov rax, [u64Old]
1804 mov rcx, [u64New]
1805 lock cmpxchg [rdx], rcx
1806 mov rdx, [pu64Old]
1807 mov [rdx], rax
1808 setz al
1809 mov [fRet], al
1810 }
1811 return fRet;
1812# endif
1813
1814# elif defined(RT_ARCH_X86)
1815# if RT_INLINE_ASM_GNU_STYLE
1816 uint64_t u64Ret;
1817# if defined(PIC) || defined(__PIC__)
1818 /* NB: this code uses a memory clobber description, because the clean
1819 * solution with an output value for *pu64 makes gcc run out of registers.
1820 * This will cause suboptimal code, and anyone with a better solution is
1821 * welcome to improve this. */
1822 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1823 "lock; cmpxchg8b %3\n\t"
1824 "xchgl %%ebx, %1\n\t"
1825 : "=A" (u64Ret)
1826 : "DS" ((uint32_t)u64New)
1827 , "c" ((uint32_t)(u64New >> 32))
1828 , "m" (*pu64)
1829 , "0" (u64Old)
1830 : "memory"
1831 , "cc" );
1832# else /* !PIC */
1833 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1834 : "=A" (u64Ret)
1835 , "=m" (*pu64)
1836 : "b" ((uint32_t)u64New)
1837 , "c" ((uint32_t)(u64New >> 32))
1838 , "m" (*pu64)
1839 , "0" (u64Old)
1840 : "cc");
1841# endif
1842 *pu64Old = u64Ret;
1843 return u64Ret == u64Old;
1844# else
1845 uint32_t u32Ret;
1846 __asm
1847 {
1848 mov ebx, dword ptr [u64New]
1849 mov ecx, dword ptr [u64New + 4]
1850 mov edi, [pu64]
1851 mov eax, dword ptr [u64Old]
1852 mov edx, dword ptr [u64Old + 4]
1853 lock cmpxchg8b [edi]
1854 mov ebx, [pu64Old]
1855 mov [ebx], eax
1856 setz al
1857 movzx eax, al
1858 add ebx, 4
1859 mov [ebx], edx
1860 mov dword ptr [u32Ret], eax
1861 }
1862 return !!u32Ret;
1863# endif
1864
1865# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
1866 union { uint32_t u; bool f; } fXchg;
1867 uint64_t u64ActualOld;
1868 uint32_t rcSpill;
1869 __asm__ __volatile__(".Ltry_again_ASMAtomicCmpXchgU64_%=:\n\t"
1870 RTASM_ARM_DMB_SY
1871# if defined(RT_ARCH_ARM64)
1872 "ldaxr %[uOld], %[pMem]\n\t"
1873 "cmp %[uOld], %[uCmp]\n\t"
1874 "bne 1f\n\t" /* stop here if not equal */
1875 "stlxr %w[rc], %[uNew], %[pMem]\n\t"
1876 "cbnz %w[rc], .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1877 "mov %w[fXchg], #1\n\t"
1878# else
1879 "ldrexd %[uOld], %H[uOld], %[pMem]\n\t"
1880 "teq %[uOld], %[uCmp]\n\t"
1881 "teqeq %H[uOld], %H[uCmp]\n\t"
1882 "strexdeq %[rc], %[uNew], %H[uNew], %[pMem]\n\t"
1883 "bne 1f\n\t" /* stop here if not equal */
1884 "cmp %[rc], #0\n\t"
1885 "bne .Ltry_again_ASMAtomicCmpXchgU64_%=\n\t"
1886 "mov %[fXchg], #1\n\t"
1887# endif
1888 "1:\n\t"
1889 : [pMem] "+m" (*pu64)
1890 , [uOld] "=&r" (u64ActualOld)
1891 , [rc] "=&r" (rcSpill)
1892 , [fXchg] "=&r" (fXchg.u)
1893 : [uCmp] "r" (u64Old)
1894 , [uNew] "r" (u64New)
1895 , "[fXchg]" (0)
1896 RTASM_ARM_DMB_SY_COMMA_IN_REG
1897 : "cc");
1898 *pu64Old = u64ActualOld;
1899 return fXchg.f;
1900
1901# else
1902# error "Port me"
1903# endif
1904}
1905#endif
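
/* Example (illustrative sketch; ExampleUpdateMaxU64 is hypothetical, not IPRT
 * API): atomically raise a 64-bit high-water mark, giving up once another
 * thread has stored a value at least as large.  A failed compare refreshes
 * u64Old with the current value, so the loop only retries when needed:
 *
 *      static void ExampleUpdateMaxU64(uint64_t volatile *pu64Max, uint64_t u64New)
 *      {
 *          uint64_t u64Old = ASMAtomicUoReadU64(pu64Max);
 *          while (   u64New > u64Old
 *                 && !ASMAtomicCmpXchgExU64(pu64Max, u64New, u64Old, &u64Old))
 *          { } // retry with the refreshed u64Old
 *      }
 */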
1906
1907
1908/**
1909 * Atomically Compare and exchange a signed 64-bit value, additionally
1910 * passing back old value, ordered.
1911 *
1912 * @returns true if xchg was done.
1913 * @returns false if xchg wasn't done.
1914 *
1915 * @param pi64 Pointer to the 64-bit variable to update.
1916 * @param   i64         The 64-bit value to assign to *pi64.
1917 * @param   i64Old      The value to compare with.
1918 * @param   pi64Old     Pointer to store the old value at.
1919 *
1920 * @remarks x86: Requires a Pentium or later.
1921 */
1922DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t RT_FAR *pi64, const int64_t i64, const int64_t i64Old, int64_t RT_FAR *pi64Old) RT_NOTHROW_DEF
1923{
1924 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t RT_FAR *)pi64Old);
1925}
1926
1927/** @def ASMAtomicCmpXchgExHandle
1928 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1929 *
1930 * @param ph Pointer to the value to update.
1931 * @param   hNew        The new value to assign to *ph.
1932 * @param   hOld        The old value to compare *ph with.
1933 * @param fRc Where to store the result.
1934 * @param phOldVal Pointer to where to store the old value.
1935 *
1936 * @remarks This doesn't currently work for all handles (like RTFILE).
1937 */
1938#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
1939# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1940 do { \
1941        AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1942        AssertCompile(sizeof(*(phOldVal)) == sizeof(uint32_t)); \
1943 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(ph), (uint32_t)(hNew), (uint32_t)(hOld), (uint32_t RT_FAR *)(phOldVal)); \
1944 } while (0)
1945#elif HC_ARCH_BITS == 64
1946# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1947 do { \
1948 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1949 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1950 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(ph), (uint64_t)(hNew), (uint64_t)(hOld), (uint64_t RT_FAR *)(phOldVal)); \
1951 } while (0)
1952#else
1953# error HC_ARCH_BITS
1954#endif
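
/* Example (illustrative sketch; g_hExampleEvent is hypothetical): lazily
 * creating an event semaphore, letting whichever thread loses the race clean
 * up its own handle.  RTSEMEVENT is one of the handle types this works for:
 *
 *      RTSEMEVENT hNew;
 *      int rc = RTSemEventCreate(&hNew);
 *      if (RT_SUCCESS(rc))
 *      {
 *          bool       fRc;
 *          RTSEMEVENT hOld;
 *          ASMAtomicCmpXchgExHandle(&g_hExampleEvent, hNew, NIL_RTSEMEVENT, fRc, &hOld);
 *          if (!fRc)
 *              RTSemEventDestroy(hNew);    // lost the race; somebody else created it
 *      }
 */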
1955
1956
1957/** @def ASMAtomicCmpXchgExSize
1958 * Atomically Compare and Exchange a value whose size might differ
1959 * between platforms or compilers. Additionally passes back the old value.
1960 *
1961 * @param   pu          Pointer to the value to update.
1962 * @param   uNew        The new value to assign to *pu.
1963 * @param   uOld        The old value to compare *pu with.
1964 * @param fRc Where to store the result.
1965 * @param puOldVal Pointer to where to store the old value.
1966 *
1967 * @remarks x86: Requires a 486 or later.
1968 */
1969#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1970 do { \
1971 switch (sizeof(*(pu))) { \
1972            case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t RT_FAR *)(puOldVal)); \
1973                break; \
1974            case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t RT_FAR *)(puOldVal)); \
1975                break; \
1976            default: AssertMsgFailed(("ASMAtomicCmpXchgExSize: size %d is not supported\n", sizeof(*(pu)))); \
1977                (fRc) = false; \
1978                *(puOldVal) = 0; \
1979 break; \
1980 } \
1981 } while (0)
1982
1983
1984/**
1985 * Atomically Compare and Exchange a pointer value, additionally
1986 * passing back old value, ordered.
1987 *
1988 * @returns true if xchg was done.
1989 * @returns false if xchg wasn't done.
1990 *
1991 * @param ppv Pointer to the value to update.
1992 * @param   pvNew       The new value to assign to *ppv.
1993 * @param   pvOld       The old value to compare *ppv with.
1994 * @param   ppvOld      Pointer to store the old value at.
1995 *
1996 * @remarks x86: Requires a 486 or later.
1997 */
1998DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void RT_FAR *pvNew, const void RT_FAR *pvOld,
1999 void RT_FAR * RT_FAR *ppvOld) RT_NOTHROW_DEF
2000{
2001#if ARCH_BITS == 32 || ARCH_BITS == 16
2002 return ASMAtomicCmpXchgExU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t RT_FAR *)ppvOld);
2003#elif ARCH_BITS == 64
2004 return ASMAtomicCmpXchgExU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t RT_FAR *)ppvOld);
2005#else
2006# error "ARCH_BITS is bogus"
2007#endif
2008}
2009
2010
2011/**
2012 * Atomically Compare and Exchange a pointer value, additionally
2013 * passing back old value, ordered.
2014 *
2015 * @returns true if xchg was done.
2016 * @returns false if xchg wasn't done.
2017 *
2018 * @param ppv Pointer to the value to update.
2019 * @param   pvNew       The new value to assign to *ppv.
2020 * @param   pvOld       The old value to compare *ppv with.
2021 * @param   ppvOld      Pointer to store the old value at.
2022 *
2023 * @remarks This is relatively type safe on GCC platforms.
2024 * @remarks x86: Requires a 486 or later.
2025 */
2026#ifdef __GNUC__
2027# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2028 __extension__ \
2029 ({\
2030 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2031 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
2032 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
2033 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
2034 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
2035 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
2036 (void **)ppvOldTypeChecked); \
2037 fMacroRet; \
2038 })
2039#else
2040# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
2041 ASMAtomicCmpXchgExPtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pvNew), (void RT_FAR *)(pvOld), (void RT_FAR * RT_FAR *)(ppvOld))
2042#endif
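
/* Example (illustrative sketch; EXAMPLENODE and g_pExampleHead are
 * hypothetical, not IPRT types): pushing onto a lock-free LIFO list with the
 * type-checked macro, retrying with the old head handed back on failure:
 *
 *      typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *      static EXAMPLENODE * volatile g_pExampleHead;
 *
 *      static void ExamplePush(EXAMPLENODE *pNode)
 *      {
 *          EXAMPLENODE *pOld = ASMAtomicUoReadPtrT(&g_pExampleHead, EXAMPLENODE *);
 *          do
 *              pNode->pNext = pOld;
 *          while (!ASMAtomicCmpXchgExPtr(&g_pExampleHead, pNode, pOld, &pOld));
 *      }
 */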
2043
2044
2045/**
2046 * Virtualization unfriendly serializing instruction, always exits.
2047 */
2048#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2049RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_PROTO;
2050#else
2051DECLINLINE(void) ASMSerializeInstructionCpuId(void) RT_NOTHROW_DEF
2052{
2053# if RT_INLINE_ASM_GNU_STYLE
2054 RTCCUINTREG xAX = 0;
2055# ifdef RT_ARCH_AMD64
2056 __asm__ __volatile__ ("cpuid"
2057 : "=a" (xAX)
2058 : "0" (xAX)
2059 : "rbx", "rcx", "rdx", "memory");
2060# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
2061 __asm__ __volatile__ ("push %%ebx\n\t"
2062 "cpuid\n\t"
2063 "pop %%ebx\n\t"
2064 : "=a" (xAX)
2065 : "0" (xAX)
2066 : "ecx", "edx", "memory");
2067# else
2068 __asm__ __volatile__ ("cpuid"
2069 : "=a" (xAX)
2070 : "0" (xAX)
2071 : "ebx", "ecx", "edx", "memory");
2072# endif
2073
2074# elif RT_INLINE_ASM_USES_INTRIN
2075 int aInfo[4];
2076 _ReadWriteBarrier();
2077 __cpuid(aInfo, 0);
2078
2079# else
2080 __asm
2081 {
2082 push ebx
2083 xor eax, eax
2084 cpuid
2085 pop ebx
2086 }
2087# endif
2088}
2089#endif
2090
2091/**
2092 * Virtualization friendly serializing instruction, though more expensive.
2093 */
2094#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2095RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_PROTO;
2096#else
2097DECLINLINE(void) ASMSerializeInstructionIRet(void) RT_NOTHROW_DEF
2098{
2099# if RT_INLINE_ASM_GNU_STYLE
2100# ifdef RT_ARCH_AMD64
2101 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
2102 "subq $128, %%rsp\n\t" /*redzone*/
2103 "mov %%ss, %%eax\n\t"
2104 "pushq %%rax\n\t"
2105 "pushq %%r10\n\t"
2106 "pushfq\n\t"
2107 "movl %%cs, %%eax\n\t"
2108 "pushq %%rax\n\t"
2109 "leaq 1f(%%rip), %%rax\n\t"
2110 "pushq %%rax\n\t"
2111 "iretq\n\t"
2112 "1:\n\t"
2113 ::: "rax", "r10", "memory", "cc");
2114# else
2115 __asm__ __volatile__ ("pushfl\n\t"
2116 "pushl %%cs\n\t"
2117 "pushl $1f\n\t"
2118 "iretl\n\t"
2119 "1:\n\t"
2120 ::: "memory");
2121# endif
2122
2123# else
2124 __asm
2125 {
2126 pushfd
2127 push cs
2128 push la_ret
2129 iretd
2130 la_ret:
2131 }
2132# endif
2133}
2134#endif
2135
2136/**
2137 * Virtualization friendlier serializing instruction, may still cause exits.
2138 */
2139#if (RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < RT_MSC_VER_VS2008) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
2140RT_ASM_DECL_PRAGMA_WATCOM(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_PROTO;
2141#else
2142DECLINLINE(void) ASMSerializeInstructionRdTscp(void) RT_NOTHROW_DEF
2143{
2144# if RT_INLINE_ASM_GNU_STYLE
2145 /* rdtscp is not supported by ancient linux build VM of course :-( */
2146# ifdef RT_ARCH_AMD64
2147 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
2148 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
2149# else
2150 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
2151 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
2152# endif
2153# else
2154# if RT_INLINE_ASM_USES_INTRIN >= RT_MSC_VER_VS2008
2155 uint32_t uIgnore;
2156 _ReadWriteBarrier();
2157 (void)__rdtscp(&uIgnore);
2158 (void)uIgnore;
2159# else
2160 __asm
2161 {
2162 rdtscp
2163 }
2164# endif
2165# endif
2166}
2167#endif
2168
2169
2170/**
2171 * Serialize Instruction (both data store and instruction flush).
2172 */
2173#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
2174# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
2175#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
2176# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
2177#elif defined(RT_ARCH_SPARC64)
2178RTDECL(void) ASMSerializeInstruction(void) RT_NOTHROW_PROTO;
2179#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2180DECLINLINE(void) ASMSerializeInstruction(void) RT_NOTHROW_DEF
2181{
2182 __asm__ __volatile__ (RTASM_ARM_DSB_SY :: RTASM_ARM_DSB_SY_IN_REG :);
2183}
2184#else
2185# error "Port me"
2186#endif
2187
2188
2189/**
2190 * Memory fence, waits for any pending writes and reads to complete.
2191 * @note No implicit compiler barrier (which is probably stupid).
2192 */
2193DECLINLINE(void) ASMMemoryFence(void) RT_NOTHROW_DEF
2194{
2195#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2196# if RT_INLINE_ASM_GNU_STYLE
2197 __asm__ __volatile__ (".byte 0x0f,0xae,0xf0\n\t");
2198# elif RT_INLINE_ASM_USES_INTRIN
2199 _mm_mfence();
2200# else
2201 __asm
2202 {
2203 _emit 0x0f
2204 _emit 0xae
2205 _emit 0xf0
2206 }
2207# endif
2208#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2209 __asm__ __volatile__ (RTASM_ARM_DMB_SY :: RTASM_ARM_DMB_SY_IN_REG :);
2210#elif ARCH_BITS == 16
2211 uint16_t volatile u16;
2212 ASMAtomicXchgU16(&u16, 0);
2213#else
2214 uint32_t volatile u32;
2215 ASMAtomicXchgU32(&u32, 0);
2216#endif
2217}
2218
2219
2220/**
2221 * Write fence, waits for any pending writes to complete.
2222 * @note No implicit compiler barrier (which is probably stupid).
2223 */
2224DECLINLINE(void) ASMWriteFence(void) RT_NOTHROW_DEF
2225{
2226#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2227# if RT_INLINE_ASM_GNU_STYLE
2228 __asm__ __volatile__ (".byte 0x0f,0xae,0xf8\n\t");
2229# elif RT_INLINE_ASM_USES_INTRIN
2230 _mm_sfence();
2231# else
2232 __asm
2233 {
2234 _emit 0x0f
2235 _emit 0xae
2236 _emit 0xf8
2237 }
2238# endif
2239#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2240 __asm__ __volatile__ (RTASM_ARM_DMB_ST :: RTASM_ARM_DMB_ST_IN_REG :);
2241#else
2242 ASMMemoryFence();
2243#endif
2244}
2245
2246
2247/**
2248 * Read fence, waits for any pending reads to complete.
2249 * @note No implicit compiler barrier (which is probably stupid).
2250 */
2251DECLINLINE(void) ASMReadFence(void) RT_NOTHROW_DEF
2252{
2253#if defined(RT_ARCH_AMD64) || (defined(RT_ARCH_X86) && !defined(RT_WITH_OLD_CPU_SUPPORT))
2254# if RT_INLINE_ASM_GNU_STYLE
2255 __asm__ __volatile__ (".byte 0x0f,0xae,0xe8\n\t");
2256# elif RT_INLINE_ASM_USES_INTRIN
2257 _mm_lfence();
2258# else
2259 __asm
2260 {
2261 _emit 0x0f
2262 _emit 0xae
2263 _emit 0xe8
2264 }
2265# endif
2266#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2267 __asm__ __volatile__ (RTASM_ARM_DMB_LD :: RTASM_ARM_DMB_LD_IN_REG :);
2268#else
2269 ASMMemoryFence();
2270#endif
2271}
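
/* Example (illustrative sketch; g_ExamplePayload, g_fExampleReady and the two
 * helpers are hypothetical): pairing the write and read fences to publish data
 * behind a flag.  As the notes above say, the fences imply no compiler
 * barrier, which is one reason the flag is accessed through the volatile
 * ASMAtomicUoXxx helpers:
 *
 *      // Producer:
 *      g_ExamplePayload = ExampleComputePayload();
 *      ASMWriteFence();                            // payload visible before the flag
 *      ASMAtomicUoWriteBool(&g_fExampleReady, true);
 *
 *      // Consumer:
 *      if (ASMAtomicUoReadBool(&g_fExampleReady))
 *      {
 *          ASMReadFence();                         // flag read completes before the payload read
 *          ExampleConsumePayload(g_ExamplePayload);
 *      }
 */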
2272
2273
2274/**
2275 * Atomically reads an unsigned 8-bit value, ordered.
2276 *
2277 * @returns Current *pu8 value
2278 * @param pu8 Pointer to the 8-bit variable to read.
2279 */
2280DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2281{
2282#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2283 uint32_t u32;
2284 __asm__ __volatile__(".Lstart_ASMAtomicReadU8_%=:\n\t"
2285 RTASM_ARM_DMB_SY
2286# if defined(RT_ARCH_ARM64)
2287 "ldxrb %w[uDst], %[pMem]\n\t"
2288# else
2289 "ldrexb %[uDst], %[pMem]\n\t"
2290# endif
2291 : [uDst] "=&r" (u32)
2292 : [pMem] "m" (*pu8)
2293 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2294 return (uint8_t)u32;
2295#else
2296 ASMMemoryFence();
2297 return *pu8; /* byte reads are atomic on x86 */
2298#endif
2299}
2300
2301
2302/**
2303 * Atomically reads an unsigned 8-bit value, unordered.
2304 *
2305 * @returns Current *pu8 value
2306 * @param pu8 Pointer to the 8-bit variable to read.
2307 */
2308DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t RT_FAR *pu8) RT_NOTHROW_DEF
2309{
2310#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2311 uint32_t u32;
2312 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU8_%=:\n\t"
2313# if defined(RT_ARCH_ARM64)
2314 "ldxrb %w[uDst], %[pMem]\n\t"
2315# else
2316 "ldrexb %[uDst], %[pMem]\n\t"
2317# endif
2318 : [uDst] "=&r" (u32)
2319 : [pMem] "m" (*pu8));
2320 return (uint8_t)u32;
2321#else
2322 return *pu8; /* byte reads are atomic on x86 */
2323#endif
2324}
2325
2326
2327/**
2328 * Atomically reads a signed 8-bit value, ordered.
2329 *
2330 * @returns Current *pi8 value
2331 * @param pi8 Pointer to the 8-bit variable to read.
2332 */
2333DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2334{
2335 ASMMemoryFence();
2336#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2337 int32_t i32;
2338 __asm__ __volatile__(".Lstart_ASMAtomicReadS8_%=:\n\t"
2339 RTASM_ARM_DMB_SY
2340# if defined(RT_ARCH_ARM64)
2341 "ldxrb %w[iDst], %[pMem]\n\t"
2342# else
2343 "ldrexb %[iDst], %[pMem]\n\t"
2344# endif
2345 : [iDst] "=&r" (i32)
2346 : [pMem] "m" (*pi8)
2347 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2348 return (int8_t)i32;
2349#else
2350 return *pi8; /* byte reads are atomic on x86 */
2351#endif
2352}
2353
2354
2355/**
2356 * Atomically reads a signed 8-bit value, unordered.
2357 *
2358 * @returns Current *pi8 value
2359 * @param pi8 Pointer to the 8-bit variable to read.
2360 */
2361DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t RT_FAR *pi8) RT_NOTHROW_DEF
2362{
2363#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2364 int32_t i32;
2365 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS8_%=:\n\t"
2366# if defined(RT_ARCH_ARM64)
2367 "ldxrb %w[iDst], %[pMem]\n\t"
2368# else
2369 "ldrexb %[iDst], %[pMem]\n\t"
2370# endif
2371 : [iDst] "=&r" (i32)
2372 : [pMem] "m" (*pi8));
2373 return (int8_t)i32;
2374#else
2375 return *pi8; /* byte reads are atomic on x86 */
2376#endif
2377}
2378
2379
2380/**
2381 * Atomically reads an unsigned 16-bit value, ordered.
2382 *
2383 * @returns Current *pu16 value
2384 * @param pu16 Pointer to the 16-bit variable to read.
2385 */
2386DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2387{
2388 Assert(!((uintptr_t)pu16 & 1));
2389#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2390 uint32_t u32;
2391 __asm__ __volatile__(".Lstart_ASMAtomicReadU16_%=:\n\t"
2392 RTASM_ARM_DMB_SY
2393# if defined(RT_ARCH_ARM64)
2394 "ldxrh %w[uDst], %[pMem]\n\t"
2395# else
2396 "ldrexh %[uDst], %[pMem]\n\t"
2397# endif
2398 : [uDst] "=&r" (u32)
2399 : [pMem] "m" (*pu16)
2400 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2401 return (uint16_t)u32;
2402#else
2403 ASMMemoryFence();
2404 return *pu16;
2405#endif
2406}
2407
2408
2409/**
2410 * Atomically reads an unsigned 16-bit value, unordered.
2411 *
2412 * @returns Current *pu16 value
2413 * @param pu16 Pointer to the 16-bit variable to read.
2414 */
2415DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t RT_FAR *pu16) RT_NOTHROW_DEF
2416{
2417 Assert(!((uintptr_t)pu16 & 1));
2418#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2419 uint32_t u32;
2420 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU16_%=:\n\t"
2421# if defined(RT_ARCH_ARM64)
2422 "ldxrh %w[uDst], %[pMem]\n\t"
2423# else
2424 "ldrexh %[uDst], %[pMem]\n\t"
2425# endif
2426 : [uDst] "=&r" (u32)
2427 : [pMem] "m" (*pu16));
2428 return (uint16_t)u32;
2429#else
2430 return *pu16;
2431#endif
2432}
2433
2434
2435/**
2436 * Atomically reads a signed 16-bit value, ordered.
2437 *
2438 * @returns Current *pi16 value
2439 * @param pi16 Pointer to the 16-bit variable to read.
2440 */
2441DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2442{
2443 Assert(!((uintptr_t)pi16 & 1));
2444#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2445 int32_t i32;
2446 __asm__ __volatile__(".Lstart_ASMAtomicReadS16_%=:\n\t"
2447 RTASM_ARM_DMB_SY
2448# if defined(RT_ARCH_ARM64)
2449 "ldxrh %w[iDst], %[pMem]\n\t"
2450# else
2451 "ldrexh %[iDst], %[pMem]\n\t"
2452# endif
2453 : [iDst] "=&r" (i32)
2454 : [pMem] "m" (*pi16)
2455 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2456 return (int16_t)i32;
2457#else
2458 ASMMemoryFence();
2459 return *pi16;
2460#endif
2461}
2462
2463
2464/**
2465 * Atomically reads a signed 16-bit value, unordered.
2466 *
2467 * @returns Current *pi16 value
2468 * @param pi16 Pointer to the 16-bit variable to read.
2469 */
2470DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t RT_FAR *pi16) RT_NOTHROW_DEF
2471{
2472 Assert(!((uintptr_t)pi16 & 1));
2473#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2474 int32_t i32;
2475 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS16_%=:\n\t"
2476# if defined(RT_ARCH_ARM64)
2477 "ldxrh %w[iDst], %[pMem]\n\t"
2478# else
2479 "ldrexh %[iDst], %[pMem]\n\t"
2480# endif
2481 : [iDst] "=&r" (i32)
2482 : [pMem] "m" (*pi16));
2483 return (int16_t)i32;
2484#else
2485 return *pi16;
2486#endif
2487}
2488
2489
2490/**
2491 * Atomically reads an unsigned 32-bit value, ordered.
2492 *
2493 * @returns Current *pu32 value
2494 * @param pu32 Pointer to the 32-bit variable to read.
2495 */
2496DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2497{
2498 Assert(!((uintptr_t)pu32 & 3));
2499#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2500 uint32_t u32;
2501 __asm__ __volatile__(".Lstart_ASMAtomicReadU32_%=:\n\t"
2502 RTASM_ARM_DMB_SY
2503# if defined(RT_ARCH_ARM64)
2504 "ldxr %w[uDst], %[pMem]\n\t"
2505# else
2506 "ldrex %[uDst], %[pMem]\n\t"
2507# endif
2508 : [uDst] "=&r" (u32)
2509 : [pMem] "m" (*pu32)
2510 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2511 return u32;
2512#else
2513 ASMMemoryFence();
2514# if ARCH_BITS == 16
2515 AssertFailed(); /** @todo 16-bit */
2516# endif
2517 return *pu32;
2518#endif
2519}
2520
2521
2522/**
2523 * Atomically reads an unsigned 32-bit value, unordered.
2524 *
2525 * @returns Current *pu32 value
2526 * @param pu32 Pointer to the 32-bit variable to read.
2527 */
2528DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t RT_FAR *pu32) RT_NOTHROW_DEF
2529{
2530 Assert(!((uintptr_t)pu32 & 3));
2531#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2532 uint32_t u32;
2533 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU32_%=:\n\t"
2534# if defined(RT_ARCH_ARM64)
2535 "ldxr %w[uDst], %[pMem]\n\t"
2536# else
2537 "ldrex %[uDst], %[pMem]\n\t"
2538# endif
2539 : [uDst] "=&r" (u32)
2540 : [pMem] "m" (*pu32));
2541 return u32;
2542#else
2543# if ARCH_BITS == 16
2544 AssertFailed(); /** @todo 16-bit */
2545# endif
2546 return *pu32;
2547#endif
2548}
2549
2550
2551/**
2552 * Atomically reads a signed 32-bit value, ordered.
2553 *
2554 * @returns Current *pi32 value
2555 * @param pi32 Pointer to the 32-bit variable to read.
2556 */
2557DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2558{
2559 Assert(!((uintptr_t)pi32 & 3));
2560#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2561 int32_t i32;
2562 __asm__ __volatile__(".Lstart_ASMAtomicReadS32_%=:\n\t"
2563 RTASM_ARM_DMB_SY
2564# if defined(RT_ARCH_ARM64)
2565 "ldxr %w[iDst], %[pMem]\n\t"
2566# else
2567 "ldrex %[iDst], %[pMem]\n\t"
2568# endif
2569 : [iDst] "=&r" (i32)
2570 : [pMem] "m" (*pi32)
2571 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2572 return i32;
2573#else
2574 ASMMemoryFence();
2575# if ARCH_BITS == 16
2576 AssertFailed(); /** @todo 16-bit */
2577# endif
2578 return *pi32;
2579#endif
2580}
2581
2582
2583/**
2584 * Atomically reads a signed 32-bit value, unordered.
2585 *
2586 * @returns Current *pi32 value
2587 * @param pi32 Pointer to the 32-bit variable to read.
2588 */
2589DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t RT_FAR *pi32) RT_NOTHROW_DEF
2590{
2591 Assert(!((uintptr_t)pi32 & 3));
2592#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2593 int32_t i32;
2594 __asm__ __volatile__(".Lstart_ASMAtomicUoReadS32_%=:\n\t"
2595# if defined(RT_ARCH_ARM64)
2596 "ldxr %w[iDst], %[pMem]\n\t"
2597# else
2598 "ldrex %[iDst], %[pMem]\n\t"
2599# endif
2600 : [iDst] "=&r" (i32)
2601 : [pMem] "m" (*pi32));
2602 return i32;
2603
2604#else
2605# if ARCH_BITS == 16
2606 AssertFailed(); /** @todo 16-bit */
2607# endif
2608 return *pi32;
2609#endif
2610}
2611
2612
2613/**
2614 * Atomically reads an unsigned 64-bit value, ordered.
2615 *
2616 * @returns Current *pu64 value
2617 * @param pu64 Pointer to the 64-bit variable to read.
2618 * The memory pointed to must be writable.
2619 *
2620 * @remarks This may fault if the memory is read-only!
2621 * @remarks x86: Requires a Pentium or later.
2622 */
2623#if (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !defined(RT_ARCH_AMD64)) \
2624 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
2625RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2626#else
2627DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2628{
2629 uint64_t u64;
2630# ifdef RT_ARCH_AMD64
2631 Assert(!((uintptr_t)pu64 & 7));
2632/*# if RT_INLINE_ASM_GNU_STYLE
2633 __asm__ __volatile__( "mfence\n\t"
2634 "movq %1, %0\n\t"
2635 : "=r" (u64)
2636 : "m" (*pu64));
2637# else
2638 __asm
2639 {
2640 mfence
2641 mov rdx, [pu64]
2642 mov rax, [rdx]
2643 mov [u64], rax
2644 }
2645# endif*/
2646 ASMMemoryFence();
2647 u64 = *pu64;
2648
2649# elif defined(RT_ARCH_X86)
2650# if RT_INLINE_ASM_GNU_STYLE
2651# if defined(PIC) || defined(__PIC__)
2652 uint32_t u32EBX = 0;
2653 Assert(!((uintptr_t)pu64 & 7));
2654 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
2655 "lock; cmpxchg8b (%5)\n\t"
2656 "movl %3, %%ebx\n\t"
2657 : "=A" (u64)
2658# if RT_GNUC_PREREQ(4, 3)
2659 , "+m" (*pu64)
2660# else
2661 , "=m" (*pu64)
2662# endif
2663 : "0" (0ULL)
2664 , "m" (u32EBX)
2665 , "c" (0)
2666 , "S" (pu64)
2667 : "cc");
2668# else /* !PIC */
2669 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2670 : "=A" (u64)
2671 , "+m" (*pu64)
2672 : "0" (0ULL)
2673 , "b" (0)
2674 , "c" (0)
2675 : "cc");
2676# endif
2677# else
2678 Assert(!((uintptr_t)pu64 & 7));
2679 __asm
2680 {
2681 xor eax, eax
2682 xor edx, edx
2683 mov edi, pu64
2684 xor ecx, ecx
2685 xor ebx, ebx
2686 lock cmpxchg8b [edi]
2687 mov dword ptr [u64], eax
2688 mov dword ptr [u64 + 4], edx
2689 }
2690# endif
2691
2692# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2693 Assert(!((uintptr_t)pu64 & 7));
2694 __asm__ __volatile__(".Lstart_ASMAtomicReadU64_%=:\n\t"
2695 RTASM_ARM_DMB_SY
2696# if defined(RT_ARCH_ARM64)
2697 "ldxr %[uDst], %[pMem]\n\t"
2698# else
2699 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2700# endif
2701 : [uDst] "=&r" (u64)
2702 : [pMem] "m" (*pu64)
2703 RTASM_ARM_DMB_SY_COMMA_IN_REG);
2704
2705# else
2706# error "Port me"
2707# endif
2708 return u64;
2709}
2710#endif
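
/* Example (illustrative sketch; pExampleStats and its cTotalCalls member are
 * hypothetical): a plain ordered 64-bit read.  On 32-bit x86 hosts this is
 * implemented with a locked cmpxchg8b, which is why the remarks above require
 * the memory to be writable and warn that the read may fault on read-only
 * mappings:
 *
 *      uint64_t const cTotal = ASMAtomicReadU64(&pExampleStats->cTotalCalls);
 */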
2711
2712
2713/**
2714 * Atomically reads an unsigned 64-bit value, unordered.
2715 *
2716 * @returns Current *pu64 value
2717 * @param pu64 Pointer to the 64-bit variable to read.
2718 * The memory pointed to must be writable.
2719 *
2720 * @remarks This may fault if the memory is read-only!
2721 * @remarks x86: Requires a Pentium or later.
2722 */
2723#if !defined(RT_ARCH_AMD64) \
2724 && ( (RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN) \
2725 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
2726RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_PROTO;
2727#else
2728DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t RT_FAR *pu64) RT_NOTHROW_DEF
2729{
2730 uint64_t u64;
2731# ifdef RT_ARCH_AMD64
2732 Assert(!((uintptr_t)pu64 & 7));
2733/*# if RT_INLINE_ASM_GNU_STYLE
2734 Assert(!((uintptr_t)pu64 & 7));
2735 __asm__ __volatile__("movq %1, %0\n\t"
2736 : "=r" (u64)
2737 : "m" (*pu64));
2738# else
2739 __asm
2740 {
2741 mov rdx, [pu64]
2742 mov rax, [rdx]
2743 mov [u64], rax
2744 }
2745# endif */
2746 u64 = *pu64;
2747
2748# elif defined(RT_ARCH_X86)
2749# if RT_INLINE_ASM_GNU_STYLE
2750# if defined(PIC) || defined(__PIC__)
2751 uint32_t u32EBX = 0;
2752 uint32_t u32Spill;
2753 Assert(!((uintptr_t)pu64 & 7));
2754 __asm__ __volatile__("xor %%eax,%%eax\n\t"
2755 "xor %%ecx,%%ecx\n\t"
2756 "xor %%edx,%%edx\n\t"
2757 "xchgl %%ebx, %3\n\t"
2758 "lock; cmpxchg8b (%4)\n\t"
2759 "movl %3, %%ebx\n\t"
2760 : "=A" (u64)
2761# if RT_GNUC_PREREQ(4, 3)
2762 , "+m" (*pu64)
2763# else
2764 , "=m" (*pu64)
2765# endif
2766 , "=c" (u32Spill)
2767 : "m" (u32EBX)
2768 , "S" (pu64)
2769 : "cc");
2770# else /* !PIC */
2771 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
2772 : "=A" (u64)
2773 , "+m" (*pu64)
2774 : "0" (0ULL)
2775 , "b" (0)
2776 , "c" (0)
2777 : "cc");
2778# endif
2779# else
2780 Assert(!((uintptr_t)pu64 & 7));
2781 __asm
2782 {
2783 xor eax, eax
2784 xor edx, edx
2785 mov edi, pu64
2786 xor ecx, ecx
2787 xor ebx, ebx
2788 lock cmpxchg8b [edi]
2789 mov dword ptr [u64], eax
2790 mov dword ptr [u64 + 4], edx
2791 }
2792# endif
2793
2794# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
2795 Assert(!((uintptr_t)pu64 & 7));
2796 __asm__ __volatile__(".Lstart_ASMAtomicUoReadU64_%=:\n\t"
2797# if defined(RT_ARCH_ARM64)
2798 "ldxr %[uDst], %[pMem]\n\t"
2799# else
2800 "ldrexd %[uDst], %H[uDst], %[pMem]\n\t"
2801# endif
2802 : [uDst] "=&r" (u64)
2803 : [pMem] "m" (*pu64));
2804
2805# else
2806# error "Port me"
2807# endif
2808 return u64;
2809}
2810#endif
2811
2812
2813/**
2814 * Atomically reads a signed 64-bit value, ordered.
2815 *
2816 * @returns Current *pi64 value
2817 * @param pi64 Pointer to the 64-bit variable to read.
2818 * The memory pointed to must be writable.
2819 *
2820 * @remarks This may fault if the memory is read-only!
2821 * @remarks x86: Requires a Pentium or later.
2822 */
2823DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2824{
2825 return (int64_t)ASMAtomicReadU64((volatile uint64_t RT_FAR *)pi64);
2826}
2827
2828
2829/**
2830 * Atomically reads a signed 64-bit value, unordered.
2831 *
2832 * @returns Current *pi64 value
2833 * @param pi64 Pointer to the 64-bit variable to read.
2834 * The memory pointed to must be writable.
2835 *
2836 * @remarks This will fault if the memory is read-only!
2837 * @remarks x86: Requires a Pentium or later.
2838 */
2839DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t RT_FAR *pi64) RT_NOTHROW_DEF
2840{
2841 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)pi64);
2842}
2843
2844
2845/**
2846 * Atomically reads a size_t value, ordered.
2847 *
2848 * @returns Current *pcb value
2849 * @param pcb Pointer to the size_t variable to read.
2850 */
2851DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2852{
2853#if ARCH_BITS == 64
2854 return ASMAtomicReadU64((uint64_t volatile RT_FAR *)pcb);
2855#elif ARCH_BITS == 32
2856 return ASMAtomicReadU32((uint32_t volatile RT_FAR *)pcb);
2857#elif ARCH_BITS == 16
2858 AssertCompileSize(size_t, 2);
2859 return ASMAtomicReadU16((uint16_t volatile RT_FAR *)pcb);
2860#else
2861# error "Unsupported ARCH_BITS value"
2862#endif
2863}
2864
2865
2866/**
2867 * Atomically reads a size_t value, unordered.
2868 *
2869 * @returns Current *pcb value
2870 * @param pcb Pointer to the size_t variable to read.
2871 */
2872DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
2873{
2874#if ARCH_BITS == 64
2875 return ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)pcb);
2876#elif ARCH_BITS == 32
2877 return ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)pcb);
2878#elif ARCH_BITS == 16
2879 AssertCompileSize(size_t, 2);
2880 return ASMAtomicUoReadU16((uint16_t volatile RT_FAR *)pcb);
2881#else
2882# error "Unsupported ARCH_BITS value"
2883#endif
2884}
2885
2886
2887/**
2888 * Atomically reads a pointer value, ordered.
2889 *
2890 * @returns Current *pv value
2891 * @param ppv Pointer to the pointer variable to read.
2892 *
2893 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
2894 * requires less typing (no casts).
2895 */
2896DECLINLINE(void RT_FAR *) ASMAtomicReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2897{
2898#if ARCH_BITS == 32 || ARCH_BITS == 16
2899 return (void RT_FAR *)ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2900#elif ARCH_BITS == 64
2901 return (void RT_FAR *)ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2902#else
2903# error "ARCH_BITS is bogus"
2904#endif
2905}
2906
2907/**
2908 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
2909 *
2910 * @returns Current *pv value
2911 * @param ppv Pointer to the pointer variable to read.
2912 * @param Type The type of *ppv, sans volatile.
2913 */
2914#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2915# define ASMAtomicReadPtrT(ppv, Type) \
2916 __extension__ \
2917 ({\
2918 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
2919 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
2920 pvTypeChecked; \
2921 })
2922#else
2923# define ASMAtomicReadPtrT(ppv, Type) \
2924 (Type)ASMAtomicReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2925#endif
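
/* Example (illustrative sketch; PEXAMPLESTATE and g_pExampleState are
 * hypothetical): the typed variant spares the caller the double cast that
 * ASMAtomicReadPtr itself needs:
 *
 *      PEXAMPLESTATE pState = ASMAtomicReadPtrT(&g_pExampleState, PEXAMPLESTATE);
 *      if (pState)
 *          ExampleProcess(pState);
 */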
2926
2927
2928/**
2929 * Atomically reads a pointer value, unordered.
2930 *
2931 * @returns Current *pv value
2932 * @param ppv Pointer to the pointer variable to read.
2933 *
2934 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
2935 * requires less typing (no casts).
2936 */
2937DECLINLINE(void RT_FAR *) ASMAtomicUoReadPtr(void RT_FAR * volatile RT_FAR *ppv) RT_NOTHROW_DEF
2938{
2939#if ARCH_BITS == 32 || ARCH_BITS == 16
2940 return (void RT_FAR *)ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv);
2941#elif ARCH_BITS == 64
2942 return (void RT_FAR *)ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv);
2943#else
2944# error "ARCH_BITS is bogus"
2945#endif
2946}
2947
2948
2949/**
2950 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2951 *
2952 * @returns Current *pv value
2953 * @param ppv Pointer to the pointer variable to read.
2954 * @param Type The type of *ppv, sans volatile.
2955 */
2956#ifdef __GNUC__ /* 8.2.0 requires -Wno-ignored-qualifiers */
2957# define ASMAtomicUoReadPtrT(ppv, Type) \
2958 __extension__ \
2959 ({\
2960 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2961 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2962 pvTypeChecked; \
2963 })
2964#else
2965# define ASMAtomicUoReadPtrT(ppv, Type) \
2966 (Type)ASMAtomicUoReadPtr((void RT_FAR * volatile RT_FAR *)(ppv))
2967#endif
2968
2969
2970/**
2971 * Atomically reads a boolean value, ordered.
2972 *
2973 * @returns Current *pf value
2974 * @param pf Pointer to the boolean variable to read.
2975 */
2976DECLINLINE(bool) ASMAtomicReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2977{
2978 ASMMemoryFence();
2979 return *pf; /* byte reads are atomic on x86 */
2980}
2981
2982
2983/**
2984 * Atomically reads a boolean value, unordered.
2985 *
2986 * @returns Current *pf value
2987 * @param pf Pointer to the boolean variable to read.
2988 */
2989DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool RT_FAR *pf) RT_NOTHROW_DEF
2990{
2991 return *pf; /* byte reads are atomic on x86 */
2992}
2993
2994
2995/**
2996 * Atomically read a typical IPRT handle value, ordered.
2997 *
2998 * @param ph Pointer to the handle variable to read.
2999 * @param phRes Where to store the result.
3000 *
3001 * @remarks This doesn't currently work for all handles (like RTFILE).
3002 */
3003#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3004# define ASMAtomicReadHandle(ph, phRes) \
3005 do { \
3006 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3007 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3008 *(uint32_t RT_FAR *)(phRes) = ASMAtomicReadU32((uint32_t volatile RT_FAR *)(ph)); \
3009 } while (0)
3010#elif HC_ARCH_BITS == 64
3011# define ASMAtomicReadHandle(ph, phRes) \
3012 do { \
3013 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3014 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3015 *(uint64_t RT_FAR *)(phRes) = ASMAtomicReadU64((uint64_t volatile RT_FAR *)(ph)); \
3016 } while (0)
3017#else
3018# error HC_ARCH_BITS
3019#endif
3020
3021
3022/**
3023 * Atomically read a typical IPRT handle value, unordered.
3024 *
3025 * @param ph Pointer to the handle variable to read.
3026 * @param phRes Where to store the result.
3027 *
3028 * @remarks This doesn't currently work for all handles (like RTFILE).
3029 */
3030#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3031# define ASMAtomicUoReadHandle(ph, phRes) \
3032 do { \
3033 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3034 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
3035 *(uint32_t RT_FAR *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile RT_FAR *)(ph)); \
3036 } while (0)
3037#elif HC_ARCH_BITS == 64
3038# define ASMAtomicUoReadHandle(ph, phRes) \
3039 do { \
3040 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3041 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
3042 *(uint64_t RT_FAR *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile RT_FAR *)(ph)); \
3043 } while (0)
3044#else
3045# error HC_ARCH_BITS
3046#endif
3047
3048
3049/**
3050 * Atomically read a value whose size might differ
3051 * between platforms or compilers, ordered.
3052 *
3053 * @param pu Pointer to the variable to read.
3054 * @param puRes Where to store the result.
3055 */
3056#define ASMAtomicReadSize(pu, puRes) \
3057 do { \
3058 switch (sizeof(*(pu))) { \
3059 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3060 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3061 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3062 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3063 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3064 } \
3065 } while (0)
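
/* Example (illustrative sketch; pExampleCtx and its uReg member are
 * hypothetical): handy for fields such as an RTHCUINTREG whose width differs
 * between 32-bit and 64-bit targets, where none of the fixed-width readers
 * fits directly:
 *
 *      RTHCUINTREG uValue;
 *      ASMAtomicReadSize(&pExampleCtx->uReg, &uValue);
 */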
3066
3067
3068/**
3069 * Atomically read a value whose size might differ
3070 * between platforms or compilers, unordered.
3071 *
3072 * @param pu Pointer to the variable to read.
3073 * @param puRes Where to store the result.
3074 */
3075#define ASMAtomicUoReadSize(pu, puRes) \
3076 do { \
3077 switch (sizeof(*(pu))) { \
3078 case 1: *(uint8_t RT_FAR *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3079 case 2: *(uint16_t RT_FAR *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3080 case 4: *(uint32_t RT_FAR *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3081 case 8: *(uint64_t RT_FAR *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu)); break; \
3082            default: AssertMsgFailed(("ASMAtomicUoReadSize: size %d is not supported\n", sizeof(*(pu)))); \
3083 } \
3084 } while (0)
3085
3086
3087/**
3088 * Atomically writes an unsigned 8-bit value, ordered.
3089 *
3090 * @param pu8 Pointer to the 8-bit variable.
3091 * @param u8 The 8-bit value to assign to *pu8.
3092 */
3093DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3094{
3095 /** @todo Any possible ARM32/ARM64 optimizations here? */
3096 ASMAtomicXchgU8(pu8, u8);
3097}
3098
3099
3100/**
3101 * Atomically writes an unsigned 8-bit value, unordered.
3102 *
3103 * @param pu8 Pointer to the 8-bit variable.
3104 * @param u8 The 8-bit value to assign to *pu8.
3105 */
3106DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t RT_FAR *pu8, uint8_t u8) RT_NOTHROW_DEF
3107{
3108 /** @todo Any possible ARM32/ARM64 improvements here? */
3109 *pu8 = u8; /* byte writes are atomic on x86 */
3110}
3111
3112
3113/**
3114 * Atomically writes a signed 8-bit value, ordered.
3115 *
3116 * @param   pi8     Pointer to the 8-bit variable to write.
3117 * @param i8 The 8-bit value to assign to *pi8.
3118 */
3119DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3120{
3121 /** @todo Any possible ARM32/ARM64 optimizations here? */
3122 ASMAtomicXchgS8(pi8, i8);
3123}
3124
3125
3126/**
3127 * Atomically writes a signed 8-bit value, unordered.
3128 *
3129 * @param pi8 Pointer to the 8-bit variable to write.
3130 * @param i8 The 8-bit value to assign to *pi8.
3131 */
3132DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t RT_FAR *pi8, int8_t i8) RT_NOTHROW_DEF
3133{
3134 *pi8 = i8; /* byte writes are atomic on x86 */
3135}
3136
3137
3138/**
3139 * Atomically writes an unsigned 16-bit value, ordered.
3140 *
3141 * @param pu16 Pointer to the 16-bit variable to write.
3142 * @param u16 The 16-bit value to assign to *pu16.
3143 */
3144DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3145{
3146 /** @todo Any possible ARM32/ARM64 optimizations here? */
3147 ASMAtomicXchgU16(pu16, u16);
3148}
3149
3150
3151/**
3152 * Atomically writes an unsigned 16-bit value, unordered.
3153 *
3154 * @param pu16 Pointer to the 16-bit variable to write.
3155 * @param u16 The 16-bit value to assign to *pu16.
3156 */
3157DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t RT_FAR *pu16, uint16_t u16) RT_NOTHROW_DEF
3158{
3159 Assert(!((uintptr_t)pu16 & 1));
3160 *pu16 = u16;
3161}
3162
3163
3164/**
3165 * Atomically writes a signed 16-bit value, ordered.
3166 *
3167 * @param pi16 Pointer to the 16-bit variable to write.
3168 * @param i16 The 16-bit value to assign to *pi16.
3169 */
3170DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3171{
3172 /** @todo Any possible ARM32/ARM64 optimizations here? */
3173 ASMAtomicXchgS16(pi16, i16);
3174}
3175
3176
3177/**
3178 * Atomically writes a signed 16-bit value, unordered.
3179 *
3180 * @param pi16 Pointer to the 16-bit variable to write.
3181 * @param i16 The 16-bit value to assign to *pi16.
3182 */
3183DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3184{
3185 Assert(!((uintptr_t)pi16 & 1));
3186 *pi16 = i16;
3187}
3188
3189
3190/**
3191 * Atomically writes an unsigned 32-bit value, ordered.
3192 *
3193 * @param pu32 Pointer to the 32-bit variable to write.
3194 * @param u32 The 32-bit value to assign to *pu32.
3195 */
3196DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3197{
3198 /** @todo Any possible ARM32/ARM64 optimizations here? */
3199 ASMAtomicXchgU32(pu32, u32);
3200}
3201
3202
3203/**
3204 * Atomically writes an unsigned 32-bit value, unordered.
3205 *
3206 * @param pu32 Pointer to the 32-bit variable to write.
3207 * @param u32 The 32-bit value to assign to *pu32.
3208 */
3209DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3210{
3211 Assert(!((uintptr_t)pu32 & 3));
3212#if ARCH_BITS >= 32
3213 *pu32 = u32;
3214#else
3215 ASMAtomicXchgU32(pu32, u32);
3216#endif
3217}
3218
3219
3220/**
3221 * Atomically writes a signed 32-bit value, ordered.
3222 *
3223 * @param pi32 Pointer to the 32-bit variable to write.
3224 * @param i32 The 32-bit value to assign to *pi32.
3225 */
3226DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3227{
3228 ASMAtomicXchgS32(pi32, i32);
3229}
3230
3231
3232/**
3233 * Atomically writes a signed 32-bit value, unordered.
3234 *
3235 * @param pi32 Pointer to the 32-bit variable to write.
3236 * @param i32 The 32-bit value to assign to *pi32.
3237 */
3238DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3239{
3240 Assert(!((uintptr_t)pi32 & 3));
3241#if ARCH_BITS >= 32
3242 *pi32 = i32;
3243#else
3244 ASMAtomicXchgS32(pi32, i32);
3245#endif
3246}
3247
3248
3249/**
3250 * Atomically writes an unsigned 64-bit value, ordered.
3251 *
3252 * @param pu64 Pointer to the 64-bit variable to write.
3253 * @param u64 The 64-bit value to assign to *pu64.
3254 */
3255DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3256{
3257 /** @todo Any possible ARM32/ARM64 optimizations here? */
3258 ASMAtomicXchgU64(pu64, u64);
3259}
3260
3261
3262/**
3263 * Atomically writes an unsigned 64-bit value, unordered.
3264 *
3265 * @param pu64 Pointer to the 64-bit variable to write.
3266 * @param u64 The 64-bit value to assign to *pu64.
3267 */
3268DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3269{
3270 Assert(!((uintptr_t)pu64 & 7));
3271#if ARCH_BITS == 64
3272 *pu64 = u64;
3273#else
3274 ASMAtomicXchgU64(pu64, u64);
3275#endif
3276}
3277
3278
3279/**
3280 * Atomically writes a signed 64-bit value, ordered.
3281 *
3282 * @param pi64 Pointer to the 64-bit variable to write.
3283 * @param i64 The 64-bit value to assign to *pi64.
3284 */
3285DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3286{
3287 /** @todo Any possible ARM32/ARM64 optimizations here? */
3288 ASMAtomicXchgS64(pi64, i64);
3289}
3290
3291
3292/**
3293 * Atomically writes a signed 64-bit value, unordered.
3294 *
3295 * @param pi64 Pointer to the 64-bit variable to write.
3296 * @param i64 The 64-bit value to assign to *pi64.
3297 */
3298DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3299{
3300 Assert(!((uintptr_t)pi64 & 7));
3301#if ARCH_BITS == 64
3302 *pi64 = i64;
3303#else
3304 ASMAtomicXchgS64(pi64, i64);
3305#endif
3306}
3307
3308
3309/**
3310 * Atomically writes a size_t value, ordered.
3311 *
3312 * @returns nothing.
3313 * @param pcb Pointer to the size_t variable to write.
3314 * @param cb The value to assign to *pcb.
3315 */
3316DECLINLINE(void) ASMAtomicWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3317{
3318#if ARCH_BITS == 64
3319 ASMAtomicWriteU64((uint64_t volatile *)pcb, cb);
3320#elif ARCH_BITS == 32
3321 ASMAtomicWriteU32((uint32_t volatile *)pcb, cb);
3322#elif ARCH_BITS == 16
3323 AssertCompileSize(size_t, 2);
3324 ASMAtomicWriteU16((uint16_t volatile *)pcb, cb);
3325#else
3326# error "Unsupported ARCH_BITS value"
3327#endif
3328}
3329
3330
3331/**
3332 * Atomically writes a size_t value, unordered.
3333 *
3334 * @returns nothing.
3335 * @param pcb Pointer to the size_t variable to write.
3336 * @param cb The value to assign to *pcb.
3337 */
3338DECLINLINE(void) ASMAtomicUoWriteZ(volatile size_t RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3339{
3340#if ARCH_BITS == 64
3341 ASMAtomicUoWriteU64((uint64_t volatile *)pcb, cb);
3342#elif ARCH_BITS == 32
3343 ASMAtomicUoWriteU32((uint32_t volatile *)pcb, cb);
3344#elif ARCH_BITS == 16
3345 AssertCompileSize(size_t, 2);
3346 ASMAtomicUoWriteU16((uint16_t volatile *)pcb, cb);
3347#else
3348# error "Unsupported ARCH_BITS value"
3349#endif
3350}
3351
3352
3353/**
3354 * Atomically writes a boolean value, ordered.
3355 *
3356 * @param pf Pointer to the boolean variable to write.
3357 * @param f The boolean value to assign to *pf.
3358 */
3359DECLINLINE(void) ASMAtomicWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3360{
3361 ASMAtomicWriteU8((uint8_t volatile RT_FAR *)pf, f);
3362}
3363
3364
3365/**
3366 * Atomically writes a boolean value, unordered.
3367 *
3368 * @param pf Pointer to the boolean variable to write.
3369 * @param f The boolean value to assign to *pf.
3370 */
3371DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool RT_FAR *pf, bool f) RT_NOTHROW_DEF
3372{
3373 *pf = f; /* byte writes are atomic on x86 */
3374}
3375
3376
3377/**
3378 * Atomically writes a pointer value, ordered.
3379 *
3380 * @param ppv Pointer to the pointer variable to write.
3381 * @param pv The pointer value to assign to *ppv.
3382 */
3383DECLINLINE(void) ASMAtomicWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3384{
3385#if ARCH_BITS == 32 || ARCH_BITS == 16
3386 ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3387#elif ARCH_BITS == 64
3388 ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3389#else
3390# error "ARCH_BITS is bogus"
3391#endif
3392}
3393
3394
3395/**
3396 * Atomically writes a pointer value, unordered.
3397 *
3398 * @param ppv Pointer to the pointer variable to write.
3399 * @param pv The pointer value to assign to *ppv.
3400 */
3401DECLINLINE(void) ASMAtomicUoWritePtrVoid(void RT_FAR * volatile RT_FAR *ppv, const void *pv) RT_NOTHROW_DEF
3402{
3403#if ARCH_BITS == 32 || ARCH_BITS == 16
3404 ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)ppv, (uint32_t)pv);
3405#elif ARCH_BITS == 64
3406 ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)ppv, (uint64_t)pv);
3407#else
3408# error "ARCH_BITS is bogus"
3409#endif
3410}
3411
3412
3413/**
3414 * Atomically writes a pointer value, ordered.
3415 *
3416 * @param ppv Pointer to the pointer variable to write.
3417 * @param pv The pointer value to assign to *ppv. If NULL use
3418 * ASMAtomicWriteNullPtr or you'll land in trouble.
3419 *
3420 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3421 * NULL.
3422 */
3423#ifdef __GNUC__
3424# define ASMAtomicWritePtr(ppv, pv) \
3425 do \
3426 { \
3427 __typeof__(*(ppv)) volatile RT_FAR * const ppvTypeChecked = (ppv); \
3428 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3429 \
3430 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3431 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3432 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3433 \
3434 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), (void RT_FAR *)(pvTypeChecked)); \
3435 } while (0)
3436#else
3437# define ASMAtomicWritePtr(ppv, pv) \
3438 do \
3439 { \
3440 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3441 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3442 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3443 \
3444 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), (void RT_FAR *)(pv)); \
3445 } while (0)
3446#endif
3447
3448
3449/**
3450 * Atomically sets a pointer to NULL, ordered.
3451 *
3452 * @param ppv Pointer to the pointer variable that should be set to NULL.
3453 *
3454 * @remarks This is relatively type safe on GCC platforms.
3455 */
3456#if RT_GNUC_PREREQ(4, 2)
3457# define ASMAtomicWriteNullPtr(ppv) \
3458 do \
3459 { \
3460 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
3461 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3462 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3463 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppvTypeChecked), NULL); \
3464 } while (0)
3465#else
3466# define ASMAtomicWriteNullPtr(ppv) \
3467 do \
3468 { \
3469 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3470 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3471 ASMAtomicWritePtrVoid((void RT_FAR * volatile RT_FAR *)(ppv), NULL); \
3472 } while (0)
3473#endif
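
/* Example (illustrative sketch; g_pExampleObj and pNewObj are hypothetical):
 * publishing and later retiring a shared object pointer.  The dedicated NULL
 * variant exists because, as the remarks above say, passing NULL straight to
 * ASMAtomicWritePtr lands you in trouble with its compile-time type checks:
 *
 *      ASMAtomicWritePtr(&g_pExampleObj, pNewObj);
 *      ...
 *      ASMAtomicWriteNullPtr(&g_pExampleObj);
 */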
3474
3475
3476/**
3477 * Atomically writes a pointer value, unordered.
3478 *
3480 * @param ppv Pointer to the pointer variable.
3481 * @param pv The pointer value to assign to *ppv. If NULL use
3482 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
3483 *
3484 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
3485 * NULL.
3486 */
3487#if RT_GNUC_PREREQ(4, 2)
3488# define ASMAtomicUoWritePtr(ppv, pv) \
3489 do \
3490 { \
3491 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3492 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
3493 \
3494 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3495 AssertCompile(sizeof(pv) == sizeof(void *)); \
3496 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3497 \
3498 *(ppvTypeChecked) = pvTypeChecked; \
3499 } while (0)
3500#else
3501# define ASMAtomicUoWritePtr(ppv, pv) \
3502 do \
3503 { \
3504 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3505 AssertCompile(sizeof(pv) == sizeof(void RT_FAR *)); \
3506 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3507 *(ppv) = pv; \
3508 } while (0)
3509#endif
3510
3511
3512/**
3513 * Atomically sets a pointer to NULL, unordered.
3514 *
3515 * @param ppv Pointer to the pointer variable that should be set to NULL.
3516 *
3517 * @remarks This is relatively type safe on GCC platforms.
3518 */
3519#ifdef __GNUC__
3520# define ASMAtomicUoWriteNullPtr(ppv) \
3521 do \
3522 { \
3523 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
3524 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
3525 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3526 *(ppvTypeChecked) = NULL; \
3527 } while (0)
3528#else
3529# define ASMAtomicUoWriteNullPtr(ppv) \
3530 do \
3531 { \
3532 AssertCompile(sizeof(*ppv) == sizeof(void RT_FAR *)); \
3533 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
3534 *(ppv) = NULL; \
3535 } while (0)
3536#endif
3537
3538
3539/**
3540 * Atomically write a typical IPRT handle value, ordered.
3541 *
3542 * @param ph Pointer to the variable to update.
3543 * @param hNew The value to assign to *ph.
3544 *
3545 * @remarks This doesn't currently work for all handles (like RTFILE).
3546 */
3547#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3548# define ASMAtomicWriteHandle(ph, hNew) \
3549 do { \
3550 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3551 ASMAtomicWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)(hNew)); \
3552 } while (0)
3553#elif HC_ARCH_BITS == 64
3554# define ASMAtomicWriteHandle(ph, hNew) \
3555 do { \
3556 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3557 ASMAtomicWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)(hNew)); \
3558 } while (0)
3559#else
3560# error HC_ARCH_BITS
3561#endif
3562
3563
3564/**
3565 * Atomically write a typical IPRT handle value, unordered.
3566 *
3567 * @param ph Pointer to the variable to update.
3568 * @param hNew The value to assign to *ph.
3569 *
3570 * @remarks This doesn't currently work for all handles (like RTFILE).
3571 */
3572#if HC_ARCH_BITS == 32 || ARCH_BITS == 16
3573# define ASMAtomicUoWriteHandle(ph, hNew) \
3574 do { \
3575 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
3576 ASMAtomicUoWriteU32((uint32_t volatile RT_FAR *)(ph), (const uint32_t)hNew); \
3577 } while (0)
3578#elif HC_ARCH_BITS == 64
3579# define ASMAtomicUoWriteHandle(ph, hNew) \
3580 do { \
3581 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
3582 ASMAtomicUoWriteU64((uint64_t volatile RT_FAR *)(ph), (const uint64_t)hNew); \
3583 } while (0)
3584#else
3585# error HC_ARCH_BITS
3586#endif
3587
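/*
 * Usage sketch (illustrative only and compiled out; EXAMPLEHANDLE and
 * g_hExample are hypothetical): update a pointer-sized handle so that
 * concurrent readers observe either the previous valid handle or the NIL
 * value, never a torn intermediate.
 */
#if 0
typedef struct EXAMPLEHANDLEINT *EXAMPLEHANDLE;
#define NIL_EXAMPLEHANDLE ((EXAMPLEHANDLE)0)
static EXAMPLEHANDLE volatile g_hExample = NIL_EXAMPLEHANDLE;

static void ExampleSetHandle(EXAMPLEHANDLE hNew)
{
    ASMAtomicWriteHandle(&g_hExample, hNew);
}

static void ExampleClearHandle(void)
{
    ASMAtomicWriteHandle(&g_hExample, NIL_EXAMPLEHANDLE);
}
#endif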
3588
3589/**
3590 * Atomically write a value whose size might differ
3591 * between platforms or compilers, ordered.
3592 *
3593 * @param pu Pointer to the variable to update.
3594 * @param uNew The value to assign to *pu.
3595 */
3596#define ASMAtomicWriteSize(pu, uNew) \
3597 do { \
3598 switch (sizeof(*(pu))) { \
3599 case 1: ASMAtomicWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3600 case 2: ASMAtomicWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3601 case 4: ASMAtomicWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3602 case 8: ASMAtomicWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3603 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3604 } \
3605 } while (0)
3606
3607/**
3608 * Atomically write a value whose size might differ
3609 * between platforms or compilers, unordered.
3610 *
3611 * @param pu Pointer to the variable to update.
3612 * @param uNew The value to assign to *pu.
3613 */
3614#define ASMAtomicUoWriteSize(pu, uNew) \
3615 do { \
3616 switch (sizeof(*(pu))) { \
3617 case 1: ASMAtomicUoWriteU8( (volatile uint8_t RT_FAR *)(void RT_FAR *)(pu), (uint8_t )(uNew)); break; \
3618 case 2: ASMAtomicUoWriteU16((volatile uint16_t RT_FAR *)(void RT_FAR *)(pu), (uint16_t)(uNew)); break; \
3619 case 4: ASMAtomicUoWriteU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3620 case 8: ASMAtomicUoWriteU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3621            default: AssertMsgFailed(("ASMAtomicUoWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
3622 } \
3623 } while (0)
3624
3625
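/*
 * Usage sketch (illustrative only and compiled out; EXAMPLESTATE and
 * ExampleReset are hypothetical): ASMAtomicWriteSize picks the right
 * fixed-size writer from the operand size, which is convenient for fields
 * whose width differs between targets or compilers.
 */
#if 0
typedef struct EXAMPLESTATE
{
    uint16_t volatile uFlags;
    uint64_t volatile cbTotal;
} EXAMPLESTATE;

static void ExampleReset(EXAMPLESTATE *pState)
{
    ASMAtomicWriteSize(&pState->uFlags, 0);     /* sizeof(uFlags) == 2 -> ASMAtomicWriteU16 */
    ASMAtomicWriteSize(&pState->cbTotal, 0);    /* sizeof(cbTotal) == 8 -> ASMAtomicWriteU64 */
}
#endif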
3626
3627/**
3628 * Atomically exchanges and adds to a 16-bit value, ordered.
3629 *
3630 * @returns The old value.
3631 * @param pu16 Pointer to the value.
3632 * @param u16 Number to add.
3633 *
3634 * @remarks Currently not implemented, just to make 16-bit code happy.
3635 * @remarks x86: Requires a 486 or later.
3636 */
3637RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicAddU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_PROTO;
3638
3639
3640/**
3641 * Atomically exchanges and adds to a 32-bit value, ordered.
3642 *
3643 * @returns The old value.
3644 * @param pu32 Pointer to the value.
3645 * @param u32 Number to add.
3646 *
3647 * @remarks x86: Requires a 486 or later.
3648 */
3649#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3650RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
3651#else
3652DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3653{
3654# if RT_INLINE_ASM_USES_INTRIN
3655 u32 = _InterlockedExchangeAdd((long RT_FAR *)pu32, u32);
3656 return u32;
3657
3658# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3659# if RT_INLINE_ASM_GNU_STYLE
3660 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3661 : "=r" (u32)
3662 , "=m" (*pu32)
3663 : "0" (u32)
3664 , "m" (*pu32)
3665 : "memory"
3666 , "cc");
3667 return u32;
3668# else
3669 __asm
3670 {
3671 mov eax, [u32]
3672# ifdef RT_ARCH_AMD64
3673 mov rdx, [pu32]
3674 lock xadd [rdx], eax
3675# else
3676 mov edx, [pu32]
3677 lock xadd [edx], eax
3678# endif
3679 mov [u32], eax
3680 }
3681 return u32;
3682# endif
3683
3684# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3685 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAddU32, pu32, DMB_SY,
3686 "add %w[uNew], %w[uOld], %w[uVal]\n\t",
3687 "add %[uNew], %[uOld], %[uVal]\n\t",
3688 [uVal] "r" (u32));
3689 return u32OldRet;
3690
3691# else
3692# error "Port me"
3693# endif
3694}
3695#endif
3696
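/*
 * Usage sketch (illustrative only and compiled out; ExampleReserveSlots and
 * g_idxNextSlot are hypothetical): since ASMAtomicAddU32 returns the value
 * *before* the addition, it can hand out consecutive index ranges to
 * concurrent callers without any further locking.
 */
#if 0
static uint32_t volatile g_idxNextSlot = 0;

static uint32_t ExampleReserveSlots(uint32_t cSlots)
{
    /* The old value is the first index of the range now owned by this caller. */
    return ASMAtomicAddU32(&g_idxNextSlot, cSlots);
}
#endif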
3697
3698/**
3699 * Atomically exchanges and adds to a signed 32-bit value, ordered.
3700 *
3701 * @returns The old value.
3702 * @param pi32 Pointer to the value.
3703 * @param i32 Number to add.
3704 *
3705 * @remarks x86: Requires a 486 or later.
3706 */
3707DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3708{
3709 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
3710}
3711
3712
3713/**
3714 * Atomically exchanges and adds to a 64-bit value, ordered.
3715 *
3716 * @returns The old value.
3717 * @param pu64 Pointer to the value.
3718 * @param u64 Number to add.
3719 *
3720 * @remarks x86: Requires a Pentium or later.
3721 */
3722#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3723DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
3724#else
3725DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3726{
3727# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3728 u64 = _InterlockedExchangeAdd64((__int64 RT_FAR *)pu64, u64);
3729 return u64;
3730
3731# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3732 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
3733 : "=r" (u64)
3734 , "=m" (*pu64)
3735 : "0" (u64)
3736 , "m" (*pu64)
3737 : "memory"
3738 , "cc");
3739 return u64;
3740
3741# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
3742 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_64(ASMAtomicAddU64, pu64, DMB_SY,
3743 "add %[uNew], %[uOld], %[uVal]\n\t"
3744 ,
3745 "add %[uNew], %[uOld], %[uVal]\n\t"
3746 "adc %H[uNew], %H[uOld], %H[uVal]\n\t",
3747 [uVal] "r" (u64));
3748 return u64OldRet;
3749
3750# else
3751 uint64_t u64Old;
3752 for (;;)
3753 {
3754 uint64_t u64New;
3755 u64Old = ASMAtomicUoReadU64(pu64);
3756 u64New = u64Old + u64;
3757 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3758 break;
3759 ASMNopPause();
3760 }
3761 return u64Old;
3762# endif
3763}
3764#endif
3765
3766
3767/**
3768 * Atomically exchanges and adds to a signed 64-bit value, ordered.
3769 *
3770 * @returns The old value.
3771 * @param pi64 Pointer to the value.
3772 * @param i64 Number to add.
3773 *
3774 * @remarks x86: Requires a Pentium or later.
3775 */
3776DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3777{
3778 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
3779}
3780
3781
3782/**
3783 * Atomically exchanges and adds to a size_t value, ordered.
3784 *
3785 * @returns The old value.
3786 * @param pcb Pointer to the size_t value.
3787 * @param cb Number to add.
3788 */
3789DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3790{
3791#if ARCH_BITS == 64
3792 AssertCompileSize(size_t, 8);
3793 return ASMAtomicAddU64((uint64_t volatile RT_FAR *)pcb, cb);
3794#elif ARCH_BITS == 32
3795 AssertCompileSize(size_t, 4);
3796 return ASMAtomicAddU32((uint32_t volatile RT_FAR *)pcb, cb);
3797#elif ARCH_BITS == 16
3798 AssertCompileSize(size_t, 2);
3799 return ASMAtomicAddU16((uint16_t volatile RT_FAR *)pcb, cb);
3800#else
3801# error "Unsupported ARCH_BITS value"
3802#endif
3803}
3804
3805
3806/**
3807 * Atomically exchanges and adds a value whose size might differ between
3808 * platforms or compilers, ordered.
3809 *
3810 * @param pu Pointer to the variable to update.
3811 * @param uNew The value to add to *pu.
3812 * @param puOld Where to store the old value.
3813 */
3814#define ASMAtomicAddSize(pu, uNew, puOld) \
3815 do { \
3816 switch (sizeof(*(pu))) { \
3817 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3818 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3819 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
3820 } \
3821 } while (0)
3822
3823
3824
3825/**
3826 * Atomically exchanges and subtracts from an unsigned 16-bit value, ordered.
3827 *
3828 * @returns The old value.
3829 * @param pu16 Pointer to the value.
3830 * @param u16 Number to subtract.
3831 *
3832 * @remarks x86: Requires a 486 or later.
3833 */
3834DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile RT_FAR *pu16, uint32_t u16) RT_NOTHROW_DEF
3835{
3836 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
3837}
3838
3839
3840/**
3841 * Atomically exchanges and subtracts from a signed 16-bit value, ordered.
3842 *
3843 * @returns The old value.
3844 * @param pi16 Pointer to the value.
3845 * @param i16 Number to subtract.
3846 *
3847 * @remarks x86: Requires a 486 or later.
3848 */
3849DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile RT_FAR *pi16, int16_t i16) RT_NOTHROW_DEF
3850{
3851 return (int16_t)ASMAtomicAddU16((uint16_t volatile RT_FAR *)pi16, (uint16_t)-i16);
3852}
3853
3854
3855/**
3856 * Atomically exchanges and subtracts from an unsigned 32-bit value, ordered.
3857 *
3858 * @returns The old value.
3859 * @param pu32 Pointer to the value.
3860 * @param u32 Number to subtract.
3861 *
3862 * @remarks x86: Requires a 486 or later.
3863 */
3864DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
3865{
3866 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
3867}
3868
3869
3870/**
3871 * Atomically exchanges and subtracts from a signed 32-bit value, ordered.
3872 *
3873 * @returns The old value.
3874 * @param pi32 Pointer to the value.
3875 * @param i32 Number to subtract.
3876 *
3877 * @remarks x86: Requires a 486 or later.
3878 */
3879DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
3880{
3881 return (int32_t)ASMAtomicAddU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)-i32);
3882}
3883
3884
3885/**
3886 * Atomically exchanges and subtracts from an unsigned 64-bit value, ordered.
3887 *
3888 * @returns The old value.
3889 * @param pu64 Pointer to the value.
3890 * @param u64 Number to subtract.
3891 *
3892 * @remarks x86: Requires a Pentium or later.
3893 */
3894DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
3895{
3896 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
3897}
3898
3899
3900/**
3901 * Atomically exchanges and subtracts from a signed 64-bit value, ordered.
3902 *
3903 * @returns The old value.
3904 * @param pi64 Pointer to the value.
3905 * @param i64 Number to subtract.
3906 *
3907 * @remarks x86: Requires a Pentium or later.
3908 */
3909DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
3910{
3911 return (int64_t)ASMAtomicAddU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)-i64);
3912}
3913
3914
3915/**
3916 * Atomically exchanges and subtracts from a size_t value, ordered.
3917 *
3918 * @returns The old value.
3919 * @param pcb Pointer to the size_t value.
3920 * @param cb Number to subtract.
3921 *
3922 * @remarks x86: Requires a 486 or later.
3923 */
3924DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile RT_FAR *pcb, size_t cb) RT_NOTHROW_DEF
3925{
3926#if ARCH_BITS == 64
3927 return ASMAtomicSubU64((uint64_t volatile RT_FAR *)pcb, cb);
3928#elif ARCH_BITS == 32
3929 return ASMAtomicSubU32((uint32_t volatile RT_FAR *)pcb, cb);
3930#elif ARCH_BITS == 16
3931 AssertCompileSize(size_t, 2);
3932 return ASMAtomicSubU16((uint16_t volatile RT_FAR *)pcb, cb);
3933#else
3934# error "Unsupported ARCH_BITS value"
3935#endif
3936}
3937
3938
3939/**
3940 * Atomically exchanges and subtracts a value whose size might differ between
3941 * platforms or compilers, ordered.
3942 *
3943 * @param pu Pointer to the variable to update.
3944 * @param uNew The value to subtract from *pu.
3945 * @param puOld Where to store the old value.
3946 *
3947 * @remarks x86: Requires a 486 or later.
3948 */
3949#define ASMAtomicSubSize(pu, uNew, puOld) \
3950 do { \
3951 switch (sizeof(*(pu))) { \
3952 case 4: *(uint32_t RT_FAR *)(puOld) = ASMAtomicSubU32((volatile uint32_t RT_FAR *)(void RT_FAR *)(pu), (uint32_t)(uNew)); break; \
3953 case 8: *(uint64_t RT_FAR *)(puOld) = ASMAtomicSubU64((volatile uint64_t RT_FAR *)(void RT_FAR *)(pu), (uint64_t)(uNew)); break; \
3954 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
3955 } \
3956 } while (0)
3957
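/*
 * Usage sketch (illustrative only and compiled out; ExampleCompleteRequests
 * and g_cPendingRequests are hypothetical): the subtraction helpers likewise
 * return the value *before* the update, so the caller that drives a counter
 * to zero can be identified without a separate read.
 */
#if 0
static uint32_t volatile g_cPendingRequests = 0;

static bool ExampleCompleteRequests(uint32_t cDone)
{
    /* True when this call took the pending count down to zero. */
    return ASMAtomicSubU32(&g_cPendingRequests, cDone) == cDone;
}
#endif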
3958
3959
3960/**
3961 * Atomically increment a 16-bit value, ordered.
3962 *
3963 * @returns The new value.
3964 * @param pu16 Pointer to the value to increment.
3965 * @remarks Not implemented. Just to make 16-bit code happy.
3966 *
3967 * @remarks x86: Requires a 486 or later.
3968 */
3969RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMAtomicIncU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
3970
3971
3972/**
3973 * Atomically increment a 32-bit value, ordered.
3974 *
3975 * @returns The new value.
3976 * @param pu32 Pointer to the value to increment.
3977 *
3978 * @remarks x86: Requires a 486 or later.
3979 */
3980#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
3981RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
3982#else
3983DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
3984{
3985# if RT_INLINE_ASM_USES_INTRIN
3986 return (uint32_t)_InterlockedIncrement((long RT_FAR *)pu32);
3987
3988# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3989# if RT_INLINE_ASM_GNU_STYLE
3990 uint32_t u32;
3991 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3992 : "=r" (u32)
3993 , "=m" (*pu32)
3994 : "0" (1)
3995 , "m" (*pu32)
3996 : "memory"
3997 , "cc");
3998 return u32+1;
3999# else
4000 __asm
4001 {
4002 mov eax, 1
4003# ifdef RT_ARCH_AMD64
4004 mov rdx, [pu32]
4005 lock xadd [rdx], eax
4006# else
4007 mov edx, [pu32]
4008 lock xadd [edx], eax
4009# endif
4010 mov u32, eax
4011 }
4012 return u32+1;
4013# endif
4014
4015# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4016 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicIncU32, pu32, DMB_SY,
4017 "add %w[uNew], %w[uNew], #1\n\t",
4018 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4019 "X" (0) /* dummy */);
4020 return u32NewRet;
4021
4022# else
4023 return ASMAtomicAddU32(pu32, 1) + 1;
4024# endif
4025}
4026#endif
4027
4028
4029/**
4030 * Atomically increment a signed 32-bit value, ordered.
4031 *
4032 * @returns The new value.
4033 * @param pi32 Pointer to the value to increment.
4034 *
4035 * @remarks x86: Requires a 486 or later.
4036 */
4037DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4038{
4039 return (int32_t)ASMAtomicIncU32((uint32_t volatile RT_FAR *)pi32);
4040}
4041
4042
4043/**
4044 * Atomically increment a 64-bit value, ordered.
4045 *
4046 * @returns The new value.
4047 * @param pu64 Pointer to the value to increment.
4048 *
4049 * @remarks x86: Requires a Pentium or later.
4050 */
4051#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4052DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4053#else
4054DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4055{
4056# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4057 return (uint64_t)_InterlockedIncrement64((__int64 RT_FAR *)pu64);
4058
4059# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4060 uint64_t u64;
4061 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
4062 : "=r" (u64)
4063 , "=m" (*pu64)
4064 : "0" (1)
4065 , "m" (*pu64)
4066 : "memory"
4067 , "cc");
4068 return u64 + 1;
4069
4070# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4071 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicIncU64, pu64, DMB_SY,
4072 "add %[uNew], %[uNew], #1\n\t"
4073 ,
4074 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4075 "adc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4076 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4077 return u64NewRet;
4078
4079# else
4080 return ASMAtomicAddU64(pu64, 1) + 1;
4081# endif
4082}
4083#endif
4084
4085
4086/**
4087 * Atomically increment a signed 64-bit value, ordered.
4088 *
4089 * @returns The new value.
4090 * @param pi64 Pointer to the value to increment.
4091 *
4092 * @remarks x86: Requires a Pentium or later.
4093 */
4094DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4095{
4096 return (int64_t)ASMAtomicIncU64((uint64_t volatile RT_FAR *)pi64);
4097}
4098
4099
4100/**
4101 * Atomically increment a size_t value, ordered.
4102 *
4103 * @returns The new value.
4104 * @param pcb Pointer to the value to increment.
4105 *
4106 * @remarks x86: Requires a 486 or later.
4107 */
4108DECLINLINE(size_t) ASMAtomicIncZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4109{
4110#if ARCH_BITS == 64
4111 return ASMAtomicIncU64((uint64_t volatile RT_FAR *)pcb);
4112#elif ARCH_BITS == 32
4113 return ASMAtomicIncU32((uint32_t volatile RT_FAR *)pcb);
4114#elif ARCH_BITS == 16
4115 return ASMAtomicIncU16((uint16_t volatile RT_FAR *)pcb);
4116#else
4117# error "Unsupported ARCH_BITS value"
4118#endif
4119}
4120
4121
4122
4123/**
4124 * Atomically decrement an unsigned 16-bit value, ordered.
4125 *
4126 * @returns The new value.
4127 * @param pu16 Pointer to the value to decrement.
4128 * @remarks Not implemented. Just to make 16-bit code happy.
4129 *
4130 * @remarks x86: Requires a 486 or later.
4131 */
4132RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU16(uint16_t volatile RT_FAR *pu16) RT_NOTHROW_PROTO;
4133
4134
4135/**
4136 * Atomically decrement an unsigned 32-bit value, ordered.
4137 *
4138 * @returns The new value.
4139 * @param pu32 Pointer to the value to decrement.
4140 *
4141 * @remarks x86: Requires a 486 or later.
4142 */
4143#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4144RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
4145#else
4146DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
4147{
4148# if RT_INLINE_ASM_USES_INTRIN
4149 return (uint32_t)_InterlockedDecrement((long RT_FAR *)pu32);
4150
4151# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4152# if RT_INLINE_ASM_GNU_STYLE
4153 uint32_t u32;
4154 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
4155 : "=r" (u32)
4156 , "=m" (*pu32)
4157 : "0" (-1)
4158 , "m" (*pu32)
4159 : "memory"
4160 , "cc");
4161 return u32-1;
4162# else
4163 uint32_t u32;
4164 __asm
4165 {
4166 mov eax, -1
4167# ifdef RT_ARCH_AMD64
4168 mov rdx, [pu32]
4169 lock xadd [rdx], eax
4170# else
4171 mov edx, [pu32]
4172 lock xadd [edx], eax
4173# endif
4174 mov u32, eax
4175 }
4176 return u32-1;
4177# endif
4178
4179# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4180 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicDecU32, pu32, DMB_SY,
4181 "sub %w[uNew], %w[uNew], #1\n\t",
4182 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
4183 "X" (0) /* dummy */);
4184 return u32NewRet;
4185
4186# else
4187 return ASMAtomicSubU32(pu32, 1) - (uint32_t)1;
4188# endif
4189}
4190#endif
4191
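/*
 * Usage sketch (illustrative only and compiled out; EXAMPLEOBJ and the
 * retain / release helpers are hypothetical): ASMAtomicIncU32 and
 * ASMAtomicDecU32 return the *new* value, which is exactly what a reference
 * counter needs in order to detect the last release.
 */
#if 0
typedef struct EXAMPLEOBJ { uint32_t volatile cRefs; } EXAMPLEOBJ;

static uint32_t ExampleRetain(EXAMPLEOBJ *pObj)
{
    return ASMAtomicIncU32(&pObj->cRefs);
}

static uint32_t ExampleRelease(EXAMPLEOBJ *pObj)
{
    uint32_t cRefs = ASMAtomicDecU32(&pObj->cRefs);
    if (!cRefs)
    {
        /* Last reference dropped; real code would destroy pObj here. */
    }
    return cRefs;
}
#endif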
4192
4193/**
4194 * Atomically decrement a signed 32-bit value, ordered.
4195 *
4196 * @returns The new value.
4197 * @param pi32 Pointer to the value to decrement.
4198 *
4199 * @remarks x86: Requires a 486 or later.
4200 */
4201DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile RT_FAR *pi32) RT_NOTHROW_DEF
4202{
4203 return (int32_t)ASMAtomicDecU32((uint32_t volatile RT_FAR *)pi32);
4204}
4205
4206
4207/**
4208 * Atomically decrement an unsigned 64-bit value, ordered.
4209 *
4210 * @returns The new value.
4211 * @param pu64 Pointer to the value to decrement.
4212 *
4213 * @remarks x86: Requires a Pentium or later.
4214 */
4215#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4216RT_ASM_DECL_PRAGMA_WATCOM(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_PROTO;
4217#else
4218DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile RT_FAR *pu64) RT_NOTHROW_DEF
4219{
4220# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4221 return (uint64_t)_InterlockedDecrement64((__int64 volatile RT_FAR *)pu64);
4222
4223# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4224 uint64_t u64;
4225 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
4226 : "=r" (u64)
4227 , "=m" (*pu64)
4228 : "0" (~(uint64_t)0)
4229 , "m" (*pu64)
4230 : "memory"
4231 , "cc");
4232 return u64-1;
4233
4234# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4235 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicDecU64, pu64, DMB_SY,
4236 "sub %[uNew], %[uNew], #1\n\t"
4237 ,
4238 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */
4239 "sbc %H[uNew], %H[uNew], %[uZeroVal]\n\t",
4240 RTASM_ARM_PICK_6432("X" (0) /* dummy */, [uZeroVal] "r" (0)) );
4241 return u64NewRet;
4242
4243# else
4244 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
4245# endif
4246}
4247#endif
4248
4249
4250/**
4251 * Atomically decrement a signed 64-bit value, ordered.
4252 *
4253 * @returns The new value.
4254 * @param pi64 Pointer to the value to decrement.
4255 *
4256 * @remarks x86: Requires a Pentium or later.
4257 */
4258DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile RT_FAR *pi64) RT_NOTHROW_DEF
4259{
4260 return (int64_t)ASMAtomicDecU64((uint64_t volatile RT_FAR *)pi64);
4261}
4262
4263
4264/**
4265 * Atomically decrement a size_t value, ordered.
4266 *
4267 * @returns The new value.
4268 * @param pcb Pointer to the value to decrement.
4269 *
4270 * @remarks x86: Requires a 486 or later.
4271 */
4272DECLINLINE(size_t) ASMAtomicDecZ(size_t volatile RT_FAR *pcb) RT_NOTHROW_DEF
4273{
4274#if ARCH_BITS == 64
4275 return ASMAtomicDecU64((uint64_t volatile RT_FAR *)pcb);
4276#elif ARCH_BITS == 32
4277 return ASMAtomicDecU32((uint32_t volatile RT_FAR *)pcb);
4278#elif ARCH_BITS == 16
4279 return ASMAtomicDecU16((uint16_t volatile RT_FAR *)pcb);
4280#else
4281# error "Unsupported ARCH_BITS value"
4282#endif
4283}
4284
4285
4286/**
4287 * Atomically Or an unsigned 32-bit value, ordered.
4288 *
4289 * @param pu32 Pointer to the variable to OR u32 with.
4290 * @param u32 The value to OR *pu32 with.
4291 *
4292 * @remarks x86: Requires a 386 or later.
4293 */
4294#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4295RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4296#else
4297DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4298{
4299# if RT_INLINE_ASM_USES_INTRIN
4300 _InterlockedOr((long volatile RT_FAR *)pu32, (long)u32);
4301
4302# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4303# if RT_INLINE_ASM_GNU_STYLE
4304 __asm__ __volatile__("lock; orl %1, %0\n\t"
4305 : "=m" (*pu32)
4306 : "ir" (u32)
4307 , "m" (*pu32)
4308 : "cc");
4309# else
4310 __asm
4311 {
4312 mov eax, [u32]
4313# ifdef RT_ARCH_AMD64
4314 mov rdx, [pu32]
4315 lock or [rdx], eax
4316# else
4317 mov edx, [pu32]
4318 lock or [edx], eax
4319# endif
4320 }
4321# endif
4322
4323# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4324 /* For more on Orr see https://en.wikipedia.org/wiki/Orr_(Catch-22) ;-) */
4325 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicOr32, pu32, DMB_SY,
4326 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4327 "orr %[uNew], %[uNew], %[uVal]\n\t",
4328 [uVal] "r" (u32));
4329
4330# else
4331# error "Port me"
4332# endif
4333}
4334#endif
4335
4336
4337/**
4338 * Atomically OR an unsigned 32-bit value, ordered, extended version (for bitmap
4339 * fallback).
4340 *
4341 * @returns Old value.
4342 * @param pu32 Pointer to the variable to OR @a u32 with.
4343 * @param u32 The value to OR @a *pu32 with.
4344 */
4345DECLINLINE(uint32_t) ASMAtomicOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4346{
4347#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4348 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicOrEx32, pu32, DMB_SY,
4349 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4350 "orr %[uNew], %[uOld], %[uVal]\n\t",
4351 [uVal] "r" (u32));
4352 return u32OldRet;
4353
4354#else
4355 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4356 uint32_t u32New;
4357 do
4358 u32New = u32RetOld | u32;
4359 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4360 return u32RetOld;
4361#endif
4362}
4363
4364
4365/**
4366 * Atomically Or a signed 32-bit value, ordered.
4367 *
4368 * @param pi32 Pointer to the variable to OR i32 with.
4369 * @param i32 The value to OR *pi32 with.
4370 *
4371 * @remarks x86: Requires a 386 or later.
4372 */
4373DECLINLINE(void) ASMAtomicOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4374{
4375 ASMAtomicOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4376}
4377
4378
4379/**
4380 * Atomically Or an unsigned 64-bit value, ordered.
4381 *
4382 * @param pu64 Pointer to the variable to OR u64 with.
4383 * @param u64 The value to OR *pu64 with.
4384 *
4385 * @remarks x86: Requires a Pentium or later.
4386 */
4387#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4388DECLASM(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4389#else
4390DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4391{
4392# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4393 _InterlockedOr64((__int64 volatile RT_FAR *)pu64, (__int64)u64);
4394
4395# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4396 __asm__ __volatile__("lock; orq %1, %q0\n\t"
4397 : "=m" (*pu64)
4398 : "r" (u64)
4399 , "m" (*pu64)
4400 : "cc");
4401
4402# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4403 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicOrU64, pu64, DMB_SY,
4404 "orr %[uNew], %[uNew], %[uVal]\n\t"
4405 ,
4406 "orr %[uNew], %[uNew], %[uVal]\n\t"
4407 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4408 [uVal] "r" (u64));
4409
4410# else
4411 for (;;)
4412 {
4413 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4414 uint64_t u64New = u64Old | u64;
4415 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4416 break;
4417 ASMNopPause();
4418 }
4419# endif
4420}
4421#endif
4422
4423
4424/**
4425 * Atomically Or a signed 64-bit value, ordered.
4426 *
4427 * @param pi64 Pointer to the variable to OR i64 with.
4428 * @param i64 The value to OR *pi64 with.
4429 *
4430 * @remarks x86: Requires a Pentium or later.
4431 */
4432DECLINLINE(void) ASMAtomicOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4433{
4434 ASMAtomicOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4435}
4436
4437
4438/**
4439 * Atomically And an unsigned 32-bit value, ordered.
4440 *
4441 * @param pu32 Pointer to the variable to AND u32 with.
4442 * @param u32 The value to AND *pu32 with.
4443 *
4444 * @remarks x86: Requires a 386 or later.
4445 */
4446#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4447RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4448#else
4449DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4450{
4451# if RT_INLINE_ASM_USES_INTRIN
4452 _InterlockedAnd((long volatile RT_FAR *)pu32, u32);
4453
4454# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4455# if RT_INLINE_ASM_GNU_STYLE
4456 __asm__ __volatile__("lock; andl %1, %0\n\t"
4457 : "=m" (*pu32)
4458 : "ir" (u32)
4459 , "m" (*pu32)
4460 : "cc");
4461# else
4462 __asm
4463 {
4464 mov eax, [u32]
4465# ifdef RT_ARCH_AMD64
4466 mov rdx, [pu32]
4467 lock and [rdx], eax
4468# else
4469 mov edx, [pu32]
4470 lock and [edx], eax
4471# endif
4472 }
4473# endif
4474
4475# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4476 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicAnd32, pu32, DMB_SY,
4477 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4478 "and %[uNew], %[uNew], %[uVal]\n\t",
4479 [uVal] "r" (u32));
4480
4481# else
4482# error "Port me"
4483# endif
4484}
4485#endif
4486
4487
4488/**
4489 * Atomically AND an unsigned 32-bit value, ordered, extended version.
4490 *
4491 * @returns Old value.
4492 * @param pu32 Pointer to the variable to AND @a u32 with.
4493 * @param u32 The value to AND @a *pu32 with.
4494 */
4495DECLINLINE(uint32_t) ASMAtomicAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4496{
4497#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4498 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicAndEx32, pu32, DMB_SY,
4499 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4500 "and %[uNew], %[uOld], %[uVal]\n\t",
4501 [uVal] "r" (u32));
4502 return u32OldRet;
4503
4504#else
4505 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4506 uint32_t u32New;
4507 do
4508 u32New = u32RetOld & u32;
4509 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4510 return u32RetOld;
4511#endif
4512}
4513
4514
4515/**
4516 * Atomically And a signed 32-bit value, ordered.
4517 *
4518 * @param pi32 Pointer to the variable to AND i32 with.
4519 * @param i32 The value to AND *pi32 with.
4520 *
4521 * @remarks x86: Requires a 386 or later.
4522 */
4523DECLINLINE(void) ASMAtomicAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4524{
4525 ASMAtomicAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4526}
4527
4528
4529/**
4530 * Atomically And an unsigned 64-bit value, ordered.
4531 *
4532 * @param pu64 Pointer to the variable to AND u64 with.
4533 * @param u64 The value to AND *pu64 with.
4534 *
4535 * @remarks x86: Requires a Pentium or later.
4536 */
4537#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4538DECLASM(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4539#else
4540DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4541{
4542# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
4543 _InterlockedAnd64((__int64 volatile RT_FAR *)pu64, u64);
4544
4545# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4546 __asm__ __volatile__("lock; andq %1, %0\n\t"
4547 : "=m" (*pu64)
4548 : "r" (u64)
4549 , "m" (*pu64)
4550 : "cc");
4551
4552# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4553 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicAndU64, pu64, DMB_SY,
4554 "and %[uNew], %[uNew], %[uVal]\n\t"
4555 ,
4556 "and %[uNew], %[uNew], %[uVal]\n\t"
4557 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4558 [uVal] "r" (u64));
4559
4560# else
4561 for (;;)
4562 {
4563 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4564 uint64_t u64New = u64Old & u64;
4565 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4566 break;
4567 ASMNopPause();
4568 }
4569# endif
4570}
4571#endif
4572
4573
4574/**
4575 * Atomically And a signed 64-bit value, ordered.
4576 *
4577 * @param pi64 Pointer to the variable to AND i64 with.
4578 * @param i64 The value to AND *pi64 with.
4579 *
4580 * @remarks x86: Requires a Pentium or later.
4581 */
4582DECLINLINE(void) ASMAtomicAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4583{
4584 ASMAtomicAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4585}
4586
4587
4588/**
4589 * Atomically XOR an unsigned 32-bit value and a memory location, ordered.
4590 *
4591 * @param pu32 Pointer to the variable to XOR @a u32 with.
4592 * @param u32 The value to XOR @a *pu32 with.
4593 *
4594 * @remarks x86: Requires a 386 or later.
4595 */
4596#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
4597RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4598#else
4599DECLINLINE(void) ASMAtomicXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4600{
4601# if RT_INLINE_ASM_USES_INTRIN
4602 _InterlockedXor((long volatile RT_FAR *)pu32, u32);
4603
4604# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4605# if RT_INLINE_ASM_GNU_STYLE
4606 __asm__ __volatile__("lock; xorl %1, %0\n\t"
4607 : "=m" (*pu32)
4608 : "ir" (u32)
4609 , "m" (*pu32)
4610 : "cc");
4611# else
4612 __asm
4613 {
4614 mov eax, [u32]
4615# ifdef RT_ARCH_AMD64
4616 mov rdx, [pu32]
4617 lock xor [rdx], eax
4618# else
4619 mov edx, [pu32]
4620 lock xor [edx], eax
4621# endif
4622 }
4623# endif
4624
4625# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4626 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicXor32, pu32, DMB_SY,
4627 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4628 "eor %[uNew], %[uNew], %[uVal]\n\t",
4629 [uVal] "r" (u32));
4630
4631# else
4632# error "Port me"
4633# endif
4634}
4635#endif
4636
4637
4638/**
4639 * Atomically XOR an unsigned 32-bit value and a memory location, ordered,
4640 * extended version (for bitmaps).
4641 *
4642 * @returns Old value.
4643 * @param pu32 Pointer to the variable to XOR @a u32 with.
4644 * @param u32 The value to XOR @a *pu32 with.
4645 */
4646DECLINLINE(uint32_t) ASMAtomicXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4647{
4648#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4649 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicXorEx32, pu32, DMB_SY,
4650 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
4651 "eor %[uNew], %[uOld], %[uVal]\n\t",
4652 [uVal] "r" (u32));
4653 return u32OldRet;
4654
4655#else
4656 uint32_t u32RetOld = ASMAtomicUoReadU32(pu32);
4657 uint32_t u32New;
4658 do
4659 u32New = u32RetOld ^ u32;
4660 while (!ASMAtomicCmpXchgExU32(pu32, u32New, u32RetOld, &u32RetOld));
4661 return u32RetOld;
4662#endif
4663}
4664
4665
4666/**
4667 * Atomically XOR a signed 32-bit value, ordered.
4668 *
4669 * @param pi32 Pointer to the variable to XOR i32 with.
4670 * @param i32 The value to XOR *pi32 with.
4671 *
4672 * @remarks x86: Requires a 386 or later.
4673 */
4674DECLINLINE(void) ASMAtomicXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4675{
4676 ASMAtomicXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4677}
4678
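/*
 * Usage sketch (illustrative only and compiled out; the EXAMPLE_F_XXX flags
 * and g_fExampleFlags are hypothetical): the ordered OR / AND / XOR helpers
 * provide lock-free set, clear and toggle operations on a shared 32-bit flag
 * field.
 */
#if 0
#define EXAMPLE_F_READY     UINT32_C(0x00000001)
#define EXAMPLE_F_BUSY      UINT32_C(0x00000002)
static uint32_t volatile g_fExampleFlags = 0;

static void ExampleSetReady(void)   { ASMAtomicOrU32(&g_fExampleFlags, EXAMPLE_F_READY); }
static void ExampleClearBusy(void)  { ASMAtomicAndU32(&g_fExampleFlags, ~EXAMPLE_F_BUSY); }
static void ExampleToggleBusy(void) { ASMAtomicXorU32(&g_fExampleFlags, EXAMPLE_F_BUSY); }
#endif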
4679
4680/**
4681 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
4682 *
4683 * @param pu32 Pointer to the variable to OR u32 with.
4684 * @param u32 The value to OR *pu32 with.
4685 *
4686 * @remarks x86: Requires a 386 or later.
4687 */
4688#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4689RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4690#else
4691DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4692{
4693# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4694# if RT_INLINE_ASM_GNU_STYLE
4695 __asm__ __volatile__("orl %1, %0\n\t"
4696 : "=m" (*pu32)
4697 : "ir" (u32)
4698 , "m" (*pu32)
4699 : "cc");
4700# else
4701 __asm
4702 {
4703 mov eax, [u32]
4704# ifdef RT_ARCH_AMD64
4705 mov rdx, [pu32]
4706 or [rdx], eax
4707# else
4708 mov edx, [pu32]
4709 or [edx], eax
4710# endif
4711 }
4712# endif
4713
4714# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4715 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoOrU32, pu32, NO_BARRIER,
4716 "orr %w[uNew], %w[uNew], %w[uVal]\n\t",
4717 "orr %[uNew], %[uNew], %[uVal]\n\t",
4718 [uVal] "r" (u32));
4719
4720# else
4721# error "Port me"
4722# endif
4723}
4724#endif
4725
4726
4727/**
4728 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe,
4729 * extended version (for bitmap fallback).
4730 *
4731 * @returns Old value.
4732 * @param pu32 Pointer to the variable to OR @a u32 with.
4733 * @param u32 The value to OR @a *pu32 with.
4734 */
4735DECLINLINE(uint32_t) ASMAtomicUoOrExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4736{
4737#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4738 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoOrExU32, pu32, NO_BARRIER,
4739 "orr %w[uNew], %w[uOld], %w[uVal]\n\t",
4740 "orr %[uNew], %[uOld], %[uVal]\n\t",
4741 [uVal] "r" (u32));
4742 return u32OldRet;
4743
4744#else
4745 return ASMAtomicOrExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4746#endif
4747}
4748
4749
4750/**
4751 * Atomically OR a signed 32-bit value, unordered.
4752 *
4753 * @param pi32 Pointer to the variable to OR i32 with.
4754 * @param i32 The value to OR *pi32 with.
4755 *
4756 * @remarks x86: Requires a 386 or later.
4757 */
4758DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4759{
4760 ASMAtomicUoOrU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4761}
4762
4763
4764/**
4765 * Atomically OR an unsigned 64-bit value, unordered.
4766 *
4767 * @param pu64 Pointer to the variable to OR u64 with.
4768 * @param u64 The value to OR *pu64 with.
4769 *
4770 * @remarks x86: Requires a Pentium or later.
4771 */
4772#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4773DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4774#else
4775DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4776{
4777# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4778 __asm__ __volatile__("orq %1, %q0\n\t"
4779 : "=m" (*pu64)
4780 : "r" (u64)
4781 , "m" (*pu64)
4782 : "cc");
4783
4784# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4785 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoOrU64, pu64, NO_BARRIER,
4786 "orr %[uNew], %[uNew], %[uVal]\n\t"
4787 ,
4788 "orr %[uNew], %[uNew], %[uVal]\n\t"
4789 "orr %H[uNew], %H[uNew], %H[uVal]\n\t",
4790 [uVal] "r" (u64));
4791
4792# else
4793 for (;;)
4794 {
4795 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4796 uint64_t u64New = u64Old | u64;
4797 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4798 break;
4799 ASMNopPause();
4800 }
4801# endif
4802}
4803#endif
4804
4805
4806/**
4807 * Atomically Or a signed 64-bit value, unordered.
4808 *
4809 * @param pi64 Pointer to the variable to OR i64 with.
4810 * @param i64 The value to OR *pi64 with.
4811 *
4812 * @remarks x86: Requires a Pentium or later.
4813 */
4814DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4815{
4816 ASMAtomicUoOrU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4817}
4818
4819
4820/**
4821 * Atomically And an unsigned 32-bit value, unordered.
4822 *
4823 * @param pu32 Pointer to the variable to AND u32 with.
4824 * @param u32 The value to AND *pu32 with.
4825 *
4826 * @remarks x86: Requires a 386 or later.
4827 */
4828#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4829RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4830#else
4831DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4832{
4833# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4834# if RT_INLINE_ASM_GNU_STYLE
4835 __asm__ __volatile__("andl %1, %0\n\t"
4836 : "=m" (*pu32)
4837 : "ir" (u32)
4838 , "m" (*pu32)
4839 : "cc");
4840# else
4841 __asm
4842 {
4843 mov eax, [u32]
4844# ifdef RT_ARCH_AMD64
4845 mov rdx, [pu32]
4846 and [rdx], eax
4847# else
4848 mov edx, [pu32]
4849 and [edx], eax
4850# endif
4851 }
4852# endif
4853
4854# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4855 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoAnd32, pu32, NO_BARRIER,
4856 "and %w[uNew], %w[uNew], %w[uVal]\n\t",
4857 "and %[uNew], %[uNew], %[uVal]\n\t",
4858 [uVal] "r" (u32));
4859
4860# else
4861# error "Port me"
4862# endif
4863}
4864#endif
4865
4866
4867/**
4868 * Atomically AND an unsigned 32-bit value, unordered, extended version (for
4869 * bitmap fallback).
4870 *
4871 * @returns Old value.
4872 * @param pu32 Pointer to the variable to AND @a u32 with.
4873 * @param u32 The value to AND @a *pu32 with.
4874 */
4875DECLINLINE(uint32_t) ASMAtomicUoAndExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4876{
4877#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4878 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoAndEx32, pu32, NO_BARRIER,
4879 "and %w[uNew], %w[uOld], %w[uVal]\n\t",
4880 "and %[uNew], %[uOld], %[uVal]\n\t",
4881 [uVal] "r" (u32));
4882 return u32OldRet;
4883
4884#else
4885 return ASMAtomicAndExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
4886#endif
4887}
4888
4889
4890/**
4891 * Atomically And a signed 32-bit value, unordered.
4892 *
4893 * @param pi32 Pointer to the variable to AND i32 with.
4894 * @param i32 The value to AND *pi32 with.
4895 *
4896 * @remarks x86: Requires a 386 or later.
4897 */
4898DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
4899{
4900 ASMAtomicUoAndU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
4901}
4902
4903
4904/**
4905 * Atomically And an unsigned 64-bit value, unordered.
4906 *
4907 * @param pu64 Pointer to the variable to AND u64 with.
4908 * @param u64 The value to AND *pu64 with.
4909 *
4910 * @remarks x86: Requires a Pentium or later.
4911 */
4912#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4913DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_PROTO;
4914#else
4915DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile RT_FAR *pu64, uint64_t u64) RT_NOTHROW_DEF
4916{
4917# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
4918 __asm__ __volatile__("andq %1, %0\n\t"
4919 : "=m" (*pu64)
4920 : "r" (u64)
4921 , "m" (*pu64)
4922 : "cc");
4923
4924# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4925 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_64(ASMAtomicUoAndU64, pu64, NO_BARRIER,
4926 "and %[uNew], %[uNew], %[uVal]\n\t"
4927 ,
4928 "and %[uNew], %[uNew], %[uVal]\n\t"
4929 "and %H[uNew], %H[uNew], %H[uVal]\n\t",
4930 [uVal] "r" (u64));
4931
4932# else
4933 for (;;)
4934 {
4935 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
4936 uint64_t u64New = u64Old & u64;
4937 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
4938 break;
4939 ASMNopPause();
4940 }
4941# endif
4942}
4943#endif
4944
4945
4946/**
4947 * Atomically And a signed 64-bit value, unordered.
4948 *
4949 * @param pi64 Pointer to the variable to AND i64 with.
4950 * @param i64 The value to AND *pi64 with.
4951 *
4952 * @remarks x86: Requires a Pentium or later.
4953 */
4954DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile RT_FAR *pi64, int64_t i64) RT_NOTHROW_DEF
4955{
4956 ASMAtomicUoAndU64((uint64_t volatile RT_FAR *)pi64, (uint64_t)i64);
4957}
4958
4959
4960/**
4961 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe.
4962 *
4963 * @param pu32 Pointer to the variable to XOR @a u32 with.
4964 * @param u32 The value to XOR @a *pu32 with.
4965 *
4966 * @remarks x86: Requires a 386 or later.
4967 */
4968#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
4969RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_PROTO;
4970#else
4971DECLINLINE(void) ASMAtomicUoXorU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
4972{
4973# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4974# if RT_INLINE_ASM_GNU_STYLE
4975 __asm__ __volatile__("xorl %1, %0\n\t"
4976 : "=m" (*pu32)
4977 : "ir" (u32)
4978 , "m" (*pu32)
4979 : "cc");
4980# else
4981 __asm
4982 {
4983 mov eax, [u32]
4984# ifdef RT_ARCH_AMD64
4985 mov rdx, [pu32]
4986 xor [rdx], eax
4987# else
4988 mov edx, [pu32]
4989 xor [edx], eax
4990# endif
4991 }
4992# endif
4993
4994# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
4995 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoXorU32, pu32, NO_BARRIER,
4996 "eor %w[uNew], %w[uNew], %w[uVal]\n\t",
4997 "eor %[uNew], %[uNew], %[uVal]\n\t",
4998 [uVal] "r" (u32));
4999
5000# else
5001# error "Port me"
5002# endif
5003}
5004#endif
5005
5006
5007/**
5008 * Atomically XOR an unsigned 32-bit value, unordered but interrupt safe,
5009 * extended version (for bitmap fallback).
5010 *
5011 * @returns Old value.
5012 * @param pu32 Pointer to the variable to XOR @a u32 with.
5013 * @param u32 The value to XOR @a *pu32 with.
5014 */
5015DECLINLINE(uint32_t) ASMAtomicUoXorExU32(uint32_t volatile RT_FAR *pu32, uint32_t u32) RT_NOTHROW_DEF
5016{
5017#if defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5018 RTASM_ARM_LOAD_MODIFY_STORE_RET_OLD_32(ASMAtomicUoXorExU32, pu32, NO_BARRIER,
5019 "eor %w[uNew], %w[uOld], %w[uVal]\n\t",
5020 "eor %[uNew], %[uOld], %[uVal]\n\t",
5021 [uVal] "r" (u32));
5022 return u32OldRet;
5023
5024#else
5025 return ASMAtomicXorExU32(pu32, u32); /* (we have no unordered cmpxchg primitive atm.) */
5026#endif
5027}
5028
5029
5030/**
5031 * Atomically XOR a signed 32-bit value, unordered.
5032 *
5033 * @param pi32 Pointer to the variable to XOR @a i32 with.
5034 * @param i32 The value to XOR @a *pi32 with.
5035 *
5036 * @remarks x86: Requires a 386 or later.
5037 */
5038DECLINLINE(void) ASMAtomicUoXorS32(int32_t volatile RT_FAR *pi32, int32_t i32) RT_NOTHROW_DEF
5039{
5040 ASMAtomicUoXorU32((uint32_t volatile RT_FAR *)pi32, (uint32_t)i32);
5041}
5042
5043
5044/**
5045 * Atomically increment an unsigned 32-bit value, unordered.
5046 *
5047 * @returns the new value.
5048 * @param pu32 Pointer to the variable to increment.
5049 *
5050 * @remarks x86: Requires a 486 or later.
5051 */
5052#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5053RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5054#else
5055DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5056{
5057# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5058 uint32_t u32;
5059# if RT_INLINE_ASM_GNU_STYLE
5060 __asm__ __volatile__("xaddl %0, %1\n\t"
5061 : "=r" (u32)
5062 , "=m" (*pu32)
5063 : "0" (1)
5064 , "m" (*pu32)
5065 : "memory" /** @todo why 'memory'? */
5066 , "cc");
5067 return u32 + 1;
5068# else
5069 __asm
5070 {
5071 mov eax, 1
5072# ifdef RT_ARCH_AMD64
5073 mov rdx, [pu32]
5074 xadd [rdx], eax
5075# else
5076 mov edx, [pu32]
5077 xadd [edx], eax
5078# endif
5079 mov u32, eax
5080 }
5081 return u32 + 1;
5082# endif
5083
5084# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5085 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoIncU32, pu32, NO_BARRIER,
5086 "add %w[uNew], %w[uNew], #1\n\t",
5087 "add %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5088 "X" (0) /* dummy */);
5089 return u32NewRet;
5090
5091# else
5092# error "Port me"
5093# endif
5094}
5095#endif
5096
5097
5098/**
5099 * Atomically decrement an unsigned 32-bit value, unordered.
5100 *
5101 * @returns the new value.
5102 * @param pu32 Pointer to the variable to decrement.
5103 *
5104 * @remarks x86: Requires a 486 or later.
5105 */
5106#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5107RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_PROTO;
5108#else
5109DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile RT_FAR *pu32) RT_NOTHROW_DEF
5110{
5111# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5112 uint32_t u32;
5113# if RT_INLINE_ASM_GNU_STYLE
5114 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
5115 : "=r" (u32)
5116 , "=m" (*pu32)
5117 : "0" (-1)
5118 , "m" (*pu32)
5119 : "memory"
5120 , "cc");
5121 return u32 - 1;
5122# else
5123 __asm
5124 {
5125 mov eax, -1
5126# ifdef RT_ARCH_AMD64
5127 mov rdx, [pu32]
5128 xadd [rdx], eax
5129# else
5130 mov edx, [pu32]
5131 xadd [edx], eax
5132# endif
5133 mov u32, eax
5134 }
5135 return u32 - 1;
5136# endif
5137
5138# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5139 RTASM_ARM_LOAD_MODIFY_STORE_RET_NEW_32(ASMAtomicUoDecU32, pu32, NO_BARRIER,
5140 "sub %w[uNew], %w[uNew], #1\n\t",
5141 "sub %[uNew], %[uNew], #1\n\t" /* arm6 / thumb2+ */,
5142 "X" (0) /* dummy */);
5143 return u32NewRet;
5144
5145# else
5146# error "Port me"
5147# endif
5148}
5149#endif
5150
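/*
 * Usage sketch (illustrative only and compiled out; ExampleCountInterrupt and
 * g_cExampleInterrupts are hypothetical): the unordered variants drop the
 * lock prefix / memory barriers of the ordered ones, which is sufficient for
 * things like statistics counters that only a single CPU updates.
 */
#if 0
static uint32_t volatile g_cExampleInterrupts = 0;

static void ExampleCountInterrupt(void)
{
    ASMAtomicUoIncU32(&g_cExampleInterrupts);
}
#endif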
5151
5152/** @def RT_ASM_PAGE_SIZE
5153 * We try to avoid dragging in iprt/param.h here.
5154 * @internal
5155 */
5156#if defined(RT_ARCH_SPARC64)
5157# define RT_ASM_PAGE_SIZE 0x2000
5158# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5159# if PAGE_SIZE != 0x2000
5160# error "PAGE_SIZE is not 0x2000!"
5161# endif
5162# endif
5163#elif defined(RT_ARCH_ARM64)
5164# define RT_ASM_PAGE_SIZE 0x4000
5165# if defined(PAGE_SIZE) && !defined(NT_INCLUDED) && !defined(_MACH_ARM_VM_PARAM_H_)
5166# if PAGE_SIZE != 0x4000
5167# error "PAGE_SIZE is not 0x4000!"
5168# endif
5169# endif
5170#else
5171# define RT_ASM_PAGE_SIZE 0x1000
5172# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
5173# if PAGE_SIZE != 0x1000
5174# error "PAGE_SIZE is not 0x1000!"
5175# endif
5176# endif
5177#endif
5178
5179/**
5180 * Zeros a memory page (RT_ASM_PAGE_SIZE bytes).
5181 *
5182 * @param pv Pointer to the memory block. This must be page aligned.
5183 */
5184#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5185RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_PROTO;
5186# else
5187DECLINLINE(void) ASMMemZeroPage(volatile void RT_FAR *pv) RT_NOTHROW_DEF
5188{
5189# if RT_INLINE_ASM_USES_INTRIN
5190# ifdef RT_ARCH_AMD64
5191 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
5192# else
5193 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
5194# endif
5195
5196# elif RT_INLINE_ASM_GNU_STYLE
5197 RTCCUINTREG uDummy;
5198# ifdef RT_ARCH_AMD64
5199 __asm__ __volatile__("rep stosq"
5200 : "=D" (pv),
5201 "=c" (uDummy)
5202 : "0" (pv),
5203 "c" (RT_ASM_PAGE_SIZE >> 3),
5204 "a" (0)
5205 : "memory");
5206# else
5207 __asm__ __volatile__("rep stosl"
5208 : "=D" (pv),
5209 "=c" (uDummy)
5210 : "0" (pv),
5211 "c" (RT_ASM_PAGE_SIZE >> 2),
5212 "a" (0)
5213 : "memory");
5214# endif
5215# else
5216 __asm
5217 {
5218# ifdef RT_ARCH_AMD64
5219 xor rax, rax
5220 mov ecx, 0200h
5221 mov rdi, [pv]
5222 rep stosq
5223# else
5224 xor eax, eax
5225 mov ecx, 0400h
5226 mov edi, [pv]
5227 rep stosd
5228# endif
5229 }
5230# endif
5231}
5232# endif
5233
5234
5235/**
5236 * Zeros a memory block with a 32-bit aligned size.
5237 *
5238 * @param pv Pointer to the memory block.
5239 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5240 */
5241#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5242RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5243#else
5244DECLINLINE(void) ASMMemZero32(volatile void RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5245{
5246# if RT_INLINE_ASM_USES_INTRIN
5247# ifdef RT_ARCH_AMD64
5248 if (!(cb & 7))
5249 __stosq((unsigned __int64 RT_FAR *)pv, 0, cb / 8);
5250 else
5251# endif
5252 __stosd((unsigned long RT_FAR *)pv, 0, cb / 4);
5253
5254# elif RT_INLINE_ASM_GNU_STYLE
5255 __asm__ __volatile__("rep stosl"
5256 : "=D" (pv),
5257 "=c" (cb)
5258 : "0" (pv),
5259 "1" (cb >> 2),
5260 "a" (0)
5261 : "memory");
5262# else
5263 __asm
5264 {
5265 xor eax, eax
5266# ifdef RT_ARCH_AMD64
5267 mov rcx, [cb]
5268 shr rcx, 2
5269 mov rdi, [pv]
5270# else
5271 mov ecx, [cb]
5272 shr ecx, 2
5273 mov edi, [pv]
5274# endif
5275 rep stosd
5276 }
5277# endif
5278}
5279#endif
5280
5281
5282/**
5283 * Fills a memory block with a 32-bit aligned size.
5284 *
5285 * @param pv Pointer to the memory block.
5286 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
5287 * @param u32 The value to fill with.
5288 */
5289#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
5290RT_ASM_DECL_PRAGMA_WATCOM(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_PROTO;
5291#else
5292DECLINLINE(void) ASMMemFill32(volatile void RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5293{
5294# if RT_INLINE_ASM_USES_INTRIN
5295# ifdef RT_ARCH_AMD64
5296 if (!(cb & 7))
5297 __stosq((unsigned __int64 RT_FAR *)pv, RT_MAKE_U64(u32, u32), cb / 8);
5298 else
5299# endif
5300 __stosd((unsigned long RT_FAR *)pv, u32, cb / 4);
5301
5302# elif RT_INLINE_ASM_GNU_STYLE
5303 __asm__ __volatile__("rep stosl"
5304 : "=D" (pv),
5305 "=c" (cb)
5306 : "0" (pv),
5307 "1" (cb >> 2),
5308 "a" (u32)
5309 : "memory");
5310# else
5311 __asm
5312 {
5313# ifdef RT_ARCH_AMD64
5314 mov rcx, [cb]
5315 shr rcx, 2
5316 mov rdi, [pv]
5317# else
5318 mov ecx, [cb]
5319 shr ecx, 2
5320 mov edi, [pv]
5321# endif
5322 mov eax, [u32]
5323 rep stosd
5324 }
5325# endif
5326}
5327#endif
5328
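/*
 * Usage sketch (illustrative only and compiled out; ExampleInitTables and its
 * parameters are hypothetical): ASMMemZero32 and ASMMemFill32 require the
 * byte count to be a multiple of four, so they fit fixed-size, 32-bit aligned
 * tables, while ASMMemZeroPage needs a page aligned buffer.
 */
#if 0
static void ExampleInitTables(void *pvScratchPage, uint32_t *pau32Bitmap, size_t cbBitmap,
                              uint32_t *pau32Table, size_t cbTable)
{
    ASMMemZeroPage(pvScratchPage);                              /* pvScratchPage must be page aligned */
    ASMMemZero32(pau32Bitmap, cbBitmap);                        /* cbBitmap must be a multiple of 4 */
    ASMMemFill32(pau32Table, cbTable, UINT32_C(0xffffffff));    /* cbTable must be a multiple of 4 */
}
#endif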
5329
5330/**
5331 * Checks if a memory block is all zeros.
5332 *
5333 * @returns Pointer to the first non-zero byte.
5334 * @returns NULL if all zero.
5335 *
5336 * @param pv Pointer to the memory block.
5337 * @param cb Number of bytes in the block.
5338 */
5339#if !defined(RDESKTOP) && (!defined(RT_OS_LINUX) || !defined(__KERNEL__))
5340DECLASM(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_PROTO;
5341#else
5342DECLINLINE(void RT_FAR *) ASMMemFirstNonZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5343{
5344/** @todo replace with ASMMemFirstNonZero-generic.cpp in kernel modules. */
5345 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5346 for (; cb; cb--, pb++)
5347 if (RT_LIKELY(*pb == 0))
5348 { /* likely */ }
5349 else
5350 return (void RT_FAR *)pb;
5351 return NULL;
5352}
5353#endif
5354
5355
5356/**
5357 * Checks if a memory block is all zeros.
5358 *
5359 * @returns true if zero, false if not.
5360 *
5361 * @param pv Pointer to the memory block.
5362 * @param cb Number of bytes in the block.
5363 *
5364 * @sa ASMMemFirstNonZero
5365 */
5366DECLINLINE(bool) ASMMemIsZero(void const RT_FAR *pv, size_t cb) RT_NOTHROW_DEF
5367{
5368 return ASMMemFirstNonZero(pv, cb) == NULL;
5369}
5370
5371
5372/**
5373 * Checks if a memory page is all zeros.
5374 *
5375 * @returns true / false.
5376 *
5377 * @param pvPage Pointer to the page. Must be aligned on a 16 byte
5378 * boundary.
5379 */
5380DECLINLINE(bool) ASMMemIsZeroPage(void const RT_FAR *pvPage) RT_NOTHROW_DEF
5381{
5382# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
5383 union { RTCCUINTREG r; bool f; } uAX;
5384 RTCCUINTREG xCX, xDI;
5385 Assert(!((uintptr_t)pvPage & 15));
5386 __asm__ __volatile__("repe; "
5387# ifdef RT_ARCH_AMD64
5388 "scasq\n\t"
5389# else
5390 "scasl\n\t"
5391# endif
5392 "setnc %%al\n\t"
5393 : "=&c" (xCX)
5394 , "=&D" (xDI)
5395 , "=&a" (uAX.r)
5396 : "mr" (pvPage)
5397# ifdef RT_ARCH_AMD64
5398 , "0" (RT_ASM_PAGE_SIZE/8)
5399# else
5400 , "0" (RT_ASM_PAGE_SIZE/4)
5401# endif
5402 , "1" (pvPage)
5403 , "2" (0)
5404 : "cc");
5405 return uAX.f;
5406# else
5407 uintptr_t const RT_FAR *puPtr = (uintptr_t const RT_FAR *)pvPage;
5408 size_t cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
5409 Assert(!((uintptr_t)pvPage & 15));
5410 for (;;)
5411 {
5412 if (puPtr[0]) return false;
5413 if (puPtr[4]) return false;
5414
5415 if (puPtr[2]) return false;
5416 if (puPtr[6]) return false;
5417
5418 if (puPtr[1]) return false;
5419 if (puPtr[5]) return false;
5420
5421 if (puPtr[3]) return false;
5422 if (puPtr[7]) return false;
5423
5424 if (!--cLeft)
5425 return true;
5426 puPtr += 8;
5427 }
5428# endif
5429}
5430
5431
5432/**
5433 * Checks if a memory block is filled with the specified byte, returning the
5434 * first mismatch.
5435 *
5436 * This is sort of an inverted memchr.
5437 *
5438 * @returns Pointer to the byte which doesn't equal u8.
5439 * @returns NULL if all equal to u8.
5440 *
5441 * @param pv Pointer to the memory block.
5442 * @param cb Number of bytes in the block.
5443 * @param u8 The value it's supposed to be filled with.
5444 *
5445 * @remarks No alignment requirements.
5446 */
5447#if (!defined(RT_OS_LINUX) || !defined(__KERNEL__)) \
5448 && (!defined(RT_OS_FREEBSD) || !defined(_KERNEL))
5449DECLASM(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_PROTO;
5450#else
5451DECLINLINE(void *) ASMMemFirstMismatchingU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5452{
5453/** @todo replace with ASMMemFirstMismatchingU8-generic.cpp in kernel modules. */
5454 uint8_t const *pb = (uint8_t const RT_FAR *)pv;
5455 for (; cb; cb--, pb++)
5456 if (RT_LIKELY(*pb == u8))
5457 { /* likely */ }
5458 else
5459 return (void *)pb;
5460 return NULL;
5461}
5462#endif
5463
5464
5465/**
5466 * Checks if a memory block is filled with the specified byte.
5467 *
5468 * @returns true if all matching, false if not.
5469 *
5470 * @param pv Pointer to the memory block.
5471 * @param cb Number of bytes in the block.
5472 * @param u8 The value it's supposed to be filled with.
5473 *
5474 * @remarks No alignment requirements.
5475 */
5476DECLINLINE(bool) ASMMemIsAllU8(void const RT_FAR *pv, size_t cb, uint8_t u8) RT_NOTHROW_DEF
5477{
5478 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
5479}
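
/*
 * Usage sketch (illustrative, not part of the original header): verifying
 * that a just-erased buffer really is filled with 0xff and, if not,
 * reporting the offset of the first deviating byte.  The function name is
 * made up for this sketch.
 */
#if 0 /* illustrative example only */
DECLINLINE(size_t) exampleFirstNonErasedOffset(void const *pvBuf, size_t cbBuf)
{
    void *pvMismatch = ASMMemFirstMismatchingU8(pvBuf, cbBuf, 0xff);
    if (!pvMismatch)
        return cbBuf;   /* fully erased (same "not found" convention) */
    return (size_t)((uintptr_t)pvMismatch - (uintptr_t)pvBuf);
}
#endif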
5480
5481
5482/**
5483 * Checks if a memory block is filled with the specified 32-bit value.
5484 *
5485 * This is a sort of inverted memchr.
5486 *
5487 * @returns Pointer to the first value which doesn't equal u32.
5488 * @returns NULL if all equal to u32.
5489 *
5490 * @param pv Pointer to the memory block.
5491 * @param cb Number of bytes in the block. This MUST be a multiple of 4 (32-bit aligned)!
5492 * @param u32 The value it's supposed to be filled with.
5493 */
5494DECLINLINE(uint32_t RT_FAR *) ASMMemFirstMismatchingU32(void const RT_FAR *pv, size_t cb, uint32_t u32) RT_NOTHROW_DEF
5495{
5496/** @todo rewrite this in inline assembly? */
5497 uint32_t const RT_FAR *pu32 = (uint32_t const RT_FAR *)pv;
5498 for (; cb; cb -= 4, pu32++)
5499 if (RT_LIKELY(*pu32 == u32))
5500 { /* likely */ }
5501 else
5502 return (uint32_t RT_FAR *)pu32;
5503 return NULL;
5504}
5505
5506
5507/**
5508 * Probes a byte pointer for read access.
5509 *
5510 * While the function will fault if the byte is not read accessible,
5511 * the idea is to do this in a safe place, like before acquiring locks
5512 * and such.
5513 *
5514 * Also, this function guarantees that an eager compiler is not going
5515 * to optimize the probing away.
5516 *
5517 * @param pvByte Pointer to the byte.
5518 */
5519#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5520RT_ASM_DECL_PRAGMA_WATCOM(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_PROTO;
5521#else
5522DECLINLINE(uint8_t) ASMProbeReadByte(const void RT_FAR *pvByte) RT_NOTHROW_DEF
5523{
5524# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5525 uint8_t u8;
5526# if RT_INLINE_ASM_GNU_STYLE
5527 __asm__ __volatile__("movb %1, %0\n\t"
5528 : "=q" (u8)
5529 : "m" (*(const uint8_t *)pvByte));
5530# else
5531 __asm
5532 {
5533# ifdef RT_ARCH_AMD64
5534 mov rax, [pvByte]
5535 mov al, [rax]
5536# else
5537 mov eax, [pvByte]
5538 mov al, [eax]
5539# endif
5540 mov [u8], al
5541 }
5542# endif
5543 return u8;
5544
5545# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5546 uint32_t u32;
5547 __asm__ __volatile__(".Lstart_ASMProbeReadByte_%=:\n\t"
5548# if defined(RT_ARCH_ARM64)
5549 "ldxrb %w[uDst], %[pMem]\n\t"
5550# else
5551 "ldrexb %[uDst], %[pMem]\n\t"
5552# endif
5553 : [uDst] "=&r" (u32)
5554 : [pMem] "m" (*(uint8_t const *)pvByte));
5555 return (uint8_t)u32;
5556
5557# else
5558# error "Port me"
5559# endif
5560}
5561#endif
5562
5563/**
5564 * Probes a buffer for read access page by page.
5565 *
5566 * While the function will fault if the buffer is not fully read
5567 * accessible, the idea is to do this in a safe place, like before
5568 * acquiring locks and such.
5569 *
5570 * Also, this function guarantees that an eager compiler is not going
5571 * to optimize the probing away.
5572 *
5573 * @param pvBuf Pointer to the buffer.
5574 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
5575 */
5576DECLINLINE(void) ASMProbeReadBuffer(const void RT_FAR *pvBuf, size_t cbBuf) RT_NOTHROW_DEF
5577{
5578 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
5579 /* the first byte */
5580 const uint8_t RT_FAR *pu8 = (const uint8_t RT_FAR *)pvBuf;
5581 ASMProbeReadByte(pu8);
5582
5583 /* the pages in between. */
5584 while (cbBuf > RT_ASM_PAGE_SIZE)
5585 {
5586 ASMProbeReadByte(pu8);
5587 cbBuf -= RT_ASM_PAGE_SIZE;
5588 pu8 += RT_ASM_PAGE_SIZE;
5589 }
5590
5591 /* the last byte */
5592 ASMProbeReadByte(pu8 + cbBuf - 1);
5593}
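
/*
 * Usage sketch (illustrative, not part of the original header): probing
 * guest/user supplied memory up front so that any page fault is taken here,
 * in a safe context, rather than later in a section that must not fault
 * (for instance while a spinlock is held).  The names are made up for this
 * sketch.
 */
#if 0 /* illustrative example only */
DECLINLINE(void) exampleProbeGuestStructures(uint8_t const *pbStatus, void const *pvRing, size_t cbRing)
{
    ASMProbeReadByte(pbStatus);         /* single status byte */
    ASMProbeReadBuffer(pvRing, cbRing); /* one byte per page of the ring */
}
#endif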
5594
5595
5596/**
5597 * Reverse the byte order of the given 16-bit integer.
5598 *
5599 * @returns The byte-swapped value.
5600 * @param u16 16-bit integer value.
5601 */
5602#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5603RT_ASM_DECL_PRAGMA_WATCOM(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_PROTO;
5604#else
5605DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16) RT_NOTHROW_DEF
5606{
5607# if RT_INLINE_ASM_USES_INTRIN
5608 return _byteswap_ushort(u16);
5609
5610# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5611# if RT_INLINE_ASM_GNU_STYLE
5612 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16) : "cc");
5613# else
5614 _asm
5615 {
5616 mov ax, [u16]
5617 ror ax, 8
5618 mov [u16], ax
5619 }
5620# endif
5621 return u16;
5622
5623# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
5624 uint32_t u32Ret;
5625 __asm__ __volatile__(
5626# if defined(RT_ARCH_ARM64)
5627 "rev16 %w[uRet], %w[uVal]\n\t"
5628# else
5629 "rev16 %[uRet], %[uVal]\n\t"
5630# endif
5631 : [uRet] "=r" (u32Ret)
5632 : [uVal] "r" (u16));
5633 return (uint16_t)u32Ret;
5634
5635# else
5636# error "Port me"
5637# endif
5638}
5639#endif
5640
5641
5642/**
5643 * Reverse the byte order of the given 32-bit integer.
5644 *
5645 * @returns The byte-swapped value.
5646 * @param u32 32-bit integer value.
5647 */
5648#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5649RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_PROTO;
5650#else
5651DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32) RT_NOTHROW_DEF
5652{
5653# if RT_INLINE_ASM_USES_INTRIN
5654 return _byteswap_ulong(u32);
5655
5656# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5657# if RT_INLINE_ASM_GNU_STYLE
5658 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5659# else
5660 _asm
5661 {
5662 mov eax, [u32]
5663 bswap eax
5664 mov [u32], eax
5665 }
5666# endif
5667 return u32;
5668
5669# elif defined(RT_ARCH_ARM64)
5670 uint64_t u64Ret;
5671 __asm__ __volatile__("rev32 %[uRet], %[uVal]\n\t"
5672 : [uRet] "=r" (u64Ret)
5673 : [uVal] "r" ((uint64_t)u32));
5674 return (uint32_t)u64Ret;
5675
5676# elif defined(RT_ARCH_ARM32)
5677 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
5678 : [uRet] "=r" (u32)
5679 : [uVal] "[uRet]" (u32));
5680 return u32;
5681
5682# else
5683# error "Port me"
5684# endif
5685}
5686#endif
5687
5688
5689/**
5690 * Reverse the byte order of the given 64-bit integer.
5691 *
5692 * @returns The byte-swapped value.
5693 * @param u64 64-bit integer value.
5694 */
5695DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64) RT_NOTHROW_DEF
5696{
5697#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5698 return _byteswap_uint64(u64);
5699
5700# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5701 __asm__ ("bswapq %0" : "=r" (u64) : "0" (u64));
5702 return u64;
5703
5704# elif defined(RT_ARCH_ARM64)
5705 __asm__ __volatile__("rev %[uRet], %[uVal]\n\t"
5706 : [uRet] "=r" (u64)
5707 : [uVal] "[uRet]" (u64));
5708 return u64;
5709
5710#else
5711 return (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5712 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5713#endif
5714}
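
/*
 * Usage sketch (illustrative, not part of the original header): converting a
 * big-endian, on-the-wire 32-bit value to host order.  Real code would more
 * likely use IPRT's RT_BE2H_U32 / RT_H2BE_U32 style macros; the explicit
 * swap below is just to show the primitive.  The function name is made up
 * for this sketch.
 */
#if 0 /* illustrative example only */
DECLINLINE(uint32_t) exampleBigEndianToHostU32(uint32_t u32Wire)
{
# ifdef RT_BIG_ENDIAN
    return u32Wire;                     /* already in host order */
# else
    /* A wire value of 0x12345678 is loaded as 0x78563412 on a little endian
       host and swapped back to 0x12345678 here. */
    return ASMByteSwapU32(u32Wire);
# endif
}
#endif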
5715
5716
5717
5718/** @defgroup grp_inline_bits Bit Operations
5719 * @{
5720 */
5721
5722
5723/**
5724 * Sets a bit in a bitmap.
5725 *
5726 * @param pvBitmap Pointer to the bitmap (little endian). This should be
5727 * 32-bit aligned.
5728 * @param iBit The bit to set.
5729 *
5730 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
5731 * However, aligning it will yield better performance and avoid
5732 * traps when accessing the last bits in the bitmap.
5733 */
5734#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5735RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5736#else
5737DECLINLINE(void) ASMBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5738{
5739# if RT_INLINE_ASM_USES_INTRIN
5740 _bittestandset((long RT_FAR *)pvBitmap, iBit);
5741
5742# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5743# if RT_INLINE_ASM_GNU_STYLE
5744 __asm__ __volatile__("btsl %1, %0"
5745 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5746 : "Ir" (iBit)
5747 , "m" (*(volatile long RT_FAR *)pvBitmap)
5748 : "memory"
5749 , "cc");
5750# else
5751 __asm
5752 {
5753# ifdef RT_ARCH_AMD64
5754 mov rax, [pvBitmap]
5755 mov edx, [iBit]
5756 bts [rax], edx
5757# else
5758 mov eax, [pvBitmap]
5759 mov edx, [iBit]
5760 bts [eax], edx
5761# endif
5762 }
5763# endif
5764
5765# else
5766 int32_t offBitmap = iBit / 32;
5767 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5768 ASMAtomicUoOrU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5769# endif
5770}
5771#endif
5772
5773
5774/**
5775 * Atomically sets a bit in a bitmap, ordered.
5776 *
5777 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5778 * aligned, otherwise the memory access isn't atomic!
5779 * @param iBit The bit to set.
5780 *
5781 * @remarks x86: Requires a 386 or later.
5782 */
5783#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5784RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5785#else
5786DECLINLINE(void) ASMAtomicBitSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5787{
5788 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5789# if RT_INLINE_ASM_USES_INTRIN
5790 _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
5791# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5792# if RT_INLINE_ASM_GNU_STYLE
5793 __asm__ __volatile__("lock; btsl %1, %0"
5794 : "=m" (*(volatile long *)pvBitmap)
5795 : "Ir" (iBit)
5796 , "m" (*(volatile long *)pvBitmap)
5797 : "memory"
5798 , "cc");
5799# else
5800 __asm
5801 {
5802# ifdef RT_ARCH_AMD64
5803 mov rax, [pvBitmap]
5804 mov edx, [iBit]
5805 lock bts [rax], edx
5806# else
5807 mov eax, [pvBitmap]
5808 mov edx, [iBit]
5809 lock bts [eax], edx
5810# endif
5811 }
5812# endif
5813
5814# else
5815 ASMAtomicOrU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5816# endif
5817}
5818#endif
5819
5820
5821/**
5822 * Clears a bit in a bitmap.
5823 *
5824 * @param pvBitmap Pointer to the bitmap (little endian).
5825 * @param iBit The bit to clear.
5826 *
5827 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
5828 * However, aligning it will yield better performance and avoid
5829 * traps when accessing the last bits in the bitmap.
5830 */
5831#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5832RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5833#else
5834DECLINLINE(void) ASMBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5835{
5836# if RT_INLINE_ASM_USES_INTRIN
5837 _bittestandreset((long RT_FAR *)pvBitmap, iBit);
5838
5839# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5840# if RT_INLINE_ASM_GNU_STYLE
5841 __asm__ __volatile__("btrl %1, %0"
5842 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5843 : "Ir" (iBit)
5844 , "m" (*(volatile long RT_FAR *)pvBitmap)
5845 : "memory"
5846 , "cc");
5847# else
5848 __asm
5849 {
5850# ifdef RT_ARCH_AMD64
5851 mov rax, [pvBitmap]
5852 mov edx, [iBit]
5853 btr [rax], edx
5854# else
5855 mov eax, [pvBitmap]
5856 mov edx, [iBit]
5857 btr [eax], edx
5858# endif
5859 }
5860# endif
5861
5862# else
5863 int32_t offBitmap = iBit / 32;
5864 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5865 ASMAtomicUoAndU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
5866# endif
5867}
5868#endif
5869
5870
5871/**
5872 * Atomically clears a bit in a bitmap, ordered.
5873 *
5874 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5875 * aligned, otherwise the memory access isn't atomic!
5876 * @param iBit The bit to clear.
5877 *
5878 * @remarks No memory barrier, take care on smp.
5879 * @remarks x86: Requires a 386 or later.
5880 */
5881#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5882RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5883#else
5884DECLINLINE(void) ASMAtomicBitClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5885{
5886 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5887# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5888# if RT_INLINE_ASM_GNU_STYLE
5889 __asm__ __volatile__("lock; btrl %1, %0"
5890 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5891 : "Ir" (iBit)
5892 , "m" (*(volatile long RT_FAR *)pvBitmap)
5893 : "memory"
5894 , "cc");
5895# else
5896 __asm
5897 {
5898# ifdef RT_ARCH_AMD64
5899 mov rax, [pvBitmap]
5900 mov edx, [iBit]
5901 lock btr [rax], edx
5902# else
5903 mov eax, [pvBitmap]
5904 mov edx, [iBit]
5905 lock btr [eax], edx
5906# endif
5907 }
5908# endif
5909# else
5910 ASMAtomicAndU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31)));
5911# endif
5912}
5913#endif
5914
5915
5916/**
5917 * Toggles a bit in a bitmap.
5918 *
5919 * @param pvBitmap Pointer to the bitmap (little endian).
5920 * @param iBit The bit to toggle.
5921 *
5922 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
5923 * However, aligning it will yield better performance and avoid
5924 * traps when accessing the last bits in the bitmap.
5925 */
5926#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
5927RT_ASM_DECL_PRAGMA_WATCOM(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5928#else
5929DECLINLINE(void) ASMBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5930{
5931# if RT_INLINE_ASM_USES_INTRIN
5932 _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
5933# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5934# if RT_INLINE_ASM_GNU_STYLE
5935 __asm__ __volatile__("btcl %1, %0"
5936 : "=m" (*(volatile long *)pvBitmap)
5937 : "Ir" (iBit)
5938 , "m" (*(volatile long *)pvBitmap)
5939 : "memory"
5940 , "cc");
5941# else
5942 __asm
5943 {
5944# ifdef RT_ARCH_AMD64
5945 mov rax, [pvBitmap]
5946 mov edx, [iBit]
5947 btc [rax], edx
5948# else
5949 mov eax, [pvBitmap]
5950 mov edx, [iBit]
5951 btc [eax], edx
5952# endif
5953 }
5954# endif
5955# else
5956 int32_t offBitmap = iBit / 32;
5957 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
5958 ASMAtomicUoXorU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
5959# endif
5960}
5961#endif
5962
5963
5964/**
5965 * Atomically toggles a bit in a bitmap, ordered.
5966 *
5967 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
5968 * aligned, otherwise the memory access isn't atomic!
5969 * @param iBit The bit to toggle.
5970 *
5971 * @remarks x86: Requires a 386 or later.
5972 */
5973#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
5974RT_ASM_DECL_PRAGMA_WATCOM(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
5975#else
5976DECLINLINE(void) ASMAtomicBitToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
5977{
5978 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
5979# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
5980# if RT_INLINE_ASM_GNU_STYLE
5981 __asm__ __volatile__("lock; btcl %1, %0"
5982 : "=m" (*(volatile long RT_FAR *)pvBitmap)
5983 : "Ir" (iBit)
5984 , "m" (*(volatile long RT_FAR *)pvBitmap)
5985 : "memory"
5986 , "cc");
5987# else
5988 __asm
5989 {
5990# ifdef RT_ARCH_AMD64
5991 mov rax, [pvBitmap]
5992 mov edx, [iBit]
5993 lock btc [rax], edx
5994# else
5995 mov eax, [pvBitmap]
5996 mov edx, [iBit]
5997 lock btc [eax], edx
5998# endif
5999 }
6000# endif
6001# else
6002 ASMAtomicXorU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31)));
6003# endif
6004}
6005#endif
6006
6007
6008/**
6009 * Tests and sets a bit in a bitmap.
6010 *
6011 * @returns true if the bit was set.
6012 * @returns false if the bit was clear.
6013 *
6014 * @param pvBitmap Pointer to the bitmap (little endian).
6015 * @param iBit The bit to test and set.
6016 *
6017 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
6018 * However, aligning it will yield better performance and avoid
6019 * traps when accessing the last bits in the bitmap.
6020 */
6021#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6022RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6023#else
6024DECLINLINE(bool) ASMBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6025{
6026 union { bool f; uint32_t u32; uint8_t u8; } rc;
6027# if RT_INLINE_ASM_USES_INTRIN
6028 rc.u8 = _bittestandset((long RT_FAR *)pvBitmap, iBit);
6029
6030# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6031# if RT_INLINE_ASM_GNU_STYLE
6032 __asm__ __volatile__("btsl %2, %1\n\t"
6033 "setc %b0\n\t"
6034 "andl $1, %0\n\t"
6035 : "=q" (rc.u32)
6036 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6037 : "Ir" (iBit)
6038 , "m" (*(volatile long RT_FAR *)pvBitmap)
6039 : "memory"
6040 , "cc");
6041# else
6042 __asm
6043 {
6044 mov edx, [iBit]
6045# ifdef RT_ARCH_AMD64
6046 mov rax, [pvBitmap]
6047 bts [rax], edx
6048# else
6049 mov eax, [pvBitmap]
6050 bts [eax], edx
6051# endif
6052 setc al
6053 and eax, 1
6054 mov [rc.u32], eax
6055 }
6056# endif
6057
6058# else
6059 int32_t offBitmap = iBit / 32;
6060 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6061 rc.u32 = RT_LE2H_U32(ASMAtomicUoOrExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6062 >> (iBit & 31);
6063 rc.u32 &= 1;
6064# endif
6065 return rc.f;
6066}
6067#endif
6068
6069
6070/**
6071 * Atomically tests and sets a bit in a bitmap, ordered.
6072 *
6073 * @returns true if the bit was set.
6074 * @returns false if the bit was clear.
6075 *
6076 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6077 * aligned, otherwise the memory access isn't atomic!
6078 * @param iBit The bit to test and set.
6079 *
6080 * @remarks x86: Requires a 386 or later.
6081 */
6082#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6083RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6084#else
6085DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6086{
6087 union { bool f; uint32_t u32; uint8_t u8; } rc;
6088 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6089# if RT_INLINE_ASM_USES_INTRIN
6090 rc.u8 = _interlockedbittestandset((long RT_FAR *)pvBitmap, iBit);
6091# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6092# if RT_INLINE_ASM_GNU_STYLE
6093 __asm__ __volatile__("lock; btsl %2, %1\n\t"
6094 "setc %b0\n\t"
6095 "andl $1, %0\n\t"
6096 : "=q" (rc.u32)
6097 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6098 : "Ir" (iBit)
6099 , "m" (*(volatile long RT_FAR *)pvBitmap)
6100 : "memory"
6101 , "cc");
6102# else
6103 __asm
6104 {
6105 mov edx, [iBit]
6106# ifdef RT_ARCH_AMD64
6107 mov rax, [pvBitmap]
6108 lock bts [rax], edx
6109# else
6110 mov eax, [pvBitmap]
6111 lock bts [eax], edx
6112# endif
6113 setc al
6114 and eax, 1
6115 mov [rc.u32], eax
6116 }
6117# endif
6118
6119# else
6120 rc.u32 = RT_LE2H_U32(ASMAtomicOrExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6121 >> (iBit & 31);
6122 rc.u32 &= 1;
6123# endif
6124 return rc.f;
6125}
6126#endif
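
/*
 * Usage sketch (illustrative, not part of the original header): lock-free
 * claiming of a slot in a shared allocation bitmap, where a set bit means
 * "in use".  The function name and the 256 slot count are made up for this
 * sketch; a claimed slot is released again with ASMAtomicBitClear.
 */
#if 0 /* illustrative example only */
DECLINLINE(int32_t) exampleClaimSlot(volatile uint32_t *pau32SlotBitmap /* 256 bits, 32-bit aligned */)
{
    int32_t iSlot;
    for (iSlot = 0; iSlot < 256; iSlot++)
        if (!ASMAtomicBitTestAndSet(pau32SlotBitmap, iSlot))
            return iSlot;       /* the bit was clear and is now ours */
    return -1;                  /* all slots are taken */
}
#endif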
6127
6128
6129/**
6130 * Tests and clears a bit in a bitmap.
6131 *
6132 * @returns true if the bit was set.
6133 * @returns false if the bit was clear.
6134 *
6135 * @param pvBitmap Pointer to the bitmap (little endian).
6136 * @param iBit The bit to test and clear.
6137 *
6138 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
6139 * However, aligning it will yield better performance and avoid
6140 * traps when accessing the last bits in the bitmap.
6141 */
6142#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6143RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6144#else
6145DECLINLINE(bool) ASMBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6146{
6147 union { bool f; uint32_t u32; uint8_t u8; } rc;
6148# if RT_INLINE_ASM_USES_INTRIN
6149 rc.u8 = _bittestandreset((long RT_FAR *)pvBitmap, iBit);
6150
6151# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6152# if RT_INLINE_ASM_GNU_STYLE
6153 __asm__ __volatile__("btrl %2, %1\n\t"
6154 "setc %b0\n\t"
6155 "andl $1, %0\n\t"
6156 : "=q" (rc.u32)
6157 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6158 : "Ir" (iBit)
6159 , "m" (*(volatile long RT_FAR *)pvBitmap)
6160 : "memory"
6161 , "cc");
6162# else
6163 __asm
6164 {
6165 mov edx, [iBit]
6166# ifdef RT_ARCH_AMD64
6167 mov rax, [pvBitmap]
6168 btr [rax], edx
6169# else
6170 mov eax, [pvBitmap]
6171 btr [eax], edx
6172# endif
6173 setc al
6174 and eax, 1
6175 mov [rc.u32], eax
6176 }
6177# endif
6178
6179# else
6180 int32_t offBitmap = iBit / 32;
6181 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6182 rc.u32 = RT_LE2H_U32(ASMAtomicUoAndExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6183 >> (iBit & 31);
6184 rc.u32 &= 1;
6185# endif
6186 return rc.f;
6187}
6188#endif
6189
6190
6191/**
6192 * Atomically tests and clears a bit in a bitmap, ordered.
6193 *
6194 * @returns true if the bit was set.
6195 * @returns false if the bit was clear.
6196 *
6197 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6198 * aligned, otherwise the memory access isn't atomic!
6199 * @param iBit The bit to test and clear.
6200 *
6201 * @remarks No memory barrier, take care on smp.
6202 * @remarks x86: Requires a 386 or later.
6203 */
6204#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6205RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6206#else
6207DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6208{
6209 union { bool f; uint32_t u32; uint8_t u8; } rc;
6210 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6211# if RT_INLINE_ASM_USES_INTRIN
6212 rc.u8 = _interlockedbittestandreset((long RT_FAR *)pvBitmap, iBit);
6213
6214# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6215# if RT_INLINE_ASM_GNU_STYLE
6216 __asm__ __volatile__("lock; btrl %2, %1\n\t"
6217 "setc %b0\n\t"
6218 "andl $1, %0\n\t"
6219 : "=q" (rc.u32)
6220 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6221 : "Ir" (iBit)
6222 , "m" (*(volatile long RT_FAR *)pvBitmap)
6223 : "memory"
6224 , "cc");
6225# else
6226 __asm
6227 {
6228 mov edx, [iBit]
6229# ifdef RT_ARCH_AMD64
6230 mov rax, [pvBitmap]
6231 lock btr [rax], edx
6232# else
6233 mov eax, [pvBitmap]
6234 lock btr [eax], edx
6235# endif
6236 setc al
6237 and eax, 1
6238 mov [rc.u32], eax
6239 }
6240# endif
6241
6242# else
6243 rc.u32 = RT_LE2H_U32(ASMAtomicAndExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(~RT_BIT_32(iBit & 31))))
6244 >> (iBit & 31);
6245 rc.u32 &= 1;
6246# endif
6247 return rc.f;
6248}
6249#endif
6250
6251
6252/**
6253 * Tests and toggles a bit in a bitmap.
6254 *
6255 * @returns true if the bit was set.
6256 * @returns false if the bit was clear.
6257 *
6258 * @param pvBitmap Pointer to the bitmap (little endian).
6259 * @param iBit The bit to test and toggle.
6260 *
6261 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
6262 * However, aligning it will yield better performance and avoid
6263 * traps when accessing the last bits in the bitmap.
6264 */
6265#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6266RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6267#else
6268DECLINLINE(bool) ASMBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6269{
6270 union { bool f; uint32_t u32; uint8_t u8; } rc;
6271# if RT_INLINE_ASM_USES_INTRIN
6272 rc.u8 = _bittestandcomplement((long RT_FAR *)pvBitmap, iBit);
6273
6274# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6275# if RT_INLINE_ASM_GNU_STYLE
6276 __asm__ __volatile__("btcl %2, %1\n\t"
6277 "setc %b0\n\t"
6278 "andl $1, %0\n\t"
6279 : "=q" (rc.u32)
6280 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6281 : "Ir" (iBit)
6282 , "m" (*(volatile long RT_FAR *)pvBitmap)
6283 : "memory"
6284 , "cc");
6285# else
6286 __asm
6287 {
6288 mov edx, [iBit]
6289# ifdef RT_ARCH_AMD64
6290 mov rax, [pvBitmap]
6291 btc [rax], edx
6292# else
6293 mov eax, [pvBitmap]
6294 btc [eax], edx
6295# endif
6296 setc al
6297 and eax, 1
6298 mov [rc.u32], eax
6299 }
6300# endif
6301
6302# else
6303 int32_t offBitmap = iBit / 32;
6304 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6305 rc.u32 = RT_LE2H_U32(ASMAtomicUoXorExU32(&((uint32_t volatile *)pvBitmap)[offBitmap], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6306 >> (iBit & 31);
6307 rc.u32 &= 1;
6308# endif
6309 return rc.f;
6310}
6311#endif
6312
6313
6314/**
6315 * Atomically tests and toggles a bit in a bitmap, ordered.
6316 *
6317 * @returns true if the bit was set.
6318 * @returns false if the bit was clear.
6319 *
6320 * @param pvBitmap Pointer to the bitmap (little endian). Must be 32-bit
6321 * aligned, otherwise the memory access isn't atomic!
6322 * @param iBit The bit to test and toggle.
6323 *
6324 * @remarks x86: Requires a 386 or later.
6325 */
6326#if RT_INLINE_ASM_EXTERNAL_TMP_ARM
6327RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6328#else
6329DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6330{
6331 union { bool f; uint32_t u32; uint8_t u8; } rc;
6332 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
6333# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6334# if RT_INLINE_ASM_GNU_STYLE
6335 __asm__ __volatile__("lock; btcl %2, %1\n\t"
6336 "setc %b0\n\t"
6337 "andl $1, %0\n\t"
6338 : "=q" (rc.u32)
6339 , "=m" (*(volatile long RT_FAR *)pvBitmap)
6340 : "Ir" (iBit)
6341 , "m" (*(volatile long RT_FAR *)pvBitmap)
6342 : "memory"
6343 , "cc");
6344# else
6345 __asm
6346 {
6347 mov edx, [iBit]
6348# ifdef RT_ARCH_AMD64
6349 mov rax, [pvBitmap]
6350 lock btc [rax], edx
6351# else
6352 mov eax, [pvBitmap]
6353 lock btc [eax], edx
6354# endif
6355 setc al
6356 and eax, 1
6357 mov [rc.u32], eax
6358 }
6359# endif
6360
6361# else
6362 rc.u32 = RT_LE2H_U32(ASMAtomicXorExU32(&((uint32_t volatile *)pvBitmap)[iBit / 32], RT_H2LE_U32(RT_BIT_32(iBit & 31))))
6363 >> (iBit & 31);
6364 rc.u32 &= 1;
6365# endif
6366 return rc.f;
6367}
6368#endif
6369
6370
6371/**
6372 * Tests if a bit in a bitmap is set.
6373 *
6374 * @returns true if the bit is set.
6375 * @returns false if the bit is clear.
6376 *
6377 * @param pvBitmap Pointer to the bitmap (little endian).
6378 * @param iBit The bit to test.
6379 *
6380 * @remarks The 32-bit alignment of pvBitmap is not a strict requirement.
6381 * However, aligning it will yield better performance and avoid
6382 * traps when accessing the last bits in the bitmap.
6383 */
6384#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6385RT_ASM_DECL_PRAGMA_WATCOM(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_PROTO;
6386#else
6387DECLINLINE(bool) ASMBitTest(const volatile void RT_FAR *pvBitmap, int32_t iBit) RT_NOTHROW_DEF
6388{
6389 union { bool f; uint32_t u32; uint8_t u8; } rc;
6390# if RT_INLINE_ASM_USES_INTRIN
6391 rc.u32 = _bittest((long *)pvBitmap, iBit);
6392
6393# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6394# if RT_INLINE_ASM_GNU_STYLE
6395
6396 __asm__ __volatile__("btl %2, %1\n\t"
6397 "setc %b0\n\t"
6398 "andl $1, %0\n\t"
6399 : "=q" (rc.u32)
6400 : "m" (*(const volatile long RT_FAR *)pvBitmap)
6401 , "Ir" (iBit)
6402 : "memory"
6403 , "cc");
6404# else
6405 __asm
6406 {
6407 mov edx, [iBit]
6408# ifdef RT_ARCH_AMD64
6409 mov rax, [pvBitmap]
6410 bt [rax], edx
6411# else
6412 mov eax, [pvBitmap]
6413 bt [eax], edx
6414# endif
6415 setc al
6416 and eax, 1
6417 mov [rc.u32], eax
6418 }
6419# endif
6420
6421# else
6422 int32_t offBitmap = iBit / 32;
6423 AssertStmt(!((uintptr_t)pvBitmap & 3), offBitmap += (uintptr_t)pvBitmap & 3; iBit += ((uintptr_t)pvBitmap & 3) * 8);
6424 rc.u32 = RT_LE2H_U32(ASMAtomicUoReadU32(&((uint32_t volatile *)pvBitmap)[offBitmap])) >> (iBit & 31);
6425 rc.u32 &= 1;
6426# endif
6427 return rc.f;
6428}
6429#endif
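
/*
 * Usage sketch (illustrative, not part of the original header): a simple
 * single-threaded dirty-page tracker built on the non-atomic bit operations
 * above.  The structure and function names are made up for this sketch; for
 * concurrent updaters the ASMAtomicBit* variants would be needed instead.
 */
#if 0 /* illustrative example only */
typedef struct EXAMPLEDIRTYTRACKER
{
    /* One bit per page for 1024 pages, kept 32-bit aligned as recommended. */
    uint32_t au32Dirty[1024 / 32];
} EXAMPLEDIRTYTRACKER;

DECLINLINE(void) exampleMarkDirty(EXAMPLEDIRTYTRACKER *pTracker, uint32_t iPage)
{
    ASMBitSet(&pTracker->au32Dirty[0], (int32_t)iPage);
}

DECLINLINE(bool) exampleIsDirty(EXAMPLEDIRTYTRACKER const *pTracker, uint32_t iPage)
{
    return ASMBitTest(&pTracker->au32Dirty[0], (int32_t)iPage);
}

DECLINLINE(void) exampleClearDirty(EXAMPLEDIRTYTRACKER *pTracker, uint32_t iPage)
{
    ASMBitClear(&pTracker->au32Dirty[0], (int32_t)iPage);
}
#endif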
6430
6431
6432/**
6433 * Clears a bit range within a bitmap.
6434 *
6435 * @param pvBitmap Pointer to the bitmap (little endian).
6436 * @param iBitStart The first bit to clear.
6437 * @param iBitEnd The first bit not to clear.
6438 */
6439DECLINLINE(void) ASMBitClearRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6440{
6441 if (iBitStart < iBitEnd)
6442 {
6443 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6444 int32_t iStart = iBitStart & ~31;
6445 int32_t iEnd = iBitEnd & ~31;
6446 if (iStart == iEnd)
6447 *pu32 &= RT_H2LE_U32(((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6448 else
6449 {
6450 /* bits in first dword. */
6451 if (iBitStart & 31)
6452 {
6453 *pu32 &= RT_H2LE_U32((UINT32_C(1) << (iBitStart & 31)) - 1);
6454 pu32++;
6455 iBitStart = iStart + 32;
6456 }
6457
6458 /* whole dwords. */
6459 if (iBitStart != iEnd)
6460 ASMMemZero32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3);
6461
6462 /* bits in last dword. */
6463 if (iBitEnd & 31)
6464 {
6465 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6466 *pu32 &= RT_H2LE_U32(~((UINT32_C(1) << (iBitEnd & 31)) - 1));
6467 }
6468 }
6469 }
6470}
6471
6472
6473/**
6474 * Sets a bit range within a bitmap.
6475 *
6476 * @param pvBitmap Pointer to the bitmap (little endian).
6477 * @param iBitStart The first bit to set.
6478 * @param iBitEnd The first bit not to set.
6479 */
6480DECLINLINE(void) ASMBitSetRange(volatile void RT_FAR *pvBitmap, int32_t iBitStart, int32_t iBitEnd) RT_NOTHROW_DEF
6481{
6482 if (iBitStart < iBitEnd)
6483 {
6484 volatile uint32_t RT_FAR *pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitStart >> 5);
6485 int32_t iStart = iBitStart & ~31;
6486 int32_t iEnd = iBitEnd & ~31;
6487 if (iStart == iEnd)
6488 *pu32 |= RT_H2LE_U32(((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31));
6489 else
6490 {
6491 /* bits in first dword. */
6492 if (iBitStart & 31)
6493 {
6494 *pu32 |= RT_H2LE_U32(~((UINT32_C(1) << (iBitStart & 31)) - 1));
6495 pu32++;
6496 iBitStart = iStart + 32;
6497 }
6498
6499 /* whole dwords. */
6500 if (iBitStart != iEnd)
6501 ASMMemFill32(pu32, ((uint32_t)iEnd - (uint32_t)iBitStart) >> 3, ~UINT32_C(0));
6502
6503 /* bits in last dword. */
6504 if (iBitEnd & 31)
6505 {
6506 pu32 = (volatile uint32_t RT_FAR *)pvBitmap + (iBitEnd >> 5);
6507 *pu32 |= RT_H2LE_U32((UINT32_C(1) << (iBitEnd & 31)) - 1);
6508 }
6509 }
6510 }
6511}
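
/*
 * Usage sketch (illustrative, not part of the original header): marking a
 * contiguous run of pages as allocated and freeing it again.  Note that
 * iBitEnd is exclusive in both range functions.  The names are made up for
 * this sketch.
 */
#if 0 /* illustrative example only */
DECLINLINE(void) exampleMarkPagesAllocated(volatile void *pvAllocBitmap, uint32_t iFirstPage, uint32_t cPages)
{
    ASMBitSetRange(pvAllocBitmap, (int32_t)iFirstPage, (int32_t)(iFirstPage + cPages));
}

DECLINLINE(void) exampleMarkPagesFree(volatile void *pvAllocBitmap, uint32_t iFirstPage, uint32_t cPages)
{
    ASMBitClearRange(pvAllocBitmap, (int32_t)iFirstPage, (int32_t)(iFirstPage + cPages));
}
#endif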
6512
6513
6514/**
6515 * Finds the first clear bit in a bitmap.
6516 *
6517 * @returns Index of the first zero bit.
6518 * @returns -1 if no clear bit was found.
6519 * @param pvBitmap Pointer to the bitmap (little endian).
6520 * @param cBits The number of bits in the bitmap. Multiple of 32.
6521 */
6522#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6523DECLASM(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6524#else
6525DECLINLINE(int32_t) ASMBitFirstClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6526{
6527 if (cBits)
6528 {
6529 int32_t iBit;
6530# if RT_INLINE_ASM_GNU_STYLE
6531 RTCCUINTREG uEAX, uECX, uEDI;
6532 cBits = RT_ALIGN_32(cBits, 32);
6533 __asm__ __volatile__("repe; scasl\n\t"
6534 "je 1f\n\t"
6535# ifdef RT_ARCH_AMD64
6536 "lea -4(%%rdi), %%rdi\n\t"
6537 "xorl (%%rdi), %%eax\n\t"
6538 "subq %5, %%rdi\n\t"
6539# else
6540 "lea -4(%%edi), %%edi\n\t"
6541 "xorl (%%edi), %%eax\n\t"
6542 "subl %5, %%edi\n\t"
6543# endif
6544 "shll $3, %%edi\n\t"
6545 "bsfl %%eax, %%edx\n\t"
6546 "addl %%edi, %%edx\n\t"
6547 "1:\t\n"
6548 : "=d" (iBit)
6549 , "=&c" (uECX)
6550 , "=&D" (uEDI)
6551 , "=&a" (uEAX)
6552 : "0" (0xffffffff)
6553 , "mr" (pvBitmap)
6554 , "1" (cBits >> 5)
6555 , "2" (pvBitmap)
6556 , "3" (0xffffffff)
6557 : "cc");
6558# else
6559 cBits = RT_ALIGN_32(cBits, 32);
6560 __asm
6561 {
6562# ifdef RT_ARCH_AMD64
6563 mov rdi, [pvBitmap]
6564 mov rbx, rdi
6565# else
6566 mov edi, [pvBitmap]
6567 mov ebx, edi
6568# endif
6569 mov edx, 0ffffffffh
6570 mov eax, edx
6571 mov ecx, [cBits]
6572 shr ecx, 5
6573 repe scasd
6574 je done
6575
6576# ifdef RT_ARCH_AMD64
6577 lea rdi, [rdi - 4]
6578 xor eax, [rdi]
6579 sub rdi, rbx
6580# else
6581 lea edi, [edi - 4]
6582 xor eax, [edi]
6583 sub edi, ebx
6584# endif
6585 shl edi, 3
6586 bsf edx, eax
6587 add edx, edi
6588 done:
6589 mov [iBit], edx
6590 }
6591# endif
6592 return iBit;
6593 }
6594 return -1;
6595}
6596#endif
6597
6598
6599/**
6600 * Finds the next clear bit in a bitmap.
6601 *
6602 * @returns Index of the next clear bit.
6603 * @returns -1 if no clear bit was found.
6604 * @param pvBitmap Pointer to the bitmap (little endian).
6605 * @param cBits The number of bits in the bitmap. Multiple of 32.
6606 * @param iBitPrev The bit returned from the last search.
6607 * The search will start at iBitPrev + 1.
6608 */
6609#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6610DECLASM(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6611#else
6612DECLINLINE(int) ASMBitNextClear(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6613{
6614 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6615 int iBit = ++iBitPrev & 31;
6616 if (iBit)
6617 {
6618 /*
6619 * Inspect the 32-bit word containing the unaligned bit.
6620 */
6621 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
6622
6623# if RT_INLINE_ASM_USES_INTRIN
6624 unsigned long ulBit = 0;
6625 if (_BitScanForward(&ulBit, u32))
6626 return ulBit + iBitPrev;
6627# else
6628# if RT_INLINE_ASM_GNU_STYLE
6629 __asm__ __volatile__("bsf %1, %0\n\t"
6630 "jnz 1f\n\t"
6631 "movl $-1, %0\n\t" /** @todo use conditional move for 64-bit? */
6632 "1:\n\t"
6633 : "=r" (iBit)
6634 : "r" (u32)
6635 : "cc");
6636# else
6637 __asm
6638 {
6639 mov edx, [u32]
6640 bsf eax, edx
6641 jnz done
6642 mov eax, 0ffffffffh
6643 done:
6644 mov [iBit], eax
6645 }
6646# endif
6647 if (iBit >= 0)
6648 return iBit + (int)iBitPrev;
6649# endif
6650
6651 /*
6652 * Skip ahead and see if there is anything left to search.
6653 */
6654 iBitPrev |= 31;
6655 iBitPrev++;
6656 if (cBits <= (uint32_t)iBitPrev)
6657 return -1;
6658 }
6659
6660 /*
6661 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
6662 */
6663 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6664 if (iBit >= 0)
6665 iBit += iBitPrev;
6666 return iBit;
6667}
6668#endif
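
/*
 * Usage sketch (illustrative, not part of the original header): counting the
 * free entries of an allocation bitmap by walking the clear bits.  The
 * bitmap size must be a multiple of 32 bits as documented above; the
 * function name is made up for this sketch.
 */
#if 0 /* illustrative example only */
DECLINLINE(uint32_t) exampleCountFreeEntries(void const volatile *pvAllocBitmap, uint32_t cEntries)
{
    uint32_t cFree = 0;
    int32_t  iBit  = ASMBitFirstClear(pvAllocBitmap, cEntries);
    while (iBit >= 0)
    {
        cFree++;
        iBit = ASMBitNextClear(pvAllocBitmap, cEntries, (uint32_t)iBit);
    }
    return cFree;
}
#endif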
6669
6670
6671/**
6672 * Finds the first set bit in a bitmap.
6673 *
6674 * @returns Index of the first set bit.
6675 * @returns -1 if no set bit was found.
6676 * @param pvBitmap Pointer to the bitmap (little endian).
6677 * @param cBits The number of bits in the bitmap. Multiple of 32.
6678 */
6679#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6680DECLASM(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_PROTO;
6681#else
6682DECLINLINE(int32_t) ASMBitFirstSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits) RT_NOTHROW_DEF
6683{
6684 if (cBits)
6685 {
6686 int32_t iBit;
6687# if RT_INLINE_ASM_GNU_STYLE
6688 RTCCUINTREG uEAX, uECX, uEDI;
6689 cBits = RT_ALIGN_32(cBits, 32);
6690 __asm__ __volatile__("repe; scasl\n\t"
6691 "je 1f\n\t"
6692# ifdef RT_ARCH_AMD64
6693 "lea -4(%%rdi), %%rdi\n\t"
6694 "movl (%%rdi), %%eax\n\t"
6695 "subq %5, %%rdi\n\t"
6696# else
6697 "lea -4(%%edi), %%edi\n\t"
6698 "movl (%%edi), %%eax\n\t"
6699 "subl %5, %%edi\n\t"
6700# endif
6701 "shll $3, %%edi\n\t"
6702 "bsfl %%eax, %%edx\n\t"
6703 "addl %%edi, %%edx\n\t"
6704 "1:\t\n"
6705 : "=d" (iBit)
6706 , "=&c" (uECX)
6707 , "=&D" (uEDI)
6708 , "=&a" (uEAX)
6709 : "0" (0xffffffff)
6710 , "mr" (pvBitmap)
6711 , "1" (cBits >> 5)
6712 , "2" (pvBitmap)
6713 , "3" (0)
6714 : "cc");
6715# else
6716 cBits = RT_ALIGN_32(cBits, 32);
6717 __asm
6718 {
6719# ifdef RT_ARCH_AMD64
6720 mov rdi, [pvBitmap]
6721 mov rbx, rdi
6722# else
6723 mov edi, [pvBitmap]
6724 mov ebx, edi
6725# endif
6726 mov edx, 0ffffffffh
6727 xor eax, eax
6728 mov ecx, [cBits]
6729 shr ecx, 5
6730 repe scasd
6731 je done
6732# ifdef RT_ARCH_AMD64
6733 lea rdi, [rdi - 4]
6734 mov eax, [rdi]
6735 sub rdi, rbx
6736# else
6737 lea edi, [edi - 4]
6738 mov eax, [edi]
6739 sub edi, ebx
6740# endif
6741 shl edi, 3
6742 bsf edx, eax
6743 add edx, edi
6744 done:
6745 mov [iBit], edx
6746 }
6747# endif
6748 return iBit;
6749 }
6750 return -1;
6751}
6752#endif
6753
6754
6755/**
6756 * Finds the next set bit in a bitmap.
6757 *
6758 * @returns Index of the next set bit.
6759 * @returns -1 if no set bit was found.
6760 * @param pvBitmap Pointer to the bitmap (little endian).
6761 * @param cBits The number of bits in the bitmap. Multiple of 32.
6762 * @param iBitPrev The bit returned from the last search.
6763 * The search will start at iBitPrev + 1.
6764 */
6765#if RT_INLINE_ASM_EXTERNAL || (!defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86))
6766DECLASM(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_PROTO;
6767#else
6768DECLINLINE(int) ASMBitNextSet(const volatile void RT_FAR *pvBitmap, uint32_t cBits, uint32_t iBitPrev) RT_NOTHROW_DEF
6769{
6770 const volatile uint32_t RT_FAR *pau32Bitmap = (const volatile uint32_t RT_FAR *)pvBitmap;
6771 int iBit = ++iBitPrev & 31;
6772 if (iBit)
6773 {
6774 /*
6775 * Inspect the 32-bit word containing the unaligned bit.
6776 */
6777 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
6778
6779# if RT_INLINE_ASM_USES_INTRIN
6780 unsigned long ulBit = 0;
6781 if (_BitScanForward(&ulBit, u32))
6782 return ulBit + iBitPrev;
6783# else
6784# if RT_INLINE_ASM_GNU_STYLE
6785 __asm__ __volatile__("bsf %1, %0\n\t"
6786 "jnz 1f\n\t" /** @todo use conditional move for 64-bit? */
6787 "movl $-1, %0\n\t"
6788 "1:\n\t"
6789 : "=r" (iBit)
6790 : "r" (u32)
6791 : "cc");
6792# else
6793 __asm
6794 {
6795 mov edx, [u32]
6796 bsf eax, edx
6797 jnz done
6798 mov eax, 0ffffffffh
6799 done:
6800 mov [iBit], eax
6801 }
6802# endif
6803 if (iBit >= 0)
6804 return iBit + (int)iBitPrev;
6805# endif
6806
6807 /*
6808 * Skip ahead and see if there is anything left to search.
6809 */
6810 iBitPrev |= 31;
6811 iBitPrev++;
6812 if (cBits <= (uint32_t)iBitPrev)
6813 return -1;
6814 }
6815
6816 /*
6817 * 32-bit aligned search, let ASMBitFirstSet do the dirty work.
6818 */
6819 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
6820 if (iBit >= 0)
6821 iBit += iBitPrev;
6822 return iBit;
6823}
6824#endif
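
/*
 * Usage sketch (illustrative, not part of the original header): servicing
 * every source that has a bit set in a pending-work bitmap.  The callback
 * and function names are made up for this sketch; the bitmap size must be a
 * multiple of 32 bits as documented above.
 */
#if 0 /* illustrative example only */
DECLINLINE(void) exampleForEachPending(void const volatile *pvPendingBitmap, uint32_t cSources,
                                       void (*pfnService)(uint32_t iSource))
{
    int32_t iBit = ASMBitFirstSet(pvPendingBitmap, cSources);
    while (iBit >= 0)
    {
        pfnService((uint32_t)iBit);
        iBit = ASMBitNextSet(pvPendingBitmap, cSources, (uint32_t)iBit);
    }
}
#endif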
6825
6826
6827/**
6828 * Finds the first bit which is set in the given 32-bit integer.
6829 * Bits are numbered from 1 (least significant) to 32.
6830 *
6831 * @returns index [1..32] of the first set bit.
6832 * @returns 0 if all bits are cleared.
6833 * @param u32 Integer to search for set bits.
6834 * @remarks Similar to ffs() in BSD.
6835 */
6836#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6837RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_PROTO;
6838#else
6839DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32) RT_NOTHROW_DEF
6840{
6841# if RT_INLINE_ASM_USES_INTRIN
6842 unsigned long iBit;
6843 if (_BitScanForward(&iBit, u32))
6844 iBit++;
6845 else
6846 iBit = 0;
6847
6848# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
6849# if RT_INLINE_ASM_GNU_STYLE
6850 uint32_t iBit;
6851 __asm__ __volatile__("bsf %1, %0\n\t"
6852 "jnz 1f\n\t"
6853 "xorl %0, %0\n\t"
6854 "jmp 2f\n"
6855 "1:\n\t"
6856 "incl %0\n"
6857 "2:\n\t"
6858 : "=r" (iBit)
6859 : "rm" (u32)
6860 : "cc");
6861# else
6862 uint32_t iBit;
6863 _asm
6864 {
6865 bsf eax, [u32]
6866 jnz found
6867 xor eax, eax
6868 jmp done
6869 found:
6870 inc eax
6871 done:
6872 mov [iBit], eax
6873 }
6874# endif
6875
6876# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
6877 /*
6878 * Using the "count leading zeros (clz)" instruction here because there
6879 * is no dedicated instruction to get the first set bit.
6880 * Need to reverse the bits in the value with "rbit" first because
6881 * "clz" starts counting from the most significant bit.
6882 */
6883 uint32_t iBit;
6884 __asm__ __volatile__(
6885# if defined(RT_ARCH_ARM64)
6886 "rbit %w[uVal], %w[uVal]\n\t"
6887 "clz %w[iBit], %w[uVal]\n\t"
6888# else
6889 "rbit %[uVal], %[uVal]\n\t"
6890 "clz %[iBit], %[uVal]\n\t"
6891# endif
6892 : [uVal] "=r" (u32)
6893 , [iBit] "=r" (iBit)
6894 : "[uVal]" (u32));
6895 if (iBit != 32)
6896 iBit++;
6897 else
6898 iBit = 0; /* No bit set. */
6899
6900# else
6901# error "Port me"
6902# endif
6903 return iBit;
6904}
6905#endif
6906
6907
6908/**
6909 * Finds the first bit which is set in the given 32-bit integer.
6910 * Bits are numbered from 1 (least significant) to 32.
6911 *
6912 * @returns index [1..32] of the first set bit.
6913 * @returns 0 if all bits are cleared.
6914 * @param i32 Integer to search for set bits.
6915 * @remark Similar to ffs() in BSD.
6916 */
6917DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32) RT_NOTHROW_DEF
6918{
6919 return ASMBitFirstSetU32((uint32_t)i32);
6920}
6921
6922
6923/**
6924 * Finds the first bit which is set in the given 64-bit integer.
6925 *
6926 * Bits are numbered from 1 (least significant) to 64.
6927 *
6928 * @returns index [1..64] of the first set bit.
6929 * @returns 0 if all bits are cleared.
6930 * @param u64 Integer to search for set bits.
6931 * @remarks Similar to ffs() in BSD.
6932 */
6933#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
6934RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_PROTO;
6935#else
6936DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64) RT_NOTHROW_DEF
6937{
6938# if RT_INLINE_ASM_USES_INTRIN
6939 unsigned long iBit;
6940# if ARCH_BITS == 64
6941 if (_BitScanForward64(&iBit, u64))
6942 iBit++;
6943 else
6944 iBit = 0;
6945# else
6946 if (_BitScanForward(&iBit, (uint32_t)u64))
6947 iBit++;
6948 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
6949 iBit += 33;
6950 else
6951 iBit = 0;
6952# endif
6953
6954# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
6955 uint64_t iBit;
6956 __asm__ __volatile__("bsfq %1, %0\n\t"
6957 "jnz 1f\n\t"
6958 "xorl %k0, %k0\n\t"
6959 "jmp 2f\n"
6960 "1:\n\t"
6961 "incl %k0\n"
6962 "2:\n\t"
6963 : "=r" (iBit)
6964 : "rm" (u64)
6965 : "cc");
6966
6967# elif defined(RT_ARCH_ARM64)
6968 uint64_t iBit;
6969 __asm__ __volatile__("rbit %[uVal], %[uVal]\n\t"
6970 "clz %[iBit], %[uVal]\n\t"
6971 : [uVal] "=r" (u64)
6972 , [iBit] "=r" (iBit)
6973 : "[uVal]" (u64));
6974 if (iBit != 64)
6975 iBit++;
6976 else
6977 iBit = 0; /* No bit set. */
6978
6979# else
6980 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
6981 if (!iBit)
6982 {
6983 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
6984 if (iBit)
6985 iBit += 32;
6986 }
6987# endif
6988 return (unsigned)iBit;
6989}
6990#endif
6991
6992
6993/**
6994 * Finds the first bit which is set in the given 16-bit integer.
6995 *
6996 * Bits are numbered from 1 (least significant) to 16.
6997 *
6998 * @returns index [1..16] of the first set bit.
6999 * @returns 0 if all bits are cleared.
7000 * @param u16 Integer to search for set bits.
7001 * @remarks For 16-bit bs3kit code.
7002 */
7003#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7004RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7005#else
7006DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16) RT_NOTHROW_DEF
7007{
7008 return ASMBitFirstSetU32((uint32_t)u16);
7009}
7010#endif
7011
7012
7013/**
7014 * Finds the last bit which is set in the given 32-bit integer.
7015 * Bits are numbered from 1 (least significant) to 32.
7016 *
7017 * @returns index [1..32] of the last set bit.
7018 * @returns 0 if all bits are cleared.
7019 * @param u32 Integer to search for set bits.
7020 * @remark Similar to fls() in BSD.
7021 */
7022#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7023RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_PROTO;
7024#else
7025DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32) RT_NOTHROW_DEF
7026{
7027# if RT_INLINE_ASM_USES_INTRIN
7028 unsigned long iBit;
7029 if (_BitScanReverse(&iBit, u32))
7030 iBit++;
7031 else
7032 iBit = 0;
7033
7034# elif defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
7035# if RT_INLINE_ASM_GNU_STYLE
7036 uint32_t iBit;
7037 __asm__ __volatile__("bsrl %1, %0\n\t"
7038 "jnz 1f\n\t"
7039 "xorl %0, %0\n\t"
7040 "jmp 2f\n"
7041 "1:\n\t"
7042 "incl %0\n"
7043 "2:\n\t"
7044 : "=r" (iBit)
7045 : "rm" (u32)
7046 : "cc");
7047# else
7048 uint32_t iBit;
7049 _asm
7050 {
7051 bsr eax, [u32]
7052 jnz found
7053 xor eax, eax
7054 jmp done
7055 found:
7056 inc eax
7057 done:
7058 mov [iBit], eax
7059 }
7060# endif
7061
7062# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7063 uint32_t iBit;
7064 __asm__ __volatile__(
7065# if defined(RT_ARCH_ARM64)
7066 "clz %w[iBit], %w[uVal]\n\t"
7067# else
7068 "clz %[iBit], %[uVal]\n\t"
7069# endif
7070 : [iBit] "=r" (iBit)
7071 : [uVal] "r" (u32));
7072 iBit = 32 - iBit;
7073
7074# else
7075# error "Port me"
7076# endif
7077 return iBit;
7078}
7079#endif
7080
7081
7082/**
7083 * Finds the last bit which is set in the given 32-bit integer.
7084 * Bits are numbered from 1 (least significant) to 32.
7085 *
7086 * @returns index [1..32] of the last set bit.
7087 * @returns 0 if all bits are cleared.
7088 * @param i32 Integer to search for set bits.
7089 * @remark Similar to fls() in BSD.
7090 */
7091DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32) RT_NOTHROW_DEF
7092{
7093 return ASMBitLastSetU32((uint32_t)i32);
7094}
7095
7096
7097/**
7098 * Finds the last bit which is set in the given 64-bit integer.
7099 *
7100 * Bits are numbered from 1 (least significant) to 64.
7101 *
7102 * @returns index [1..64] of the last set bit.
7103 * @returns 0 if all bits are cleared.
7104 * @param u64 Integer to search for set bits.
7105 * @remark Similar to fls() in BSD.
7106 */
7107#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7108RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_PROTO;
7109#else
7110DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64) RT_NOTHROW_DEF
7111{
7112# if RT_INLINE_ASM_USES_INTRIN
7113 unsigned long iBit;
7114# if ARCH_BITS == 64
7115 if (_BitScanReverse64(&iBit, u64))
7116 iBit++;
7117 else
7118 iBit = 0;
7119# else
7120 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
7121 iBit += 33;
7122 else if (_BitScanReverse(&iBit, (uint32_t)u64))
7123 iBit++;
7124 else
7125 iBit = 0;
7126# endif
7127
7128# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7129 uint64_t iBit;
7130 __asm__ __volatile__("bsrq %1, %0\n\t"
7131 "jnz 1f\n\t"
7132 "xorl %k0, %k0\n\t"
7133 "jmp 2f\n"
7134 "1:\n\t"
7135 "incl %k0\n"
7136 "2:\n\t"
7137 : "=r" (iBit)
7138 : "rm" (u64)
7139 : "cc");
7140
7141# elif defined(RT_ARCH_ARM64)
7142 uint64_t iBit;
7143 __asm__ __volatile__("clz %[iBit], %[uVal]\n\t"
7144 : [iBit] "=r" (iBit)
7145 : [uVal] "r" (u64));
7146 iBit = 64 - iBit;
7147
7148# else
7149 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
7150 if (iBit)
7151 iBit += 32;
7152 else
7153 iBit = ASMBitLastSetU32((uint32_t)u64);
7154# endif
7155 return (unsigned)iBit;
7156}
7157#endif
7158
7159
7160/**
7161 * Finds the last bit which is set in the given 16-bit integer.
7162 *
7163 * Bits are numbered from 1 (least significant) to 16.
7164 *
7165 * @returns index [1..16] of the last set bit.
7166 * @returns 0 if all bits are cleared.
7167 * @param u16 Integer to search for set bits.
7168 * @remarks For 16-bit bs3kit code.
7169 */
7170#if RT_INLINE_ASM_EXTERNAL_TMP_ARM && !RT_INLINE_ASM_USES_INTRIN
7171RT_ASM_DECL_PRAGMA_WATCOM_386(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_PROTO;
7172#else
7173DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16) RT_NOTHROW_DEF
7174{
7175 return ASMBitLastSetU32((uint32_t)u16);
7176}
7177#endif
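
/*
 * Usage sketch (illustrative, not part of the original header): the 1-based
 * return convention of the ASMBitFirstSetU32 and ASMBitLastSetU32 families
 * makes bit-width and log2 calculations a matter of subtracting one.  The
 * helper names are made up for this sketch.
 */
#if 0 /* illustrative example only */
/* Number of bits needed to represent u32; 0 for an input of 0.
   E.g. 0x01 -> 1, 0xff -> 8, 0x100 -> 9. */
DECLINLINE(unsigned) exampleBitWidthU32(uint32_t u32)
{
    return ASMBitLastSetU32(u32);
}

/* log2 of a non-zero power of two, e.g. 0x8000 -> 15. */
DECLINLINE(unsigned) exampleLog2OfPow2U32(uint32_t u32)
{
    Assert(u32 && !(u32 & (u32 - 1)));
    return ASMBitFirstSetU32(u32) - 1;
}
#endif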
7178
7179
7180/**
7181 * Rotate 32-bit unsigned value to the left by @a cShift.
7182 *
7183 * @returns Rotated value.
7184 * @param u32 The value to rotate.
7185 * @param cShift How many bits to rotate by.
7186 */
7187#ifdef __WATCOMC__
7188RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7189#else
7190DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7191{
7192# if RT_INLINE_ASM_USES_INTRIN
7193 return _rotl(u32, cShift);
7194
7195# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7196 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7197 return u32;
7198
7199# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7200 __asm__ __volatile__(
7201# if defined(RT_ARCH_ARM64)
7202 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7203# else
7204 "ror %[uRet], %[uVal], %[cShift]\n\t"
7205# endif
7206 : [uRet] "=r" (u32)
7207 : [uVal] "[uRet]" (u32)
7208 , [cShift] "r" (32 - (cShift & 31))); /** @todo there is an immediate form here */
7209 return u32;
7210
7211# else
7212 cShift &= 31;
7213 return (u32 << cShift) | (u32 >> (32 - cShift));
7214# endif
7215}
7216#endif
7217
7218
7219/**
7220 * Rotate 32-bit unsigned value to the right by @a cShift.
7221 *
7222 * @returns Rotated value.
7223 * @param u32 The value to rotate.
7224 * @param cShift How many bits to rotate by.
7225 */
7226#ifdef __WATCOMC__
7227RT_ASM_DECL_PRAGMA_WATCOM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift) RT_NOTHROW_PROTO;
7228#else
7229DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift) RT_NOTHROW_DEF
7230{
7231# if RT_INLINE_ASM_USES_INTRIN
7232 return _rotr(u32, cShift);
7233
7234# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
7235 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32) : "cc");
7236 return u32;
7237
7238# elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
7239 __asm__ __volatile__(
7240# if defined(RT_ARCH_ARM64)
7241 "ror %w[uRet], %w[uVal], %w[cShift]\n\t"
7242# else
7243 "ror %[uRet], %[uVal], %[cShift]\n\t"
7244# endif
7245 : [uRet] "=r" (u32)
7246 : [uVal] "[uRet]" (u32)
7247 , [cShift] "r" (cShift & 31)); /** @todo there is an immediate form here */
7248 return u32;
7249
7250# else
7251 cShift &= 31;
7252 return (u32 >> cShift) | (u32 << (32 - cShift));
7253# endif
7254}
7255#endif
7256
7257
7258/**
7259 * Rotate 64-bit unsigned value to the left by @a cShift.
7260 *
7261 * @returns Rotated value.
7262 * @param u64 The value to rotate.
7263 * @param cShift How many bits to rotate by.
7264 */
7265DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7266{
7267#if RT_INLINE_ASM_USES_INTRIN
7268 return _rotl64(u64, cShift);
7269
7270#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7271 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7272 return u64;
7273
7274#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7275 uint32_t uSpill;
7276 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
7277 "jz 1f\n\t"
7278 "xchgl %%eax, %%edx\n\t"
7279 "1:\n\t"
7280 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7281 "jz 2f\n\t"
7282 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7283 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
7284 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
7285 "2:\n\t" /* } */
7286 : "=A" (u64)
7287 , "=c" (cShift)
7288 , "=r" (uSpill)
7289 : "0" (u64)
7290 , "1" (cShift)
7291 : "cc");
7292 return u64;
7293
7294# elif defined(RT_ARCH_ARM64)
7295 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7296 : [uRet] "=r" (u64)
7297 : [uVal] "[uRet]" (u64)
7298 , [cShift] "r" ((uint64_t)(64 - (cShift & 63)))); /** @todo there is an immediate form here */
7299 return u64;
7300
7301#else
7302 cShift &= 63;
7303 return (u64 << cShift) | (u64 >> (64 - cShift));
7304#endif
7305}
7306
7307
7308/**
7309 * Rotate 64-bit unsigned value to the right by @a cShift.
7310 *
7311 * @returns Rotated value.
7312 * @param u64 The value to rotate.
7313 * @param cShift How many bits to rotate by.
7314 */
7315DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift) RT_NOTHROW_DEF
7316{
7317#if RT_INLINE_ASM_USES_INTRIN
7318 return _rotr64(u64, cShift);
7319
7320#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
7321 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64) : "cc");
7322 return u64;
7323
7324#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
7325 uint32_t uSpill;
7326 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
7327 "jz 1f\n\t"
7328 "xchgl %%eax, %%edx\n\t"
7329 "1:\n\t"
7330 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
7331 "jz 2f\n\t"
7332 "movl %%edx, %2\n\t" /* save the hi value in %3. */
7333 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
7334 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
7335 "2:\n\t" /* } */
7336 : "=A" (u64)
7337 , "=c" (cShift)
7338 , "=r" (uSpill)
7339 : "0" (u64)
7340 , "1" (cShift)
7341 : "cc");
7342 return u64;
7343
7344# elif defined(RT_ARCH_ARM64)
7345 __asm__ __volatile__("ror %[uRet], %[uVal], %[cShift]\n\t"
7346 : [uRet] "=r" (u64)
7347 : [uVal] "[uRet]" (u64)
7348 , [cShift] "r" ((uint64_t)(cShift & 63))); /** @todo there is an immediate form here */
7349 return u64;
7350
7351#else
7352 cShift &= 63;
7353 return (u64 >> cShift) | (u64 << (64 - cShift));
7354#endif
7355}
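
/*
 * Usage sketch (illustrative, not part of the original header): rotates are
 * handy for mixing bits, e.g. ASMRotateLeftU32(UINT32_C(0x80000001), 4)
 * yields 0x00000018.  The mixing function below is a made-up example and
 * not a recommended hash.
 */
#if 0 /* illustrative example only */
DECLINLINE(uint64_t) exampleHashMix64(uint64_t uHash, uint64_t uValue)
{
    uHash ^= uValue;
    uHash  = ASMRotateLeftU64(uHash, 27);
    uHash *= UINT64_C(0x9e3779b97f4a7c15);  /* odd constant to spread the bits */
    return uHash;
}
#endif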
7356
7357/** @} */
7358
7359
7360/** @} */
7361
7362/*
7363 * Include #pragma aux definitions for Watcom C/C++.
7364 */
7365#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
7366# define IPRT_ASM_WATCOM_X86_16_WITH_PRAGMAS
7367# undef IPRT_INCLUDED_asm_watcom_x86_16_h
7368# include "asm-watcom-x86-16.h"
7369#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
7370# define IPRT_ASM_WATCOM_X86_32_WITH_PRAGMAS
7371# undef IPRT_INCLUDED_asm_watcom_x86_32_h
7372# include "asm-watcom-x86-32.h"
7373#endif
7374
7375#endif /* !IPRT_INCLUDED_asm_h */
7376