VirtualBox

source: vbox/trunk/include/iprt/asm.h@ 59965

最後變更 在這個檔案從59965是 59791,由 vboxsync 提交於 9 年 前

iprt/asm.h: fixed ASMSerializeInstructionIRef for Windows

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 157.5 KB
 
1/** @file
2 * IPRT - Assembly Functions.
3 */
4
5/*
6 * Copyright (C) 2006-2015 Oracle Corporation
7 *
8 * This file is part of VirtualBox Open Source Edition (OSE), as
9 * available from http://www.alldomusa.eu.org. This file is free software;
10 * you can redistribute it and/or modify it under the terms of the GNU
11 * General Public License (GPL) as published by the Free Software
12 * Foundation, in version 2 as it comes in the "COPYING" file of the
13 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15 *
16 * The contents of this file may alternatively be used under the terms
17 * of the Common Development and Distribution License Version 1.0
18 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
19 * VirtualBox OSE distribution, in which case the provisions of the
20 * CDDL are applicable instead of those of the GPL.
21 *
22 * You may elect to license modified versions of this file under the
23 * terms and conditions of either the GPL or the CDDL or both.
24 */
25
26#ifndef ___iprt_asm_h
27#define ___iprt_asm_h
28
29#include <iprt/cdefs.h>
30#include <iprt/types.h>
31#include <iprt/assert.h>
32/** @def RT_INLINE_ASM_USES_INTRIN
33 * Defined as 1 if we're using a _MSC_VER 1400.
34 * Otherwise defined as 0.
35 */
36
37/* Solaris 10 header ugliness */
38#ifdef u
39# undef u
40#endif
41
42#if defined(_MSC_VER) && RT_INLINE_ASM_USES_INTRIN
43# include <intrin.h>
44 /* Emit the intrinsics at all optimization levels. */
45# pragma intrinsic(_ReadWriteBarrier)
46# pragma intrinsic(__cpuid)
47# pragma intrinsic(__stosd)
48# pragma intrinsic(__stosw)
49# pragma intrinsic(__stosb)
50# pragma intrinsic(_BitScanForward)
51# pragma intrinsic(_BitScanReverse)
52# pragma intrinsic(_bittest)
53# pragma intrinsic(_bittestandset)
54# pragma intrinsic(_bittestandreset)
55# pragma intrinsic(_bittestandcomplement)
56# pragma intrinsic(_byteswap_ushort)
57# pragma intrinsic(_byteswap_ulong)
58# pragma intrinsic(_interlockedbittestandset)
59# pragma intrinsic(_interlockedbittestandreset)
60# pragma intrinsic(_InterlockedAnd)
61# pragma intrinsic(_InterlockedOr)
62# pragma intrinsic(_InterlockedIncrement)
63# pragma intrinsic(_InterlockedDecrement)
64# pragma intrinsic(_InterlockedExchange)
65# pragma intrinsic(_InterlockedExchangeAdd)
66# pragma intrinsic(_InterlockedCompareExchange)
67# pragma intrinsic(_InterlockedCompareExchange64)
68# pragma intrinsic(_rotl)
69# pragma intrinsic(_rotr)
70# pragma intrinsic(_rotl64)
71# pragma intrinsic(_rotr64)
72# ifdef RT_ARCH_AMD64
73# pragma intrinsic(__stosq)
74# pragma intrinsic(_byteswap_uint64)
75# pragma intrinsic(_InterlockedExchange64)
76# pragma intrinsic(_InterlockedExchangeAdd64)
77# pragma intrinsic(_InterlockedAnd64)
78# pragma intrinsic(_InterlockedOr64)
79# pragma intrinsic(_InterlockedIncrement64)
80# pragma intrinsic(_InterlockedDecrement64)
81# endif
82#endif
83
84/*
85 * Include #pragma aux definitions for Watcom C/C++.
86 */
87#if defined(__WATCOMC__) && ARCH_BITS == 16 && defined(RT_ARCH_X86)
88# include "asm-watcom-x86-16.h"
89#elif defined(__WATCOMC__) && ARCH_BITS == 32 && defined(RT_ARCH_X86)
90# include "asm-watcom-x86-32.h"
91#endif
92
93
94
95/** @defgroup grp_rt_asm ASM - Assembly Routines
96 * @ingroup grp_rt
97 *
98 * @remarks The difference between ordered and unordered atomic operations are that
99 * the former will complete outstanding reads and writes before continuing
100 * while the latter doesn't make any promises about the order. Ordered
101 * operations doesn't, it seems, make any 100% promise wrt to whether
102 * the operation will complete before any subsequent memory access.
103 * (please, correct if wrong.)
104 *
105 * ASMAtomicSomething operations are all ordered, while ASMAtomicUoSomething
106 * are unordered (note the Uo).
107 *
108 * @remarks Some remarks about __volatile__: Without this keyword gcc is allowed to reorder
109 * or even optimize assembler instructions away. For instance, in the following code
110 * the second rdmsr instruction is optimized away because gcc treats that instruction
111 * as deterministic:
112 *
113 * @code
114 * static inline uint64_t rdmsr_low(int idx)
115 * {
116 * uint32_t low;
117 * __asm__ ("rdmsr" : "=a"(low) : "c"(idx) : "edx");
118 * }
119 * ...
120 * uint32_t msr1 = rdmsr_low(1);
121 * foo(msr1);
122 * msr1 = rdmsr_low(1);
123 * bar(msr1);
124 * @endcode
125 *
126 * The input parameter of rdmsr_low is the same for both calls and therefore gcc will
127 * use the result of the first call as input parameter for bar() as well. For rdmsr this
128 * is not acceptable as this instruction is _not_ deterministic. This applies to reading
129 * machine status information in general.
130 *
131 * @{
132 */
133
134
135/** @def RT_INLINE_ASM_GCC_4_3_X_X86
136 * Used to work around some 4.3.x register allocation issues in this version of
137 * the compiler. So far this workaround is still required for 4.4 and 4.5. */
138#ifdef __GNUC__
139# define RT_INLINE_ASM_GCC_4_3_X_X86 (__GNUC__ == 4 && __GNUC_MINOR__ >= 3 && defined(__i386__))
140#endif
141#ifndef RT_INLINE_ASM_GCC_4_3_X_X86
142# define RT_INLINE_ASM_GCC_4_3_X_X86 0
143#endif
144
145/** @def RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
146 * i686-apple-darwin9-gcc-4.0.1 (GCC) 4.0.1 (Apple Inc. build 5493) screws up
147 * RTSemRWRequestWrite semsemrw-lockless-generic.cpp in release builds. PIC
148 * mode, x86.
149 *
150 * Some gcc 4.3.x versions may have register allocation issues with cmpxchg8b
151 * when in PIC mode on x86.
152 */
153#ifndef RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
154# if defined(DOXYGEN_RUNNING) || defined(__WATCOMC__) /* Watcom has trouble with the expression below */
155# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC 1
156# else
157# define RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC \
158 ( (defined(PIC) || defined(__PIC__)) \
159 && defined(RT_ARCH_X86) \
160 && ( RT_INLINE_ASM_GCC_4_3_X_X86 \
161 || defined(RT_OS_DARWIN)) )
162# endif
163#endif
164
165
166/** @def ASMReturnAddress
167 * Gets the return address of the current (or calling if you like) function or method.
168 */
169#ifdef _MSC_VER
170# ifdef __cplusplus
171extern "C"
172# endif
173void * _ReturnAddress(void);
174# pragma intrinsic(_ReturnAddress)
175# define ASMReturnAddress() _ReturnAddress()
176#elif defined(__GNUC__) || defined(DOXYGEN_RUNNING)
177# define ASMReturnAddress() __builtin_return_address(0)
178#elif defined(__WATCOMC__)
179# define ASMReturnAddress() Watcom_does_not_appear_to_have_intrinsic_return_address_function()
180#else
181# error "Unsupported compiler."
182#endif
183
184
185/**
186 * Compiler memory barrier.
187 *
188 * Ensure that the compiler does not use any cached (register/tmp stack) memory
189 * values or any outstanding writes when returning from this function.
190 *
191 * This function must be used if non-volatile data is modified by a
192 * device or the VMM. Typical cases are port access, MMIO access,
193 * trapping instruction, etc.
194 */
195#if RT_INLINE_ASM_GNU_STYLE
196# define ASMCompilerBarrier() do { __asm__ __volatile__("" : : : "memory"); } while (0)
197#elif RT_INLINE_ASM_USES_INTRIN
198# define ASMCompilerBarrier() do { _ReadWriteBarrier(); } while (0)
199#elif defined(__WATCOMC__)
200void ASMCompilerBarrier(void);
201#else /* 2003 should have _ReadWriteBarrier() but I guess we're at 2002 level then... */
202DECLINLINE(void) ASMCompilerBarrier(void)
203{
204 __asm
205 {
206 }
207}
208#endif
209
210
211/** @def ASMBreakpoint
212 * Debugger Breakpoint.
213 * @deprecated Use RT_BREAKPOINT instead.
214 * @internal
215 */
216#define ASMBreakpoint() RT_BREAKPOINT()
217
218
219/**
220 * Spinloop hint for platforms that have these, empty function on the other
221 * platforms.
222 *
223 * x86 & AMD64: The PAUSE variant of NOP for helping hyperthreaded CPUs detecting
224 * spin locks.
225 */
226#if RT_INLINE_ASM_EXTERNAL && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
227DECLASM(void) ASMNopPause(void);
228#else
229DECLINLINE(void) ASMNopPause(void)
230{
231# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
232# if RT_INLINE_ASM_GNU_STYLE
233 __asm__ __volatile__(".byte 0xf3,0x90\n\t");
234# else
235 __asm {
236 _emit 0f3h
237 _emit 090h
238 }
239# endif
240# else
241 /* dummy */
242# endif
243}
244#endif
245
246
247/**
248 * Atomically Exchange an unsigned 8-bit value, ordered.
249 *
250 * @returns Current *pu8 value
251 * @param pu8 Pointer to the 8-bit variable to update.
252 * @param u8 The 8-bit value to assign to *pu8.
253 */
254#if RT_INLINE_ASM_EXTERNAL
255DECLASM(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8);
256#else
257DECLINLINE(uint8_t) ASMAtomicXchgU8(volatile uint8_t *pu8, uint8_t u8)
258{
259# if RT_INLINE_ASM_GNU_STYLE
260 __asm__ __volatile__("xchgb %0, %1\n\t"
261 : "=m" (*pu8),
262 "=q" (u8) /* =r - busted on g++ (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2) */
263 : "1" (u8),
264 "m" (*pu8));
265# else
266 __asm
267 {
268# ifdef RT_ARCH_AMD64
269 mov rdx, [pu8]
270 mov al, [u8]
271 xchg [rdx], al
272 mov [u8], al
273# else
274 mov edx, [pu8]
275 mov al, [u8]
276 xchg [edx], al
277 mov [u8], al
278# endif
279 }
280# endif
281 return u8;
282}
283#endif
284
285
286/**
287 * Atomically Exchange a signed 8-bit value, ordered.
288 *
289 * @returns Current *pu8 value
290 * @param pi8 Pointer to the 8-bit variable to update.
291 * @param i8 The 8-bit value to assign to *pi8.
292 */
293DECLINLINE(int8_t) ASMAtomicXchgS8(volatile int8_t *pi8, int8_t i8)
294{
295 return (int8_t)ASMAtomicXchgU8((volatile uint8_t *)pi8, (uint8_t)i8);
296}
297
298
299/**
300 * Atomically Exchange a bool value, ordered.
301 *
302 * @returns Current *pf value
303 * @param pf Pointer to the 8-bit variable to update.
304 * @param f The 8-bit value to assign to *pi8.
305 */
306DECLINLINE(bool) ASMAtomicXchgBool(volatile bool *pf, bool f)
307{
308#ifdef _MSC_VER
309 return !!ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
310#else
311 return (bool)ASMAtomicXchgU8((volatile uint8_t *)pf, (uint8_t)f);
312#endif
313}
314
315
316/**
317 * Atomically Exchange an unsigned 16-bit value, ordered.
318 *
319 * @returns Current *pu16 value
320 * @param pu16 Pointer to the 16-bit variable to update.
321 * @param u16 The 16-bit value to assign to *pu16.
322 */
323#if RT_INLINE_ASM_EXTERNAL
324DECLASM(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16);
325#else
326DECLINLINE(uint16_t) ASMAtomicXchgU16(volatile uint16_t *pu16, uint16_t u16)
327{
328# if RT_INLINE_ASM_GNU_STYLE
329 __asm__ __volatile__("xchgw %0, %1\n\t"
330 : "=m" (*pu16),
331 "=r" (u16)
332 : "1" (u16),
333 "m" (*pu16));
334# else
335 __asm
336 {
337# ifdef RT_ARCH_AMD64
338 mov rdx, [pu16]
339 mov ax, [u16]
340 xchg [rdx], ax
341 mov [u16], ax
342# else
343 mov edx, [pu16]
344 mov ax, [u16]
345 xchg [edx], ax
346 mov [u16], ax
347# endif
348 }
349# endif
350 return u16;
351}
352#endif
353
354
355/**
356 * Atomically Exchange a signed 16-bit value, ordered.
357 *
358 * @returns Current *pu16 value
359 * @param pi16 Pointer to the 16-bit variable to update.
360 * @param i16 The 16-bit value to assign to *pi16.
361 */
362DECLINLINE(int16_t) ASMAtomicXchgS16(volatile int16_t *pi16, int16_t i16)
363{
364 return (int16_t)ASMAtomicXchgU16((volatile uint16_t *)pi16, (uint16_t)i16);
365}
366
367
368/**
369 * Atomically Exchange an unsigned 32-bit value, ordered.
370 *
371 * @returns Current *pu32 value
372 * @param pu32 Pointer to the 32-bit variable to update.
373 * @param u32 The 32-bit value to assign to *pu32.
374 *
375 * @remarks Does not work on 286 and earlier.
376 */
377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
378DECLASM(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32);
379#else
380DECLINLINE(uint32_t) ASMAtomicXchgU32(volatile uint32_t *pu32, uint32_t u32)
381{
382# if RT_INLINE_ASM_GNU_STYLE
383 __asm__ __volatile__("xchgl %0, %1\n\t"
384 : "=m" (*pu32),
385 "=r" (u32)
386 : "1" (u32),
387 "m" (*pu32));
388
389# elif RT_INLINE_ASM_USES_INTRIN
390 u32 = _InterlockedExchange((long *)pu32, u32);
391
392# else
393 __asm
394 {
395# ifdef RT_ARCH_AMD64
396 mov rdx, [pu32]
397 mov eax, u32
398 xchg [rdx], eax
399 mov [u32], eax
400# else
401 mov edx, [pu32]
402 mov eax, u32
403 xchg [edx], eax
404 mov [u32], eax
405# endif
406 }
407# endif
408 return u32;
409}
410#endif
411
412
413/**
414 * Atomically Exchange a signed 32-bit value, ordered.
415 *
416 * @returns Current *pu32 value
417 * @param pi32 Pointer to the 32-bit variable to update.
418 * @param i32 The 32-bit value to assign to *pi32.
419 */
420DECLINLINE(int32_t) ASMAtomicXchgS32(volatile int32_t *pi32, int32_t i32)
421{
422 return (int32_t)ASMAtomicXchgU32((volatile uint32_t *)pi32, (uint32_t)i32);
423}
424
425
426/**
427 * Atomically Exchange an unsigned 64-bit value, ordered.
428 *
429 * @returns Current *pu64 value
430 * @param pu64 Pointer to the 64-bit variable to update.
431 * @param u64 The 64-bit value to assign to *pu64.
432 *
433 * @remarks Works on 32-bit x86 CPUs starting with Pentium.
434 */
435#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
436 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
437DECLASM(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64);
438#else
439DECLINLINE(uint64_t) ASMAtomicXchgU64(volatile uint64_t *pu64, uint64_t u64)
440{
441# if defined(RT_ARCH_AMD64)
442# if RT_INLINE_ASM_USES_INTRIN
443 u64 = _InterlockedExchange64((__int64 *)pu64, u64);
444
445# elif RT_INLINE_ASM_GNU_STYLE
446 __asm__ __volatile__("xchgq %0, %1\n\t"
447 : "=m" (*pu64),
448 "=r" (u64)
449 : "1" (u64),
450 "m" (*pu64));
451# else
452 __asm
453 {
454 mov rdx, [pu64]
455 mov rax, [u64]
456 xchg [rdx], rax
457 mov [u64], rax
458 }
459# endif
460# else /* !RT_ARCH_AMD64 */
461# if RT_INLINE_ASM_GNU_STYLE
462# if defined(PIC) || defined(__PIC__)
463 uint32_t u32EBX = (uint32_t)u64;
464 __asm__ __volatile__(/*"xchgl %%esi, %5\n\t"*/
465 "xchgl %%ebx, %3\n\t"
466 "1:\n\t"
467 "lock; cmpxchg8b (%5)\n\t"
468 "jnz 1b\n\t"
469 "movl %3, %%ebx\n\t"
470 /*"xchgl %%esi, %5\n\t"*/
471 : "=A" (u64),
472 "=m" (*pu64)
473 : "0" (*pu64),
474 "m" ( u32EBX ),
475 "c" ( (uint32_t)(u64 >> 32) ),
476 "S" (pu64));
477# else /* !PIC */
478 __asm__ __volatile__("1:\n\t"
479 "lock; cmpxchg8b %1\n\t"
480 "jnz 1b\n\t"
481 : "=A" (u64),
482 "=m" (*pu64)
483 : "0" (*pu64),
484 "b" ( (uint32_t)u64 ),
485 "c" ( (uint32_t)(u64 >> 32) ));
486# endif
487# else
488 __asm
489 {
490 mov ebx, dword ptr [u64]
491 mov ecx, dword ptr [u64 + 4]
492 mov edi, pu64
493 mov eax, dword ptr [edi]
494 mov edx, dword ptr [edi + 4]
495 retry:
496 lock cmpxchg8b [edi]
497 jnz retry
498 mov dword ptr [u64], eax
499 mov dword ptr [u64 + 4], edx
500 }
501# endif
502# endif /* !RT_ARCH_AMD64 */
503 return u64;
504}
505#endif
506
507
508/**
509 * Atomically Exchange an signed 64-bit value, ordered.
510 *
511 * @returns Current *pi64 value
512 * @param pi64 Pointer to the 64-bit variable to update.
513 * @param i64 The 64-bit value to assign to *pi64.
514 */
515DECLINLINE(int64_t) ASMAtomicXchgS64(volatile int64_t *pi64, int64_t i64)
516{
517 return (int64_t)ASMAtomicXchgU64((volatile uint64_t *)pi64, (uint64_t)i64);
518}
519
520
521/**
522 * Atomically Exchange a pointer value, ordered.
523 *
524 * @returns Current *ppv value
525 * @param ppv Pointer to the pointer variable to update.
526 * @param pv The pointer value to assign to *ppv.
527 */
528DECLINLINE(void *) ASMAtomicXchgPtr(void * volatile *ppv, const void *pv)
529{
530#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
531 return (void *)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
532#elif ARCH_BITS == 64
533 return (void *)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
534#else
535# error "ARCH_BITS is bogus"
536#endif
537}
538
539
540/**
541 * Convenience macro for avoiding the annoying casting with ASMAtomicXchgPtr.
542 *
543 * @returns Current *pv value
544 * @param ppv Pointer to the pointer variable to update.
545 * @param pv The pointer value to assign to *ppv.
546 * @param Type The type of *ppv, sans volatile.
547 */
548#ifdef __GNUC__
549# define ASMAtomicXchgPtrT(ppv, pv, Type) \
550 __extension__ \
551 ({\
552 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
553 Type const pvTypeChecked = (pv); \
554 Type pvTypeCheckedRet = (__typeof__(*(ppv))) ASMAtomicXchgPtr((void * volatile *)ppvTypeChecked, (void *)pvTypeChecked); \
555 pvTypeCheckedRet; \
556 })
557#else
558# define ASMAtomicXchgPtrT(ppv, pv, Type) \
559 (Type)ASMAtomicXchgPtr((void * volatile *)(ppv), (void *)(pv))
560#endif
561
562
563/**
564 * Atomically Exchange a raw-mode context pointer value, ordered.
565 *
566 * @returns Current *ppv value
567 * @param ppvRC Pointer to the pointer variable to update.
568 * @param pvRC The pointer value to assign to *ppv.
569 */
570DECLINLINE(RTRCPTR) ASMAtomicXchgRCPtr(RTRCPTR volatile *ppvRC, RTRCPTR pvRC)
571{
572 return (RTRCPTR)ASMAtomicXchgU32((uint32_t volatile *)(void *)ppvRC, (uint32_t)pvRC);
573}
574
575
576/**
577 * Atomically Exchange a ring-0 pointer value, ordered.
578 *
579 * @returns Current *ppv value
580 * @param ppvR0 Pointer to the pointer variable to update.
581 * @param pvR0 The pointer value to assign to *ppv.
582 */
583DECLINLINE(RTR0PTR) ASMAtomicXchgR0Ptr(RTR0PTR volatile *ppvR0, RTR0PTR pvR0)
584{
585#if R0_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
586 return (RTR0PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR0, (uint32_t)pvR0);
587#elif R0_ARCH_BITS == 64
588 return (RTR0PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR0, (uint64_t)pvR0);
589#else
590# error "R0_ARCH_BITS is bogus"
591#endif
592}
593
594
595/**
596 * Atomically Exchange a ring-3 pointer value, ordered.
597 *
598 * @returns Current *ppv value
599 * @param ppvR3 Pointer to the pointer variable to update.
600 * @param pvR3 The pointer value to assign to *ppv.
601 */
602DECLINLINE(RTR3PTR) ASMAtomicXchgR3Ptr(RTR3PTR volatile *ppvR3, RTR3PTR pvR3)
603{
604#if R3_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
605 return (RTR3PTR)ASMAtomicXchgU32((volatile uint32_t *)(void *)ppvR3, (uint32_t)pvR3);
606#elif R3_ARCH_BITS == 64
607 return (RTR3PTR)ASMAtomicXchgU64((volatile uint64_t *)(void *)ppvR3, (uint64_t)pvR3);
608#else
609# error "R3_ARCH_BITS is bogus"
610#endif
611}
612
613
614/** @def ASMAtomicXchgHandle
615 * Atomically Exchange a typical IPRT handle value, ordered.
616 *
617 * @param ph Pointer to the value to update.
618 * @param hNew The new value to assigned to *pu.
619 * @param phRes Where to store the current *ph value.
620 *
621 * @remarks This doesn't currently work for all handles (like RTFILE).
622 */
623#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
624# define ASMAtomicXchgHandle(ph, hNew, phRes) \
625 do { \
626 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
627 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
628 *(uint32_t *)(phRes) = ASMAtomicXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
629 } while (0)
630#elif HC_ARCH_BITS == 64
631# define ASMAtomicXchgHandle(ph, hNew, phRes) \
632 do { \
633 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
634 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
635 *(uint64_t *)(phRes) = ASMAtomicXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
636 } while (0)
637#else
638# error HC_ARCH_BITS
639#endif
640
641
642/**
643 * Atomically Exchange a value which size might differ
644 * between platforms or compilers, ordered.
645 *
646 * @param pu Pointer to the variable to update.
647 * @param uNew The value to assign to *pu.
648 * @todo This is busted as its missing the result argument.
649 */
650#define ASMAtomicXchgSize(pu, uNew) \
651 do { \
652 switch (sizeof(*(pu))) { \
653 case 1: ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
654 case 2: ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
655 case 4: ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
656 case 8: ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
657 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
658 } \
659 } while (0)
660
661/**
662 * Atomically Exchange a value which size might differ
663 * between platforms or compilers, ordered.
664 *
665 * @param pu Pointer to the variable to update.
666 * @param uNew The value to assign to *pu.
667 * @param puRes Where to store the current *pu value.
668 */
669#define ASMAtomicXchgSizeCorrect(pu, uNew, puRes) \
670 do { \
671 switch (sizeof(*(pu))) { \
672 case 1: *(uint8_t *)(puRes) = ASMAtomicXchgU8((volatile uint8_t *)(void *)(pu), (uint8_t)(uNew)); break; \
673 case 2: *(uint16_t *)(puRes) = ASMAtomicXchgU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
674 case 4: *(uint32_t *)(puRes) = ASMAtomicXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
675 case 8: *(uint64_t *)(puRes) = ASMAtomicXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
676 default: AssertMsgFailed(("ASMAtomicXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
677 } \
678 } while (0)
679
680
681
682/**
683 * Atomically Compare and Exchange an unsigned 8-bit value, ordered.
684 *
685 * @returns true if xchg was done.
686 * @returns false if xchg wasn't done.
687 *
688 * @param pu8 Pointer to the value to update.
689 * @param u8New The new value to assigned to *pu8.
690 * @param u8Old The old value to *pu8 compare with.
691 *
692 * @remarks x86: Requires a 486 or later.
693 */
694#if RT_INLINE_ASM_EXTERNAL || !RT_INLINE_ASM_GNU_STYLE
695DECLASM(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, const uint8_t u8Old);
696#else
697DECLINLINE(bool) ASMAtomicCmpXchgU8(volatile uint8_t *pu8, const uint8_t u8New, uint8_t u8Old)
698{
699 uint8_t u8Ret;
700 __asm__ __volatile__("lock; cmpxchgb %3, %0\n\t"
701 "setz %1\n\t"
702 : "=m" (*pu8),
703 "=qm" (u8Ret),
704 "=a" (u8Old)
705 : "q" (u8New),
706 "2" (u8Old),
707 "m" (*pu8));
708 return (bool)u8Ret;
709}
710#endif
711
712
713/**
714 * Atomically Compare and Exchange a signed 8-bit value, ordered.
715 *
716 * @returns true if xchg was done.
717 * @returns false if xchg wasn't done.
718 *
719 * @param pi8 Pointer to the value to update.
720 * @param i8New The new value to assigned to *pi8.
721 * @param i8Old The old value to *pi8 compare with.
722 *
723 * @remarks x86: Requires a 486 or later.
724 */
725DECLINLINE(bool) ASMAtomicCmpXchgS8(volatile int8_t *pi8, const int8_t i8New, const int8_t i8Old)
726{
727 return ASMAtomicCmpXchgU8((volatile uint8_t *)pi8, (const uint8_t)i8New, (const uint8_t)i8Old);
728}
729
730
731/**
732 * Atomically Compare and Exchange a bool value, ordered.
733 *
734 * @returns true if xchg was done.
735 * @returns false if xchg wasn't done.
736 *
737 * @param pf Pointer to the value to update.
738 * @param fNew The new value to assigned to *pf.
739 * @param fOld The old value to *pf compare with.
740 *
741 * @remarks x86: Requires a 486 or later.
742 */
743DECLINLINE(bool) ASMAtomicCmpXchgBool(volatile bool *pf, const bool fNew, const bool fOld)
744{
745 return ASMAtomicCmpXchgU8((volatile uint8_t *)pf, (const uint8_t)fNew, (const uint8_t)fOld);
746}
747
748
749/**
750 * Atomically Compare and Exchange an unsigned 32-bit value, ordered.
751 *
752 * @returns true if xchg was done.
753 * @returns false if xchg wasn't done.
754 *
755 * @param pu32 Pointer to the value to update.
756 * @param u32New The new value to assigned to *pu32.
757 * @param u32Old The old value to *pu32 compare with.
758 *
759 * @remarks x86: Requires a 486 or later.
760 */
761#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
762DECLASM(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old);
763#else
764DECLINLINE(bool) ASMAtomicCmpXchgU32(volatile uint32_t *pu32, const uint32_t u32New, uint32_t u32Old)
765{
766# if RT_INLINE_ASM_GNU_STYLE
767 uint8_t u8Ret;
768 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
769 "setz %1\n\t"
770 : "=m" (*pu32),
771 "=qm" (u8Ret),
772 "=a" (u32Old)
773 : "r" (u32New),
774 "2" (u32Old),
775 "m" (*pu32));
776 return (bool)u8Ret;
777
778# elif RT_INLINE_ASM_USES_INTRIN
779 return (uint32_t)_InterlockedCompareExchange((long *)pu32, u32New, u32Old) == u32Old;
780
781# else
782 uint32_t u32Ret;
783 __asm
784 {
785# ifdef RT_ARCH_AMD64
786 mov rdx, [pu32]
787# else
788 mov edx, [pu32]
789# endif
790 mov eax, [u32Old]
791 mov ecx, [u32New]
792# ifdef RT_ARCH_AMD64
793 lock cmpxchg [rdx], ecx
794# else
795 lock cmpxchg [edx], ecx
796# endif
797 setz al
798 movzx eax, al
799 mov [u32Ret], eax
800 }
801 return !!u32Ret;
802# endif
803}
804#endif
805
806
807/**
808 * Atomically Compare and Exchange a signed 32-bit value, ordered.
809 *
810 * @returns true if xchg was done.
811 * @returns false if xchg wasn't done.
812 *
813 * @param pi32 Pointer to the value to update.
814 * @param i32New The new value to assigned to *pi32.
815 * @param i32Old The old value to *pi32 compare with.
816 *
817 * @remarks x86: Requires a 486 or later.
818 */
819DECLINLINE(bool) ASMAtomicCmpXchgS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old)
820{
821 return ASMAtomicCmpXchgU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old);
822}
823
824
825/**
826 * Atomically Compare and exchange an unsigned 64-bit value, ordered.
827 *
828 * @returns true if xchg was done.
829 * @returns false if xchg wasn't done.
830 *
831 * @param pu64 Pointer to the 64-bit variable to update.
832 * @param u64New The 64-bit value to assign to *pu64.
833 * @param u64Old The value to compare with.
834 *
835 * @remarks x86: Requires a Pentium or later.
836 */
837#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
838 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
839DECLASM(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old);
840#else
841DECLINLINE(bool) ASMAtomicCmpXchgU64(volatile uint64_t *pu64, uint64_t u64New, uint64_t u64Old)
842{
843# if RT_INLINE_ASM_USES_INTRIN
844 return (uint64_t)_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old) == u64Old;
845
846# elif defined(RT_ARCH_AMD64)
847# if RT_INLINE_ASM_GNU_STYLE
848 uint8_t u8Ret;
849 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
850 "setz %1\n\t"
851 : "=m" (*pu64),
852 "=qm" (u8Ret),
853 "=a" (u64Old)
854 : "r" (u64New),
855 "2" (u64Old),
856 "m" (*pu64));
857 return (bool)u8Ret;
858# else
859 bool fRet;
860 __asm
861 {
862 mov rdx, [pu32]
863 mov rax, [u64Old]
864 mov rcx, [u64New]
865 lock cmpxchg [rdx], rcx
866 setz al
867 mov [fRet], al
868 }
869 return fRet;
870# endif
871# else /* !RT_ARCH_AMD64 */
872 uint32_t u32Ret;
873# if RT_INLINE_ASM_GNU_STYLE
874# if defined(PIC) || defined(__PIC__)
875 uint32_t u32EBX = (uint32_t)u64New;
876 uint32_t u32Spill;
877 __asm__ __volatile__("xchgl %%ebx, %4\n\t"
878 "lock; cmpxchg8b (%6)\n\t"
879 "setz %%al\n\t"
880 "movl %4, %%ebx\n\t"
881 "movzbl %%al, %%eax\n\t"
882 : "=a" (u32Ret),
883 "=d" (u32Spill),
884# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
885 "+m" (*pu64)
886# else
887 "=m" (*pu64)
888# endif
889 : "A" (u64Old),
890 "m" ( u32EBX ),
891 "c" ( (uint32_t)(u64New >> 32) ),
892 "S" (pu64));
893# else /* !PIC */
894 uint32_t u32Spill;
895 __asm__ __volatile__("lock; cmpxchg8b %2\n\t"
896 "setz %%al\n\t"
897 "movzbl %%al, %%eax\n\t"
898 : "=a" (u32Ret),
899 "=d" (u32Spill),
900 "+m" (*pu64)
901 : "A" (u64Old),
902 "b" ( (uint32_t)u64New ),
903 "c" ( (uint32_t)(u64New >> 32) ));
904# endif
905 return (bool)u32Ret;
906# else
907 __asm
908 {
909 mov ebx, dword ptr [u64New]
910 mov ecx, dword ptr [u64New + 4]
911 mov edi, [pu64]
912 mov eax, dword ptr [u64Old]
913 mov edx, dword ptr [u64Old + 4]
914 lock cmpxchg8b [edi]
915 setz al
916 movzx eax, al
917 mov dword ptr [u32Ret], eax
918 }
919 return !!u32Ret;
920# endif
921# endif /* !RT_ARCH_AMD64 */
922}
923#endif
924
925
926/**
927 * Atomically Compare and exchange a signed 64-bit value, ordered.
928 *
929 * @returns true if xchg was done.
930 * @returns false if xchg wasn't done.
931 *
932 * @param pi64 Pointer to the 64-bit variable to update.
933 * @param i64 The 64-bit value to assign to *pu64.
934 * @param i64Old The value to compare with.
935 *
936 * @remarks x86: Requires a Pentium or later.
937 */
938DECLINLINE(bool) ASMAtomicCmpXchgS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old)
939{
940 return ASMAtomicCmpXchgU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old);
941}
942
943
944/**
945 * Atomically Compare and Exchange a pointer value, ordered.
946 *
947 * @returns true if xchg was done.
948 * @returns false if xchg wasn't done.
949 *
950 * @param ppv Pointer to the value to update.
951 * @param pvNew The new value to assigned to *ppv.
952 * @param pvOld The old value to *ppv compare with.
953 *
954 * @remarks x86: Requires a 486 or later.
955 */
956DECLINLINE(bool) ASMAtomicCmpXchgPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld)
957{
958#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
959 return ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld);
960#elif ARCH_BITS == 64
961 return ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld);
962#else
963# error "ARCH_BITS is bogus"
964#endif
965}
966
967
968/**
969 * Atomically Compare and Exchange a pointer value, ordered.
970 *
971 * @returns true if xchg was done.
972 * @returns false if xchg wasn't done.
973 *
974 * @param ppv Pointer to the value to update.
975 * @param pvNew The new value to assigned to *ppv.
976 * @param pvOld The old value to *ppv compare with.
977 *
978 * @remarks This is relatively type safe on GCC platforms.
979 * @remarks x86: Requires a 486 or later.
980 */
981#ifdef __GNUC__
982# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
983 __extension__ \
984 ({\
985 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
986 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
987 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
988 bool fMacroRet = ASMAtomicCmpXchgPtrVoid((void * volatile *)ppvTypeChecked, \
989 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked); \
990 fMacroRet; \
991 })
992#else
993# define ASMAtomicCmpXchgPtr(ppv, pvNew, pvOld) \
994 ASMAtomicCmpXchgPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld))
995#endif
996
997
998/** @def ASMAtomicCmpXchgHandle
999 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1000 *
1001 * @param ph Pointer to the value to update.
1002 * @param hNew The new value to assigned to *pu.
1003 * @param hOld The old value to *pu compare with.
1004 * @param fRc Where to store the result.
1005 *
1006 * @remarks This doesn't currently work for all handles (like RTFILE).
1007 * @remarks x86: Requires a 486 or later.
1008 */
1009#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1010# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1011 do { \
1012 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
1013 (fRc) = ASMAtomicCmpXchgU32((uint32_t volatile *)(ph), (const uint32_t)(hNew), (const uint32_t)(hOld)); \
1014 } while (0)
1015#elif HC_ARCH_BITS == 64
1016# define ASMAtomicCmpXchgHandle(ph, hNew, hOld, fRc) \
1017 do { \
1018 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1019 (fRc) = ASMAtomicCmpXchgU64((uint64_t volatile *)(ph), (const uint64_t)(hNew), (const uint64_t)(hOld)); \
1020 } while (0)
1021#else
1022# error HC_ARCH_BITS
1023#endif
1024
1025
1026/** @def ASMAtomicCmpXchgSize
1027 * Atomically Compare and Exchange a value which size might differ
1028 * between platforms or compilers, ordered.
1029 *
1030 * @param pu Pointer to the value to update.
1031 * @param uNew The new value to assigned to *pu.
1032 * @param uOld The old value to *pu compare with.
1033 * @param fRc Where to store the result.
1034 *
1035 * @remarks x86: Requires a 486 or later.
1036 */
1037#define ASMAtomicCmpXchgSize(pu, uNew, uOld, fRc) \
1038 do { \
1039 switch (sizeof(*(pu))) { \
1040 case 4: (fRc) = ASMAtomicCmpXchgU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld)); \
1041 break; \
1042 case 8: (fRc) = ASMAtomicCmpXchgU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld)); \
1043 break; \
1044 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1045 (fRc) = false; \
1046 break; \
1047 } \
1048 } while (0)
1049
1050
1051/**
1052 * Atomically Compare and Exchange an unsigned 32-bit value, additionally
1053 * passes back old value, ordered.
1054 *
1055 * @returns true if xchg was done.
1056 * @returns false if xchg wasn't done.
1057 *
1058 * @param pu32 Pointer to the value to update.
1059 * @param u32New The new value to assigned to *pu32.
1060 * @param u32Old The old value to *pu32 compare with.
1061 * @param pu32Old Pointer store the old value at.
1062 *
1063 * @remarks x86: Requires a 486 or later.
1064 */
1065#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1066DECLASM(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old);
1067#else
1068DECLINLINE(bool) ASMAtomicCmpXchgExU32(volatile uint32_t *pu32, const uint32_t u32New, const uint32_t u32Old, uint32_t *pu32Old)
1069{
1070# if RT_INLINE_ASM_GNU_STYLE
1071 uint8_t u8Ret;
1072 __asm__ __volatile__("lock; cmpxchgl %3, %0\n\t"
1073 "setz %1\n\t"
1074 : "=m" (*pu32),
1075 "=qm" (u8Ret),
1076 "=a" (*pu32Old)
1077 : "r" (u32New),
1078 "a" (u32Old),
1079 "m" (*pu32));
1080 return (bool)u8Ret;
1081
1082# elif RT_INLINE_ASM_USES_INTRIN
1083 return (*pu32Old =_InterlockedCompareExchange((long *)pu32, u32New, u32Old)) == u32Old;
1084
1085# else
1086 uint32_t u32Ret;
1087 __asm
1088 {
1089# ifdef RT_ARCH_AMD64
1090 mov rdx, [pu32]
1091# else
1092 mov edx, [pu32]
1093# endif
1094 mov eax, [u32Old]
1095 mov ecx, [u32New]
1096# ifdef RT_ARCH_AMD64
1097 lock cmpxchg [rdx], ecx
1098 mov rdx, [pu32Old]
1099 mov [rdx], eax
1100# else
1101 lock cmpxchg [edx], ecx
1102 mov edx, [pu32Old]
1103 mov [edx], eax
1104# endif
1105 setz al
1106 movzx eax, al
1107 mov [u32Ret], eax
1108 }
1109 return !!u32Ret;
1110# endif
1111}
1112#endif
1113
1114
1115/**
1116 * Atomically Compare and Exchange a signed 32-bit value, additionally
1117 * passes back old value, ordered.
1118 *
1119 * @returns true if xchg was done.
1120 * @returns false if xchg wasn't done.
1121 *
1122 * @param pi32 Pointer to the value to update.
1123 * @param i32New The new value to assigned to *pi32.
1124 * @param i32Old The old value to *pi32 compare with.
1125 * @param pi32Old Pointer store the old value at.
1126 *
1127 * @remarks x86: Requires a 486 or later.
1128 */
1129DECLINLINE(bool) ASMAtomicCmpXchgExS32(volatile int32_t *pi32, const int32_t i32New, const int32_t i32Old, int32_t *pi32Old)
1130{
1131 return ASMAtomicCmpXchgExU32((volatile uint32_t *)pi32, (uint32_t)i32New, (uint32_t)i32Old, (uint32_t *)pi32Old);
1132}
1133
1134
1135/**
1136 * Atomically Compare and exchange an unsigned 64-bit value, additionally
1137 * passing back old value, ordered.
1138 *
1139 * @returns true if xchg was done.
1140 * @returns false if xchg wasn't done.
1141 *
1142 * @param pu64 Pointer to the 64-bit variable to update.
1143 * @param u64New The 64-bit value to assign to *pu64.
1144 * @param u64Old The value to compare with.
1145 * @param pu64Old Pointer store the old value at.
1146 *
1147 * @remarks x86: Requires a Pentium or later.
1148 */
1149#if (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1150 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1151DECLASM(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old);
1152#else
1153DECLINLINE(bool) ASMAtomicCmpXchgExU64(volatile uint64_t *pu64, const uint64_t u64New, const uint64_t u64Old, uint64_t *pu64Old)
1154{
1155# if RT_INLINE_ASM_USES_INTRIN
1156 return (*pu64Old =_InterlockedCompareExchange64((__int64 *)pu64, u64New, u64Old)) == u64Old;
1157
1158# elif defined(RT_ARCH_AMD64)
1159# if RT_INLINE_ASM_GNU_STYLE
1160 uint8_t u8Ret;
1161 __asm__ __volatile__("lock; cmpxchgq %3, %0\n\t"
1162 "setz %1\n\t"
1163 : "=m" (*pu64),
1164 "=qm" (u8Ret),
1165 "=a" (*pu64Old)
1166 : "r" (u64New),
1167 "a" (u64Old),
1168 "m" (*pu64));
1169 return (bool)u8Ret;
1170# else
1171 bool fRet;
1172 __asm
1173 {
1174 mov rdx, [pu32]
1175 mov rax, [u64Old]
1176 mov rcx, [u64New]
1177 lock cmpxchg [rdx], rcx
1178 mov rdx, [pu64Old]
1179 mov [rdx], rax
1180 setz al
1181 mov [fRet], al
1182 }
1183 return fRet;
1184# endif
1185# else /* !RT_ARCH_AMD64 */
1186# if RT_INLINE_ASM_GNU_STYLE
1187 uint64_t u64Ret;
1188# if defined(PIC) || defined(__PIC__)
1189 /* NB: this code uses a memory clobber description, because the clean
1190 * solution with an output value for *pu64 makes gcc run out of registers.
1191 * This will cause suboptimal code, and anyone with a better solution is
1192 * welcome to improve this. */
1193 __asm__ __volatile__("xchgl %%ebx, %1\n\t"
1194 "lock; cmpxchg8b %3\n\t"
1195 "xchgl %%ebx, %1\n\t"
1196 : "=A" (u64Ret)
1197 : "DS" ((uint32_t)u64New),
1198 "c" ((uint32_t)(u64New >> 32)),
1199 "m" (*pu64),
1200 "0" (u64Old)
1201 : "memory" );
1202# else /* !PIC */
1203 __asm__ __volatile__("lock; cmpxchg8b %4\n\t"
1204 : "=A" (u64Ret),
1205 "=m" (*pu64)
1206 : "b" ((uint32_t)u64New),
1207 "c" ((uint32_t)(u64New >> 32)),
1208 "m" (*pu64),
1209 "0" (u64Old));
1210# endif
1211 *pu64Old = u64Ret;
1212 return u64Ret == u64Old;
1213# else
1214 uint32_t u32Ret;
1215 __asm
1216 {
1217 mov ebx, dword ptr [u64New]
1218 mov ecx, dword ptr [u64New + 4]
1219 mov edi, [pu64]
1220 mov eax, dword ptr [u64Old]
1221 mov edx, dword ptr [u64Old + 4]
1222 lock cmpxchg8b [edi]
1223 mov ebx, [pu64Old]
1224 mov [ebx], eax
1225 setz al
1226 movzx eax, al
1227 add ebx, 4
1228 mov [ebx], edx
1229 mov dword ptr [u32Ret], eax
1230 }
1231 return !!u32Ret;
1232# endif
1233# endif /* !RT_ARCH_AMD64 */
1234}
1235#endif
1236
1237
1238/**
1239 * Atomically Compare and exchange a signed 64-bit value, additionally
1240 * passing back old value, ordered.
1241 *
1242 * @returns true if xchg was done.
1243 * @returns false if xchg wasn't done.
1244 *
1245 * @param pi64 Pointer to the 64-bit variable to update.
1246 * @param i64 The 64-bit value to assign to *pu64.
1247 * @param i64Old The value to compare with.
1248 * @param pi64Old Pointer store the old value at.
1249 *
1250 * @remarks x86: Requires a Pentium or later.
1251 */
1252DECLINLINE(bool) ASMAtomicCmpXchgExS64(volatile int64_t *pi64, const int64_t i64, const int64_t i64Old, int64_t *pi64Old)
1253{
1254 return ASMAtomicCmpXchgExU64((volatile uint64_t *)pi64, (uint64_t)i64, (uint64_t)i64Old, (uint64_t *)pi64Old);
1255}
1256
1257/** @def ASMAtomicCmpXchgExHandle
1258 * Atomically Compare and Exchange a typical IPRT handle value, ordered.
1259 *
1260 * @param ph Pointer to the value to update.
1261 * @param hNew The new value to assigned to *pu.
1262 * @param hOld The old value to *pu compare with.
1263 * @param fRc Where to store the result.
1264 * @param phOldVal Pointer to where to store the old value.
1265 *
1266 * @remarks This doesn't currently work for all handles (like RTFILE).
1267 */
1268#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1269# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1270 do { \
1271 AssertCompile(sizeof(*ph) == sizeof(uint32_t)); \
1272 AssertCompile(sizeof(*phOldVal) == sizeof(uint32_t)); \
1273 (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(puOldVal)); \
1274 } while (0)
1275#elif HC_ARCH_BITS == 64
1276# define ASMAtomicCmpXchgExHandle(ph, hNew, hOld, fRc, phOldVal) \
1277 do { \
1278 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
1279 AssertCompile(sizeof(*(phOldVal)) == sizeof(uint64_t)); \
1280 (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(puOldVal)); \
1281 } while (0)
1282#else
1283# error HC_ARCH_BITS
1284#endif
1285
1286
1287/** @def ASMAtomicCmpXchgExSize
1288 * Atomically Compare and Exchange a value which size might differ
1289 * between platforms or compilers. Additionally passes back old value.
1290 *
1291 * @param pu Pointer to the value to update.
1292 * @param uNew The new value to assigned to *pu.
1293 * @param uOld The old value to *pu compare with.
1294 * @param fRc Where to store the result.
1295 * @param puOldVal Pointer to where to store the old value.
1296 *
1297 * @remarks x86: Requires a 486 or later.
1298 */
1299#define ASMAtomicCmpXchgExSize(pu, uNew, uOld, fRc, puOldVal) \
1300 do { \
1301 switch (sizeof(*(pu))) { \
1302 case 4: (fRc) = ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew), (uint32_t)(uOld), (uint32_t *)(uOldVal)); \
1303 break; \
1304 case 8: (fRc) = ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew), (uint64_t)(uOld), (uint64_t *)(uOldVal)); \
1305 break; \
1306 default: AssertMsgFailed(("ASMAtomicCmpXchgSize: size %d is not supported\n", sizeof(*(pu)))); \
1307 (fRc) = false; \
1308 (uOldVal) = 0; \
1309 break; \
1310 } \
1311 } while (0)
1312
1313
1314/**
1315 * Atomically Compare and Exchange a pointer value, additionally
1316 * passing back old value, ordered.
1317 *
1318 * @returns true if xchg was done.
1319 * @returns false if xchg wasn't done.
1320 *
1321 * @param ppv Pointer to the value to update.
1322 * @param pvNew The new value to assigned to *ppv.
1323 * @param pvOld The old value to *ppv compare with.
1324 * @param ppvOld Pointer store the old value at.
1325 *
1326 * @remarks x86: Requires a 486 or later.
1327 */
1328DECLINLINE(bool) ASMAtomicCmpXchgExPtrVoid(void * volatile *ppv, const void *pvNew, const void *pvOld, void **ppvOld)
1329{
1330#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1331 return ASMAtomicCmpXchgExU32((volatile uint32_t *)(void *)ppv, (uint32_t)pvNew, (uint32_t)pvOld, (uint32_t *)ppvOld);
1332#elif ARCH_BITS == 64
1333 return ASMAtomicCmpXchgExU64((volatile uint64_t *)(void *)ppv, (uint64_t)pvNew, (uint64_t)pvOld, (uint64_t *)ppvOld);
1334#else
1335# error "ARCH_BITS is bogus"
1336#endif
1337}
1338
1339
1340/**
1341 * Atomically Compare and Exchange a pointer value, additionally
1342 * passing back old value, ordered.
1343 *
1344 * @returns true if xchg was done.
1345 * @returns false if xchg wasn't done.
1346 *
1347 * @param ppv Pointer to the value to update.
1348 * @param pvNew The new value to assigned to *ppv.
1349 * @param pvOld The old value to *ppv compare with.
1350 * @param ppvOld Pointer store the old value at.
1351 *
1352 * @remarks This is relatively type safe on GCC platforms.
1353 * @remarks x86: Requires a 486 or later.
1354 */
1355#ifdef __GNUC__
1356# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1357 __extension__ \
1358 ({\
1359 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
1360 __typeof__(*(ppv)) const pvNewTypeChecked = (pvNew); \
1361 __typeof__(*(ppv)) const pvOldTypeChecked = (pvOld); \
1362 __typeof__(*(ppv)) * const ppvOldTypeChecked = (ppvOld); \
1363 bool fMacroRet = ASMAtomicCmpXchgExPtrVoid((void * volatile *)ppvTypeChecked, \
1364 (void *)pvNewTypeChecked, (void *)pvOldTypeChecked, \
1365 (void **)ppvOldTypeChecked); \
1366 fMacroRet; \
1367 })
1368#else
1369# define ASMAtomicCmpXchgExPtr(ppv, pvNew, pvOld, ppvOld) \
1370 ASMAtomicCmpXchgExPtrVoid((void * volatile *)(ppv), (void *)(pvNew), (void *)(pvOld), (void **)(ppvOld))
1371#endif
1372
1373
1374/**
1375 * Virtualization unfriendly serializing instruction, always exits.
1376 */
1377#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
1378DECLASM(void) ASMSerializeInstructionCpuId(void);
1379#else
1380DECLINLINE(void) ASMSerializeInstructionCpuId(void)
1381{
1382# if RT_INLINE_ASM_GNU_STYLE
1383 RTCCUINTREG xAX = 0;
1384# ifdef RT_ARCH_AMD64
1385 __asm__ __volatile__ ("cpuid"
1386 : "=a" (xAX)
1387 : "0" (xAX)
1388 : "rbx", "rcx", "rdx", "memory");
1389# elif (defined(PIC) || defined(__PIC__)) && defined(__i386__)
1390 __asm__ __volatile__ ("push %%ebx\n\t"
1391 "cpuid\n\t"
1392 "pop %%ebx\n\t"
1393 : "=a" (xAX)
1394 : "0" (xAX)
1395 : "ecx", "edx", "memory");
1396# else
1397 __asm__ __volatile__ ("cpuid"
1398 : "=a" (xAX)
1399 : "0" (xAX)
1400 : "ebx", "ecx", "edx", "memory");
1401# endif
1402
1403# elif RT_INLINE_ASM_USES_INTRIN
1404 int aInfo[4];
1405 _ReadWriteBarrier();
1406 __cpuid(aInfo, 0);
1407
1408# else
1409 __asm
1410 {
1411 push ebx
1412 xor eax, eax
1413 cpuid
1414 pop ebx
1415 }
1416# endif
1417}
1418#endif
1419
1420/**
1421 * Virtualization friendly serializing instruction, though more expensive.
1422 */
1423#if RT_INLINE_ASM_EXTERNAL
1424DECLASM(void) ASMSerializeInstructionIRet(void);
1425#else
1426DECLINLINE(void) ASMSerializeInstructionIRet(void)
1427{
1428# if RT_INLINE_ASM_GNU_STYLE
1429# ifdef RT_ARCH_AMD64
1430 __asm__ __volatile__ ("movq %%rsp,%%r10\n\t"
1431 "subq $128, %%rsp\n\t" /*redzone*/
1432 "mov %%ss, %%eax\n\t"
1433 "pushq %%rax\n\t"
1434 "pushq %%r10\n\t"
1435 "pushfq\n\t"
1436 "movl %%cs, %%eax\n\t"
1437 "pushq %%rax\n\t"
1438 "leaq 1f(%%rip), %%rax\n\t"
1439 "pushq %%rax\n\t"
1440 "iretq\n\t"
1441 "1:\n\t"
1442 ::: "rax", "r10", "memory");
1443# else
1444 __asm__ __volatile__ ("pushfl\n\t"
1445 "pushl %%cs\n\t"
1446 "pushl $1f\n\t"
1447 "iretl\n\t"
1448 "1:\n\t"
1449 ::: "memory");
1450# endif
1451
1452# else
1453 __asm
1454 {
1455 pushfd
1456 push cs
1457 push la_ret
1458 iretd
1459 la_ret:
1460 }
1461# endif
1462}
1463#endif
1464
1465/**
1466 * Virtualization friendlier serializing instruction, may still cause exits.
1467 */
1468#if RT_INLINE_ASM_EXTERNAL && RT_INLINE_ASM_USES_INTRIN < 15
1469DECLASM(void) ASMSerializeInstructionRdTscp(void);
1470#else
1471DECLINLINE(void) ASMSerializeInstructionRdTscp(void)
1472{
1473# if RT_INLINE_ASM_GNU_STYLE
1474 /* rdtscp is not supported by ancient linux build VM of course :-( */
1475# ifdef RT_ARCH_AMD64
1476 /*__asm__ __volatile__("rdtscp\n\t" ::: "rax", "rdx, "rcx"); */
1477 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "rax", "rdx", "rcx", "memory");
1478# else
1479 /*__asm__ __volatile__("rdtscp\n\t" ::: "eax", "edx, "ecx"); */
1480 __asm__ __volatile__(".byte 0x0f,0x01,0xf9\n\t" ::: "eax", "edx", "ecx", "memory");
1481# endif
1482# else
1483# if RT_INLINE_ASM_USES_INTRIN >= 15
1484 uint32_t uIgnore;
1485 _ReadWriteBarrier();
1486 (void)__rdtscp(&uIgnore);
1487 (void)uIgnore;
1488# else
1489 __asm
1490 {
1491 rdtscp
1492 }
1493# endif
1494# endif
1495}
1496#endif
1497
1498
1499/**
1500 * Serialize Instruction.
1501 */
1502#if (defined(RT_ARCH_X86) && ARCH_BITS == 16) || defined(IN_GUEST)
1503# define ASMSerializeInstruction() ASMSerializeInstructionIRet()
1504#else
1505# define ASMSerializeInstruction() ASMSerializeInstructionCpuId()
1506#endif
1507
1508
1509/**
1510 * Memory fence, waits for any pending writes and reads to complete.
1511 */
1512DECLINLINE(void) ASMMemoryFence(void)
1513{
1514 /** @todo use mfence? check if all cpus we care for support it. */
1515 uint32_t volatile u32;
1516 ASMAtomicXchgU32(&u32, 0);
1517}
1518
1519
1520/**
1521 * Write fence, waits for any pending writes to complete.
1522 */
1523DECLINLINE(void) ASMWriteFence(void)
1524{
1525 /** @todo use sfence? check if all cpus we care for support it. */
1526 ASMMemoryFence();
1527}
1528
1529
1530/**
1531 * Read fence, waits for any pending reads to complete.
1532 */
1533DECLINLINE(void) ASMReadFence(void)
1534{
1535 /** @todo use lfence? check if all cpus we care for support it. */
1536 ASMMemoryFence();
1537}
1538
1539
1540/**
1541 * Atomically reads an unsigned 8-bit value, ordered.
1542 *
1543 * @returns Current *pu8 value
1544 * @param pu8 Pointer to the 8-bit variable to read.
1545 */
1546DECLINLINE(uint8_t) ASMAtomicReadU8(volatile uint8_t *pu8)
1547{
1548 ASMMemoryFence();
1549 return *pu8; /* byte reads are atomic on x86 */
1550}
1551
1552
1553/**
1554 * Atomically reads an unsigned 8-bit value, unordered.
1555 *
1556 * @returns Current *pu8 value
1557 * @param pu8 Pointer to the 8-bit variable to read.
1558 */
1559DECLINLINE(uint8_t) ASMAtomicUoReadU8(volatile uint8_t *pu8)
1560{
1561 return *pu8; /* byte reads are atomic on x86 */
1562}
1563
1564
1565/**
1566 * Atomically reads a signed 8-bit value, ordered.
1567 *
1568 * @returns Current *pi8 value
1569 * @param pi8 Pointer to the 8-bit variable to read.
1570 */
1571DECLINLINE(int8_t) ASMAtomicReadS8(volatile int8_t *pi8)
1572{
1573 ASMMemoryFence();
1574 return *pi8; /* byte reads are atomic on x86 */
1575}
1576
1577
1578/**
1579 * Atomically reads a signed 8-bit value, unordered.
1580 *
1581 * @returns Current *pi8 value
1582 * @param pi8 Pointer to the 8-bit variable to read.
1583 */
1584DECLINLINE(int8_t) ASMAtomicUoReadS8(volatile int8_t *pi8)
1585{
1586 return *pi8; /* byte reads are atomic on x86 */
1587}
1588
1589
1590/**
1591 * Atomically reads an unsigned 16-bit value, ordered.
1592 *
1593 * @returns Current *pu16 value
1594 * @param pu16 Pointer to the 16-bit variable to read.
1595 */
1596DECLINLINE(uint16_t) ASMAtomicReadU16(volatile uint16_t *pu16)
1597{
1598 ASMMemoryFence();
1599 Assert(!((uintptr_t)pu16 & 1));
1600 return *pu16;
1601}
1602
1603
1604/**
1605 * Atomically reads an unsigned 16-bit value, unordered.
1606 *
1607 * @returns Current *pu16 value
1608 * @param pu16 Pointer to the 16-bit variable to read.
1609 */
1610DECLINLINE(uint16_t) ASMAtomicUoReadU16(volatile uint16_t *pu16)
1611{
1612 Assert(!((uintptr_t)pu16 & 1));
1613 return *pu16;
1614}
1615
1616
1617/**
1618 * Atomically reads a signed 16-bit value, ordered.
1619 *
1620 * @returns Current *pi16 value
1621 * @param pi16 Pointer to the 16-bit variable to read.
1622 */
1623DECLINLINE(int16_t) ASMAtomicReadS16(volatile int16_t *pi16)
1624{
1625 ASMMemoryFence();
1626 Assert(!((uintptr_t)pi16 & 1));
1627 return *pi16;
1628}
1629
1630
1631/**
1632 * Atomically reads a signed 16-bit value, unordered.
1633 *
1634 * @returns Current *pi16 value
1635 * @param pi16 Pointer to the 16-bit variable to read.
1636 */
1637DECLINLINE(int16_t) ASMAtomicUoReadS16(volatile int16_t *pi16)
1638{
1639 Assert(!((uintptr_t)pi16 & 1));
1640 return *pi16;
1641}
1642
1643
1644/**
1645 * Atomically reads an unsigned 32-bit value, ordered.
1646 *
1647 * @returns Current *pu32 value
1648 * @param pu32 Pointer to the 32-bit variable to read.
1649 */
1650DECLINLINE(uint32_t) ASMAtomicReadU32(volatile uint32_t *pu32)
1651{
1652 ASMMemoryFence();
1653 Assert(!((uintptr_t)pu32 & 3));
1654 return *pu32;
1655}
1656
1657
1658/**
1659 * Atomically reads an unsigned 32-bit value, unordered.
1660 *
1661 * @returns Current *pu32 value
1662 * @param pu32 Pointer to the 32-bit variable to read.
1663 */
1664DECLINLINE(uint32_t) ASMAtomicUoReadU32(volatile uint32_t *pu32)
1665{
1666 Assert(!((uintptr_t)pu32 & 3));
1667 return *pu32;
1668}
1669
1670
1671/**
1672 * Atomically reads a signed 32-bit value, ordered.
1673 *
1674 * @returns Current *pi32 value
1675 * @param pi32 Pointer to the 32-bit variable to read.
1676 */
1677DECLINLINE(int32_t) ASMAtomicReadS32(volatile int32_t *pi32)
1678{
1679 ASMMemoryFence();
1680 Assert(!((uintptr_t)pi32 & 3));
1681 return *pi32;
1682}
1683
1684
1685/**
1686 * Atomically reads a signed 32-bit value, unordered.
1687 *
1688 * @returns Current *pi32 value
1689 * @param pi32 Pointer to the 32-bit variable to read.
1690 */
1691DECLINLINE(int32_t) ASMAtomicUoReadS32(volatile int32_t *pi32)
1692{
1693 Assert(!((uintptr_t)pi32 & 3));
1694 return *pi32;
1695}
1696
1697
1698/**
1699 * Atomically reads an unsigned 64-bit value, ordered.
1700 *
1701 * @returns Current *pu64 value
1702 * @param pu64 Pointer to the 64-bit variable to read.
1703 * The memory pointed to must be writable.
1704 *
1705 * @remarks This may fault if the memory is read-only!
1706 * @remarks x86: Requires a Pentium or later.
1707 */
1708#if (RT_INLINE_ASM_EXTERNAL && !defined(RT_ARCH_AMD64)) \
1709 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC
1710DECLASM(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64);
1711#else
1712DECLINLINE(uint64_t) ASMAtomicReadU64(volatile uint64_t *pu64)
1713{
1714 uint64_t u64;
1715# ifdef RT_ARCH_AMD64
1716 Assert(!((uintptr_t)pu64 & 7));
1717/*# if RT_INLINE_ASM_GNU_STYLE
1718 __asm__ __volatile__( "mfence\n\t"
1719 "movq %1, %0\n\t"
1720 : "=r" (u64)
1721 : "m" (*pu64));
1722# else
1723 __asm
1724 {
1725 mfence
1726 mov rdx, [pu64]
1727 mov rax, [rdx]
1728 mov [u64], rax
1729 }
1730# endif*/
1731 ASMMemoryFence();
1732 u64 = *pu64;
1733# else /* !RT_ARCH_AMD64 */
1734# if RT_INLINE_ASM_GNU_STYLE
1735# if defined(PIC) || defined(__PIC__)
1736 uint32_t u32EBX = 0;
1737 Assert(!((uintptr_t)pu64 & 7));
1738 __asm__ __volatile__("xchgl %%ebx, %3\n\t"
1739 "lock; cmpxchg8b (%5)\n\t"
1740 "movl %3, %%ebx\n\t"
1741 : "=A" (u64),
1742# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1743 "+m" (*pu64)
1744# else
1745 "=m" (*pu64)
1746# endif
1747 : "0" (0ULL),
1748 "m" (u32EBX),
1749 "c" (0),
1750 "S" (pu64));
1751# else /* !PIC */
1752 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1753 : "=A" (u64),
1754 "+m" (*pu64)
1755 : "0" (0ULL),
1756 "b" (0),
1757 "c" (0));
1758# endif
1759# else
1760 Assert(!((uintptr_t)pu64 & 7));
1761 __asm
1762 {
1763 xor eax, eax
1764 xor edx, edx
1765 mov edi, pu64
1766 xor ecx, ecx
1767 xor ebx, ebx
1768 lock cmpxchg8b [edi]
1769 mov dword ptr [u64], eax
1770 mov dword ptr [u64 + 4], edx
1771 }
1772# endif
1773# endif /* !RT_ARCH_AMD64 */
1774 return u64;
1775}
1776#endif
1777
1778
1779/**
1780 * Atomically reads an unsigned 64-bit value, unordered.
1781 *
1782 * @returns Current *pu64 value
1783 * @param pu64 Pointer to the 64-bit variable to read.
1784 * The memory pointed to must be writable.
1785 *
1786 * @remarks This may fault if the memory is read-only!
1787 * @remarks x86: Requires a Pentium or later.
1788 */
1789#if !defined(RT_ARCH_AMD64) \
1790 && ( (RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN) \
1791 || RT_INLINE_DONT_MIX_CMPXCHG8B_AND_PIC)
1792DECLASM(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64);
1793#else
1794DECLINLINE(uint64_t) ASMAtomicUoReadU64(volatile uint64_t *pu64)
1795{
1796 uint64_t u64;
1797# ifdef RT_ARCH_AMD64
1798 Assert(!((uintptr_t)pu64 & 7));
1799/*# if RT_INLINE_ASM_GNU_STYLE
1800 Assert(!((uintptr_t)pu64 & 7));
1801 __asm__ __volatile__("movq %1, %0\n\t"
1802 : "=r" (u64)
1803 : "m" (*pu64));
1804# else
1805 __asm
1806 {
1807 mov rdx, [pu64]
1808 mov rax, [rdx]
1809 mov [u64], rax
1810 }
1811# endif */
1812 u64 = *pu64;
1813# else /* !RT_ARCH_AMD64 */
1814# if RT_INLINE_ASM_GNU_STYLE
1815# if defined(PIC) || defined(__PIC__)
1816 uint32_t u32EBX = 0;
1817 uint32_t u32Spill;
1818 Assert(!((uintptr_t)pu64 & 7));
1819 __asm__ __volatile__("xor %%eax,%%eax\n\t"
1820 "xor %%ecx,%%ecx\n\t"
1821 "xor %%edx,%%edx\n\t"
1822 "xchgl %%ebx, %3\n\t"
1823 "lock; cmpxchg8b (%4)\n\t"
1824 "movl %3, %%ebx\n\t"
1825 : "=A" (u64),
1826# if (__GNUC__ * 100 + __GNUC_MINOR__) >= 403
1827 "+m" (*pu64),
1828# else
1829 "=m" (*pu64),
1830# endif
1831 "=c" (u32Spill)
1832 : "m" (u32EBX),
1833 "S" (pu64));
1834# else /* !PIC */
1835 __asm__ __volatile__("lock; cmpxchg8b %1\n\t"
1836 : "=A" (u64),
1837 "+m" (*pu64)
1838 : "0" (0ULL),
1839 "b" (0),
1840 "c" (0));
1841# endif
1842# else
1843 Assert(!((uintptr_t)pu64 & 7));
1844 __asm
1845 {
1846 xor eax, eax
1847 xor edx, edx
1848 mov edi, pu64
1849 xor ecx, ecx
1850 xor ebx, ebx
1851 lock cmpxchg8b [edi]
1852 mov dword ptr [u64], eax
1853 mov dword ptr [u64 + 4], edx
1854 }
1855# endif
1856# endif /* !RT_ARCH_AMD64 */
1857 return u64;
1858}
1859#endif
1860
1861
1862/**
1863 * Atomically reads a signed 64-bit value, ordered.
1864 *
1865 * @returns Current *pi64 value
1866 * @param pi64 Pointer to the 64-bit variable to read.
1867 * The memory pointed to must be writable.
1868 *
1869 * @remarks This may fault if the memory is read-only!
1870 * @remarks x86: Requires a Pentium or later.
1871 */
1872DECLINLINE(int64_t) ASMAtomicReadS64(volatile int64_t *pi64)
1873{
1874 return (int64_t)ASMAtomicReadU64((volatile uint64_t *)pi64);
1875}
1876
1877
1878/**
1879 * Atomically reads a signed 64-bit value, unordered.
1880 *
1881 * @returns Current *pi64 value
1882 * @param pi64 Pointer to the 64-bit variable to read.
1883 * The memory pointed to must be writable.
1884 *
1885 * @remarks This will fault if the memory is read-only!
1886 * @remarks x86: Requires a Pentium or later.
1887 */
1888DECLINLINE(int64_t) ASMAtomicUoReadS64(volatile int64_t *pi64)
1889{
1890 return (int64_t)ASMAtomicUoReadU64((volatile uint64_t *)pi64);
1891}
1892
1893
1894/**
1895 * Atomically reads a size_t value, ordered.
1896 *
1897 * @returns Current *pcb value
1898 * @param pcb Pointer to the size_t variable to read.
1899 */
1900DECLINLINE(size_t) ASMAtomicReadZ(size_t volatile *pcb)
1901{
1902#if ARCH_BITS == 64
1903 return ASMAtomicReadU64((uint64_t volatile *)pcb);
1904#elif ARCH_BITS == 32
1905 return ASMAtomicReadU32((uint32_t volatile *)pcb);
1906#elif ARCH_BITS == 16
1907 AssertCompileSize(size_t, 2);
1908 return ASMAtomicReadU16((uint16_t volatile *)pcb);
1909#else
1910# error "Unsupported ARCH_BITS value"
1911#endif
1912}
1913
1914
1915/**
1916 * Atomically reads a size_t value, unordered.
1917 *
1918 * @returns Current *pcb value
1919 * @param pcb Pointer to the size_t variable to read.
1920 */
1921DECLINLINE(size_t) ASMAtomicUoReadZ(size_t volatile *pcb)
1922{
1923#if ARCH_BITS == 64 || (ARCH_BITS == 16 && RT_FAR_DATA)
1924 return ASMAtomicUoReadU64((uint64_t volatile *)pcb);
1925#elif ARCH_BITS == 32
1926 return ASMAtomicUoReadU32((uint32_t volatile *)pcb);
1927#elif ARCH_BITS == 16
1928 AssertCompileSize(size_t, 2);
1929 return ASMAtomicUoReadU16((uint16_t volatile *)pcb);
1930#else
1931# error "Unsupported ARCH_BITS value"
1932#endif
1933}
1934
1935
1936/**
1937 * Atomically reads a pointer value, ordered.
1938 *
1939 * @returns Current *pv value
1940 * @param ppv Pointer to the pointer variable to read.
1941 *
1942 * @remarks Please use ASMAtomicReadPtrT, it provides better type safety and
1943 * requires less typing (no casts).
1944 */
1945DECLINLINE(void *) ASMAtomicReadPtr(void * volatile *ppv)
1946{
1947#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1948 return (void *)ASMAtomicReadU32((volatile uint32_t *)(void *)ppv);
1949#elif ARCH_BITS == 64
1950 return (void *)ASMAtomicReadU64((volatile uint64_t *)(void *)ppv);
1951#else
1952# error "ARCH_BITS is bogus"
1953#endif
1954}
1955
1956/**
1957 * Convenience macro for avoiding the annoying casting with ASMAtomicReadPtr.
1958 *
1959 * @returns Current *pv value
1960 * @param ppv Pointer to the pointer variable to read.
1961 * @param Type The type of *ppv, sans volatile.
1962 */
1963#ifdef __GNUC__
1964# define ASMAtomicReadPtrT(ppv, Type) \
1965 __extension__ \
1966 ({\
1967 __typeof__(*(ppv)) volatile *ppvTypeChecked = (ppv); \
1968 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicReadPtr((void * volatile *)ppvTypeChecked); \
1969 pvTypeChecked; \
1970 })
1971#else
1972# define ASMAtomicReadPtrT(ppv, Type) \
1973 (Type)ASMAtomicReadPtr((void * volatile *)(ppv))
1974#endif
1975
1976
1977/**
1978 * Atomically reads a pointer value, unordered.
1979 *
1980 * @returns Current *pv value
1981 * @param ppv Pointer to the pointer variable to read.
1982 *
1983 * @remarks Please use ASMAtomicUoReadPtrT, it provides better type safety and
1984 * requires less typing (no casts).
1985 */
1986DECLINLINE(void *) ASMAtomicUoReadPtr(void * volatile *ppv)
1987{
1988#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
1989 return (void *)ASMAtomicUoReadU32((volatile uint32_t *)(void *)ppv);
1990#elif ARCH_BITS == 64
1991 return (void *)ASMAtomicUoReadU64((volatile uint64_t *)(void *)ppv);
1992#else
1993# error "ARCH_BITS is bogus"
1994#endif
1995}
1996
1997
1998/**
1999 * Convenience macro for avoiding the annoying casting with ASMAtomicUoReadPtr.
2000 *
2001 * @returns Current *pv value
2002 * @param ppv Pointer to the pointer variable to read.
2003 * @param Type The type of *ppv, sans volatile.
2004 */
2005#ifdef __GNUC__
2006# define ASMAtomicUoReadPtrT(ppv, Type) \
2007 __extension__ \
2008 ({\
2009 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2010 Type pvTypeChecked = (__typeof__(*(ppv))) ASMAtomicUoReadPtr((void * volatile *)ppvTypeChecked); \
2011 pvTypeChecked; \
2012 })
2013#else
2014# define ASMAtomicUoReadPtrT(ppv, Type) \
2015 (Type)ASMAtomicUoReadPtr((void * volatile *)(ppv))
2016#endif
2017
2018
2019/**
2020 * Atomically reads a boolean value, ordered.
2021 *
2022 * @returns Current *pf value
2023 * @param pf Pointer to the boolean variable to read.
2024 */
2025DECLINLINE(bool) ASMAtomicReadBool(volatile bool *pf)
2026{
2027 ASMMemoryFence();
2028 return *pf; /* byte reads are atomic on x86 */
2029}
2030
2031
2032/**
2033 * Atomically reads a boolean value, unordered.
2034 *
2035 * @returns Current *pf value
2036 * @param pf Pointer to the boolean variable to read.
2037 */
2038DECLINLINE(bool) ASMAtomicUoReadBool(volatile bool *pf)
2039{
2040 return *pf; /* byte reads are atomic on x86 */
2041}
2042
2043
2044/**
2045 * Atomically read a typical IPRT handle value, ordered.
2046 *
2047 * @param ph Pointer to the handle variable to read.
2048 * @param phRes Where to store the result.
2049 *
2050 * @remarks This doesn't currently work for all handles (like RTFILE).
2051 */
2052#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2053# define ASMAtomicReadHandle(ph, phRes) \
2054 do { \
2055 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2056 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2057 *(uint32_t *)(phRes) = ASMAtomicReadU32((uint32_t volatile *)(ph)); \
2058 } while (0)
2059#elif HC_ARCH_BITS == 64
2060# define ASMAtomicReadHandle(ph, phRes) \
2061 do { \
2062 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2063 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2064 *(uint64_t *)(phRes) = ASMAtomicReadU64((uint64_t volatile *)(ph)); \
2065 } while (0)
2066#else
2067# error HC_ARCH_BITS
2068#endif
2069
2070
2071/**
2072 * Atomically read a typical IPRT handle value, unordered.
2073 *
2074 * @param ph Pointer to the handle variable to read.
2075 * @param phRes Where to store the result.
2076 *
2077 * @remarks This doesn't currently work for all handles (like RTFILE).
2078 */
2079#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2080# define ASMAtomicUoReadHandle(ph, phRes) \
2081 do { \
2082 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2083 AssertCompile(sizeof(*(phRes)) == sizeof(uint32_t)); \
2084 *(uint32_t *)(phRes) = ASMAtomicUoReadU32((uint32_t volatile *)(ph)); \
2085 } while (0)
2086#elif HC_ARCH_BITS == 64
2087# define ASMAtomicUoReadHandle(ph, phRes) \
2088 do { \
2089 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2090 AssertCompile(sizeof(*(phRes)) == sizeof(uint64_t)); \
2091 *(uint64_t *)(phRes) = ASMAtomicUoReadU64((uint64_t volatile *)(ph)); \
2092 } while (0)
2093#else
2094# error HC_ARCH_BITS
2095#endif
2096
2097
2098/**
2099 * Atomically read a value which size might differ
2100 * between platforms or compilers, ordered.
2101 *
2102 * @param pu Pointer to the variable to read.
2103 * @param puRes Where to store the result.
2104 */
2105#define ASMAtomicReadSize(pu, puRes) \
2106 do { \
2107 switch (sizeof(*(pu))) { \
2108 case 1: *(uint8_t *)(puRes) = ASMAtomicReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2109 case 2: *(uint16_t *)(puRes) = ASMAtomicReadU16((volatile uint16_t *)(void *)(pu)); break; \
2110 case 4: *(uint32_t *)(puRes) = ASMAtomicReadU32((volatile uint32_t *)(void *)(pu)); break; \
2111 case 8: *(uint64_t *)(puRes) = ASMAtomicReadU64((volatile uint64_t *)(void *)(pu)); break; \
2112 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2113 } \
2114 } while (0)
2115
2116
2117/**
2118 * Atomically read a value which size might differ
2119 * between platforms or compilers, unordered.
2120 *
2121 * @param pu Pointer to the variable to read.
2122 * @param puRes Where to store the result.
2123 */
2124#define ASMAtomicUoReadSize(pu, puRes) \
2125 do { \
2126 switch (sizeof(*(pu))) { \
2127 case 1: *(uint8_t *)(puRes) = ASMAtomicUoReadU8( (volatile uint8_t *)(void *)(pu)); break; \
2128 case 2: *(uint16_t *)(puRes) = ASMAtomicUoReadU16((volatile uint16_t *)(void *)(pu)); break; \
2129 case 4: *(uint32_t *)(puRes) = ASMAtomicUoReadU32((volatile uint32_t *)(void *)(pu)); break; \
2130 case 8: *(uint64_t *)(puRes) = ASMAtomicUoReadU64((volatile uint64_t *)(void *)(pu)); break; \
2131 default: AssertMsgFailed(("ASMAtomicReadSize: size %d is not supported\n", sizeof(*(pu)))); \
2132 } \
2133 } while (0)
2134
2135
2136/**
2137 * Atomically writes an unsigned 8-bit value, ordered.
2138 *
2139 * @param pu8 Pointer to the 8-bit variable.
2140 * @param u8 The 8-bit value to assign to *pu8.
2141 */
2142DECLINLINE(void) ASMAtomicWriteU8(volatile uint8_t *pu8, uint8_t u8)
2143{
2144 ASMAtomicXchgU8(pu8, u8);
2145}
2146
2147
2148/**
2149 * Atomically writes an unsigned 8-bit value, unordered.
2150 *
2151 * @param pu8 Pointer to the 8-bit variable.
2152 * @param u8 The 8-bit value to assign to *pu8.
2153 */
2154DECLINLINE(void) ASMAtomicUoWriteU8(volatile uint8_t *pu8, uint8_t u8)
2155{
2156 *pu8 = u8; /* byte writes are atomic on x86 */
2157}
2158
2159
2160/**
2161 * Atomically writes a signed 8-bit value, ordered.
2162 *
2163 * @param pi8 Pointer to the 8-bit variable to read.
2164 * @param i8 The 8-bit value to assign to *pi8.
2165 */
2166DECLINLINE(void) ASMAtomicWriteS8(volatile int8_t *pi8, int8_t i8)
2167{
2168 ASMAtomicXchgS8(pi8, i8);
2169}
2170
2171
2172/**
2173 * Atomically writes a signed 8-bit value, unordered.
2174 *
2175 * @param pi8 Pointer to the 8-bit variable to write.
2176 * @param i8 The 8-bit value to assign to *pi8.
2177 */
2178DECLINLINE(void) ASMAtomicUoWriteS8(volatile int8_t *pi8, int8_t i8)
2179{
2180 *pi8 = i8; /* byte writes are atomic on x86 */
2181}
2182
2183
2184/**
2185 * Atomically writes an unsigned 16-bit value, ordered.
2186 *
2187 * @param pu16 Pointer to the 16-bit variable to write.
2188 * @param u16 The 16-bit value to assign to *pu16.
2189 */
2190DECLINLINE(void) ASMAtomicWriteU16(volatile uint16_t *pu16, uint16_t u16)
2191{
2192 ASMAtomicXchgU16(pu16, u16);
2193}
2194
2195
2196/**
2197 * Atomically writes an unsigned 16-bit value, unordered.
2198 *
2199 * @param pu16 Pointer to the 16-bit variable to write.
2200 * @param u16 The 16-bit value to assign to *pu16.
2201 */
2202DECLINLINE(void) ASMAtomicUoWriteU16(volatile uint16_t *pu16, uint16_t u16)
2203{
2204 Assert(!((uintptr_t)pu16 & 1));
2205 *pu16 = u16;
2206}
2207
2208
2209/**
2210 * Atomically writes a signed 16-bit value, ordered.
2211 *
2212 * @param pi16 Pointer to the 16-bit variable to write.
2213 * @param i16 The 16-bit value to assign to *pi16.
2214 */
2215DECLINLINE(void) ASMAtomicWriteS16(volatile int16_t *pi16, int16_t i16)
2216{
2217 ASMAtomicXchgS16(pi16, i16);
2218}
2219
2220
2221/**
2222 * Atomically writes a signed 16-bit value, unordered.
2223 *
2224 * @param pi16 Pointer to the 16-bit variable to write.
2225 * @param i16 The 16-bit value to assign to *pi16.
2226 */
2227DECLINLINE(void) ASMAtomicUoWriteS16(volatile int16_t *pi16, int16_t i16)
2228{
2229 Assert(!((uintptr_t)pi16 & 1));
2230 *pi16 = i16;
2231}
2232
2233
2234/**
2235 * Atomically writes an unsigned 32-bit value, ordered.
2236 *
2237 * @param pu32 Pointer to the 32-bit variable to write.
2238 * @param u32 The 32-bit value to assign to *pu32.
2239 */
2240DECLINLINE(void) ASMAtomicWriteU32(volatile uint32_t *pu32, uint32_t u32)
2241{
2242 ASMAtomicXchgU32(pu32, u32);
2243}
2244
2245
2246/**
2247 * Atomically writes an unsigned 32-bit value, unordered.
2248 *
2249 * @param pu32 Pointer to the 32-bit variable to write.
2250 * @param u32 The 32-bit value to assign to *pu32.
2251 */
2252DECLINLINE(void) ASMAtomicUoWriteU32(volatile uint32_t *pu32, uint32_t u32)
2253{
2254 Assert(!((uintptr_t)pu32 & 3));
2255 *pu32 = u32;
2256}
2257
2258
2259/**
2260 * Atomically writes a signed 32-bit value, ordered.
2261 *
2262 * @param pi32 Pointer to the 32-bit variable to write.
2263 * @param i32 The 32-bit value to assign to *pi32.
2264 */
2265DECLINLINE(void) ASMAtomicWriteS32(volatile int32_t *pi32, int32_t i32)
2266{
2267 ASMAtomicXchgS32(pi32, i32);
2268}
2269
2270
2271/**
2272 * Atomically writes a signed 32-bit value, unordered.
2273 *
2274 * @param pi32 Pointer to the 32-bit variable to write.
2275 * @param i32 The 32-bit value to assign to *pi32.
2276 */
2277DECLINLINE(void) ASMAtomicUoWriteS32(volatile int32_t *pi32, int32_t i32)
2278{
2279 Assert(!((uintptr_t)pi32 & 3));
2280 *pi32 = i32;
2281}
2282
2283
2284/**
2285 * Atomically writes an unsigned 64-bit value, ordered.
2286 *
2287 * @param pu64 Pointer to the 64-bit variable to write.
2288 * @param u64 The 64-bit value to assign to *pu64.
2289 */
2290DECLINLINE(void) ASMAtomicWriteU64(volatile uint64_t *pu64, uint64_t u64)
2291{
2292 ASMAtomicXchgU64(pu64, u64);
2293}
2294
2295
2296/**
2297 * Atomically writes an unsigned 64-bit value, unordered.
2298 *
2299 * @param pu64 Pointer to the 64-bit variable to write.
2300 * @param u64 The 64-bit value to assign to *pu64.
2301 */
2302DECLINLINE(void) ASMAtomicUoWriteU64(volatile uint64_t *pu64, uint64_t u64)
2303{
2304 Assert(!((uintptr_t)pu64 & 7));
2305#if ARCH_BITS == 64
2306 *pu64 = u64;
2307#else
2308 ASMAtomicXchgU64(pu64, u64);
2309#endif
2310}
2311
2312
2313/**
2314 * Atomically writes a signed 64-bit value, ordered.
2315 *
2316 * @param pi64 Pointer to the 64-bit variable to write.
2317 * @param i64 The 64-bit value to assign to *pi64.
2318 */
2319DECLINLINE(void) ASMAtomicWriteS64(volatile int64_t *pi64, int64_t i64)
2320{
2321 ASMAtomicXchgS64(pi64, i64);
2322}
2323
2324
2325/**
2326 * Atomically writes a signed 64-bit value, unordered.
2327 *
2328 * @param pi64 Pointer to the 64-bit variable to write.
2329 * @param i64 The 64-bit value to assign to *pi64.
2330 */
2331DECLINLINE(void) ASMAtomicUoWriteS64(volatile int64_t *pi64, int64_t i64)
2332{
2333 Assert(!((uintptr_t)pi64 & 7));
2334#if ARCH_BITS == 64
2335 *pi64 = i64;
2336#else
2337 ASMAtomicXchgS64(pi64, i64);
2338#endif
2339}
2340
2341
2342/**
2343 * Atomically writes a boolean value, unordered.
2344 *
2345 * @param pf Pointer to the boolean variable to write.
2346 * @param f The boolean value to assign to *pf.
2347 */
2348DECLINLINE(void) ASMAtomicWriteBool(volatile bool *pf, bool f)
2349{
2350 ASMAtomicWriteU8((uint8_t volatile *)pf, f);
2351}
2352
2353
2354/**
2355 * Atomically writes a boolean value, unordered.
2356 *
2357 * @param pf Pointer to the boolean variable to write.
2358 * @param f The boolean value to assign to *pf.
2359 */
2360DECLINLINE(void) ASMAtomicUoWriteBool(volatile bool *pf, bool f)
2361{
2362 *pf = f; /* byte writes are atomic on x86 */
2363}
2364
2365
2366/**
2367 * Atomically writes a pointer value, ordered.
2368 *
2369 * @param ppv Pointer to the pointer variable to write.
2370 * @param pv The pointer value to assign to *ppv.
2371 */
2372DECLINLINE(void) ASMAtomicWritePtrVoid(void * volatile *ppv, const void *pv)
2373{
2374#if ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2375 ASMAtomicWriteU32((volatile uint32_t *)(void *)ppv, (uint32_t)pv);
2376#elif ARCH_BITS == 64
2377 ASMAtomicWriteU64((volatile uint64_t *)(void *)ppv, (uint64_t)pv);
2378#else
2379# error "ARCH_BITS is bogus"
2380#endif
2381}
2382
2383
2384/**
2385 * Atomically writes a pointer value, ordered.
2386 *
2387 * @param ppv Pointer to the pointer variable to write.
2388 * @param pv The pointer value to assign to *ppv. If NULL use
2389 * ASMAtomicWriteNullPtr or you'll land in trouble.
2390 *
2391 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2392 * NULL.
2393 */
2394#ifdef __GNUC__
2395# define ASMAtomicWritePtr(ppv, pv) \
2396 do \
2397 { \
2398 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2399 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2400 \
2401 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2402 AssertCompile(sizeof(pv) == sizeof(void *)); \
2403 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2404 \
2405 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), (void *)(pvTypeChecked)); \
2406 } while (0)
2407#else
2408# define ASMAtomicWritePtr(ppv, pv) \
2409 do \
2410 { \
2411 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2412 AssertCompile(sizeof(pv) == sizeof(void *)); \
2413 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2414 \
2415 ASMAtomicWritePtrVoid((void * volatile *)(ppv), (void *)(pv)); \
2416 } while (0)
2417#endif
2418
2419
2420/**
2421 * Atomically sets a pointer to NULL, ordered.
2422 *
2423 * @param ppv Pointer to the pointer variable that should be set to NULL.
2424 *
2425 * @remarks This is relatively type safe on GCC platforms.
2426 */
2427#ifdef __GNUC__
2428# define ASMAtomicWriteNullPtr(ppv) \
2429 do \
2430 { \
2431 __typeof__(*(ppv)) * const ppvTypeChecked = (ppv); \
2432 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2433 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2434 ASMAtomicWritePtrVoid((void * volatile *)(ppvTypeChecked), NULL); \
2435 } while (0)
2436#else
2437# define ASMAtomicWriteNullPtr(ppv) \
2438 do \
2439 { \
2440 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2441 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2442 ASMAtomicWritePtrVoid((void * volatile *)(ppv), NULL); \
2443 } while (0)
2444#endif
2445
2446
2447/**
2448 * Atomically writes a pointer value, unordered.
2449 *
2450 * @returns Current *pv value
2451 * @param ppv Pointer to the pointer variable.
2452 * @param pv The pointer value to assign to *ppv. If NULL use
2453 * ASMAtomicUoWriteNullPtr or you'll land in trouble.
2454 *
2455 * @remarks This is relatively type safe on GCC platforms when @a pv isn't
2456 * NULL.
2457 */
2458#ifdef __GNUC__
2459# define ASMAtomicUoWritePtr(ppv, pv) \
2460 do \
2461 { \
2462 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2463 __typeof__(*(ppv)) const pvTypeChecked = (pv); \
2464 \
2465 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2466 AssertCompile(sizeof(pv) == sizeof(void *)); \
2467 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2468 \
2469 *(ppvTypeChecked) = pvTypeChecked; \
2470 } while (0)
2471#else
2472# define ASMAtomicUoWritePtr(ppv, pv) \
2473 do \
2474 { \
2475 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2476 AssertCompile(sizeof(pv) == sizeof(void *)); \
2477 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2478 *(ppv) = pv; \
2479 } while (0)
2480#endif
2481
2482
2483/**
2484 * Atomically sets a pointer to NULL, unordered.
2485 *
2486 * @param ppv Pointer to the pointer variable that should be set to NULL.
2487 *
2488 * @remarks This is relatively type safe on GCC platforms.
2489 */
2490#ifdef __GNUC__
2491# define ASMAtomicUoWriteNullPtr(ppv) \
2492 do \
2493 { \
2494 __typeof__(*(ppv)) volatile * const ppvTypeChecked = (ppv); \
2495 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2496 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2497 *(ppvTypeChecked) = NULL; \
2498 } while (0)
2499#else
2500# define ASMAtomicUoWriteNullPtr(ppv) \
2501 do \
2502 { \
2503 AssertCompile(sizeof(*ppv) == sizeof(void *)); \
2504 Assert(!( (uintptr_t)ppv & ((ARCH_BITS / 8) - 1) )); \
2505 *(ppv) = NULL; \
2506 } while (0)
2507#endif
2508
2509
2510/**
2511 * Atomically write a typical IPRT handle value, ordered.
2512 *
2513 * @param ph Pointer to the variable to update.
2514 * @param hNew The value to assign to *ph.
2515 *
2516 * @remarks This doesn't currently work for all handles (like RTFILE).
2517 */
2518#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2519# define ASMAtomicWriteHandle(ph, hNew) \
2520 do { \
2521 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2522 ASMAtomicWriteU32((uint32_t volatile *)(ph), (const uint32_t)(hNew)); \
2523 } while (0)
2524#elif HC_ARCH_BITS == 64
2525# define ASMAtomicWriteHandle(ph, hNew) \
2526 do { \
2527 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2528 ASMAtomicWriteU64((uint64_t volatile *)(ph), (const uint64_t)(hNew)); \
2529 } while (0)
2530#else
2531# error HC_ARCH_BITS
2532#endif
2533
2534
2535/**
2536 * Atomically write a typical IPRT handle value, unordered.
2537 *
2538 * @param ph Pointer to the variable to update.
2539 * @param hNew The value to assign to *ph.
2540 *
2541 * @remarks This doesn't currently work for all handles (like RTFILE).
2542 */
2543#if HC_ARCH_BITS == 32 || (ARCH_BITS == 16 && RT_FAR_DATA)
2544# define ASMAtomicUoWriteHandle(ph, hNew) \
2545 do { \
2546 AssertCompile(sizeof(*(ph)) == sizeof(uint32_t)); \
2547 ASMAtomicUoWriteU32((uint32_t volatile *)(ph), (const uint32_t)hNew); \
2548 } while (0)
2549#elif HC_ARCH_BITS == 64
2550# define ASMAtomicUoWriteHandle(ph, hNew) \
2551 do { \
2552 AssertCompile(sizeof(*(ph)) == sizeof(uint64_t)); \
2553 ASMAtomicUoWriteU64((uint64_t volatile *)(ph), (const uint64_t)hNew); \
2554 } while (0)
2555#else
2556# error HC_ARCH_BITS
2557#endif
2558
2559
2560/**
2561 * Atomically write a value which size might differ
2562 * between platforms or compilers, ordered.
2563 *
2564 * @param pu Pointer to the variable to update.
2565 * @param uNew The value to assign to *pu.
2566 */
2567#define ASMAtomicWriteSize(pu, uNew) \
2568 do { \
2569 switch (sizeof(*(pu))) { \
2570 case 1: ASMAtomicWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2571 case 2: ASMAtomicWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2572 case 4: ASMAtomicWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2573 case 8: ASMAtomicWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2574 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2575 } \
2576 } while (0)
2577
2578/**
2579 * Atomically write a value which size might differ
2580 * between platforms or compilers, unordered.
2581 *
2582 * @param pu Pointer to the variable to update.
2583 * @param uNew The value to assign to *pu.
2584 */
2585#define ASMAtomicUoWriteSize(pu, uNew) \
2586 do { \
2587 switch (sizeof(*(pu))) { \
2588 case 1: ASMAtomicUoWriteU8( (volatile uint8_t *)(void *)(pu), (uint8_t )(uNew)); break; \
2589 case 2: ASMAtomicUoWriteU16((volatile uint16_t *)(void *)(pu), (uint16_t)(uNew)); break; \
2590 case 4: ASMAtomicUoWriteU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2591 case 8: ASMAtomicUoWriteU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2592 default: AssertMsgFailed(("ASMAtomicWriteSize: size %d is not supported\n", sizeof(*(pu)))); \
2593 } \
2594 } while (0)
2595
2596
2597
2598/**
2599 * Atomically exchanges and adds to a 16-bit value, ordered.
2600 *
2601 * @returns The old value.
2602 * @param pu16 Pointer to the value.
2603 * @param u16 Number to add.
2604 *
2605 * @remarks Currently not implemented, just to make 16-bit code happy.
2606 * @remarks x86: Requires a 486 or later.
2607 */
2608DECLASM(uint16_t) ASMAtomicAddU16(uint16_t volatile *pu16, uint32_t u16);
2609
2610
2611/**
2612 * Atomically exchanges and adds to a 32-bit value, ordered.
2613 *
2614 * @returns The old value.
2615 * @param pu32 Pointer to the value.
2616 * @param u32 Number to add.
2617 *
2618 * @remarks x86: Requires a 486 or later.
2619 */
2620#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2621DECLASM(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32);
2622#else
2623DECLINLINE(uint32_t) ASMAtomicAddU32(uint32_t volatile *pu32, uint32_t u32)
2624{
2625# if RT_INLINE_ASM_USES_INTRIN
2626 u32 = _InterlockedExchangeAdd((long *)pu32, u32);
2627 return u32;
2628
2629# elif RT_INLINE_ASM_GNU_STYLE
2630 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2631 : "=r" (u32),
2632 "=m" (*pu32)
2633 : "0" (u32),
2634 "m" (*pu32)
2635 : "memory");
2636 return u32;
2637# else
2638 __asm
2639 {
2640 mov eax, [u32]
2641# ifdef RT_ARCH_AMD64
2642 mov rdx, [pu32]
2643 lock xadd [rdx], eax
2644# else
2645 mov edx, [pu32]
2646 lock xadd [edx], eax
2647# endif
2648 mov [u32], eax
2649 }
2650 return u32;
2651# endif
2652}
2653#endif
2654
2655
2656/**
2657 * Atomically exchanges and adds to a signed 32-bit value, ordered.
2658 *
2659 * @returns The old value.
2660 * @param pi32 Pointer to the value.
2661 * @param i32 Number to add.
2662 *
2663 * @remarks x86: Requires a 486 or later.
2664 */
2665DECLINLINE(int32_t) ASMAtomicAddS32(int32_t volatile *pi32, int32_t i32)
2666{
2667 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)i32);
2668}
2669
2670
2671/**
2672 * Atomically exchanges and adds to a 64-bit value, ordered.
2673 *
2674 * @returns The old value.
2675 * @param pu64 Pointer to the value.
2676 * @param u64 Number to add.
2677 *
2678 * @remarks x86: Requires a Pentium or later.
2679 */
2680#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2681DECLASM(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64);
2682#else
2683DECLINLINE(uint64_t) ASMAtomicAddU64(uint64_t volatile *pu64, uint64_t u64)
2684{
2685# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2686 u64 = _InterlockedExchangeAdd64((__int64 *)pu64, u64);
2687 return u64;
2688
2689# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2690 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2691 : "=r" (u64),
2692 "=m" (*pu64)
2693 : "0" (u64),
2694 "m" (*pu64)
2695 : "memory");
2696 return u64;
2697# else
2698 uint64_t u64Old;
2699 for (;;)
2700 {
2701 uint64_t u64New;
2702 u64Old = ASMAtomicUoReadU64(pu64);
2703 u64New = u64Old + u64;
2704 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
2705 break;
2706 ASMNopPause();
2707 }
2708 return u64Old;
2709# endif
2710}
2711#endif
2712
2713
2714/**
2715 * Atomically exchanges and adds to a signed 64-bit value, ordered.
2716 *
2717 * @returns The old value.
2718 * @param pi64 Pointer to the value.
2719 * @param i64 Number to add.
2720 *
2721 * @remarks x86: Requires a Pentium or later.
2722 */
2723DECLINLINE(int64_t) ASMAtomicAddS64(int64_t volatile *pi64, int64_t i64)
2724{
2725 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)i64);
2726}
2727
2728
2729/**
2730 * Atomically exchanges and adds to a size_t value, ordered.
2731 *
2732 * @returns The old value.
2733 * @param pcb Pointer to the size_t value.
2734 * @param cb Number to add.
2735 */
2736DECLINLINE(size_t) ASMAtomicAddZ(size_t volatile *pcb, size_t cb)
2737{
2738#if ARCH_BITS == 64
2739 AssertCompileSize(size_t, 8);
2740 return ASMAtomicAddU64((uint64_t volatile *)pcb, cb);
2741#elif ARCH_BITS == 32
2742 AssertCompileSize(size_t, 4);
2743 return ASMAtomicAddU32((uint32_t volatile *)pcb, cb);
2744#elif ARCH_BITS == 16
2745 AssertCompileSize(size_t, 2);
2746 return ASMAtomicAddU16((uint16_t volatile *)pcb, cb);
2747#else
2748# error "Unsupported ARCH_BITS value"
2749#endif
2750}
2751
2752
2753/**
2754 * Atomically exchanges and adds a value which size might differ between
2755 * platforms or compilers, ordered.
2756 *
2757 * @param pu Pointer to the variable to update.
2758 * @param uNew The value to add to *pu.
2759 * @param puOld Where to store the old value.
2760 */
2761#define ASMAtomicAddSize(pu, uNew, puOld) \
2762 do { \
2763 switch (sizeof(*(pu))) { \
2764 case 4: *(uint32_t *)(puOld) = ASMAtomicAddU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2765 case 8: *(uint64_t *)(puOld) = ASMAtomicAddU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2766 default: AssertMsgFailed(("ASMAtomicAddSize: size %d is not supported\n", sizeof(*(pu)))); \
2767 } \
2768 } while (0)
2769
2770
2771
2772/**
2773 * Atomically exchanges and subtracts to an unsigned 16-bit value, ordered.
2774 *
2775 * @returns The old value.
2776 * @param pu16 Pointer to the value.
2777 * @param u16 Number to subtract.
2778 *
2779 * @remarks x86: Requires a 486 or later.
2780 */
2781DECLINLINE(uint16_t) ASMAtomicSubU16(uint16_t volatile *pu16, uint32_t u16)
2782{
2783 return ASMAtomicAddU16(pu16, (uint16_t)-(int16_t)u16);
2784}
2785
2786
2787/**
2788 * Atomically exchanges and subtracts to a signed 16-bit value, ordered.
2789 *
2790 * @returns The old value.
2791 * @param pi16 Pointer to the value.
2792 * @param i16 Number to subtract.
2793 *
2794 * @remarks x86: Requires a 486 or later.
2795 */
2796DECLINLINE(int16_t) ASMAtomicSubS16(int16_t volatile *pi16, int16_t i16)
2797{
2798 return (int16_t)ASMAtomicAddU16((uint16_t volatile *)pi16, (uint16_t)-i16);
2799}
2800
2801
2802/**
2803 * Atomically exchanges and subtracts to an unsigned 32-bit value, ordered.
2804 *
2805 * @returns The old value.
2806 * @param pu32 Pointer to the value.
2807 * @param u32 Number to subtract.
2808 *
2809 * @remarks x86: Requires a 486 or later.
2810 */
2811DECLINLINE(uint32_t) ASMAtomicSubU32(uint32_t volatile *pu32, uint32_t u32)
2812{
2813 return ASMAtomicAddU32(pu32, (uint32_t)-(int32_t)u32);
2814}
2815
2816
2817/**
2818 * Atomically exchanges and subtracts to a signed 32-bit value, ordered.
2819 *
2820 * @returns The old value.
2821 * @param pi32 Pointer to the value.
2822 * @param i32 Number to subtract.
2823 *
2824 * @remarks x86: Requires a 486 or later.
2825 */
2826DECLINLINE(int32_t) ASMAtomicSubS32(int32_t volatile *pi32, int32_t i32)
2827{
2828 return (int32_t)ASMAtomicAddU32((uint32_t volatile *)pi32, (uint32_t)-i32);
2829}
2830
2831
2832/**
2833 * Atomically exchanges and subtracts to an unsigned 64-bit value, ordered.
2834 *
2835 * @returns The old value.
2836 * @param pu64 Pointer to the value.
2837 * @param u64 Number to subtract.
2838 *
2839 * @remarks x86: Requires a Pentium or later.
2840 */
2841DECLINLINE(uint64_t) ASMAtomicSubU64(uint64_t volatile *pu64, uint64_t u64)
2842{
2843 return ASMAtomicAddU64(pu64, (uint64_t)-(int64_t)u64);
2844}
2845
2846
2847/**
2848 * Atomically exchanges and subtracts to a signed 64-bit value, ordered.
2849 *
2850 * @returns The old value.
2851 * @param pi64 Pointer to the value.
2852 * @param i64 Number to subtract.
2853 *
2854 * @remarks x86: Requires a Pentium or later.
2855 */
2856DECLINLINE(int64_t) ASMAtomicSubS64(int64_t volatile *pi64, int64_t i64)
2857{
2858 return (int64_t)ASMAtomicAddU64((uint64_t volatile *)pi64, (uint64_t)-i64);
2859}
2860
2861
2862/**
2863 * Atomically exchanges and subtracts to a size_t value, ordered.
2864 *
2865 * @returns The old value.
2866 * @param pcb Pointer to the size_t value.
2867 * @param cb Number to subtract.
2868 *
2869 * @remarks x86: Requires a 486 or later.
2870 */
2871DECLINLINE(size_t) ASMAtomicSubZ(size_t volatile *pcb, size_t cb)
2872{
2873#if ARCH_BITS == 64
2874 return ASMAtomicSubU64((uint64_t volatile *)pcb, cb);
2875#elif ARCH_BITS == 32
2876 return ASMAtomicSubU32((uint32_t volatile *)pcb, cb);
2877#elif ARCH_BITS == 16
2878 AssertCompileSize(size_t, 2);
2879 return ASMAtomicSubU16((uint16_t volatile *)pcb, cb);
2880#else
2881# error "Unsupported ARCH_BITS value"
2882#endif
2883}
2884
2885
2886/**
2887 * Atomically exchanges and subtracts a value which size might differ between
2888 * platforms or compilers, ordered.
2889 *
2890 * @param pu Pointer to the variable to update.
2891 * @param uNew The value to subtract to *pu.
2892 * @param puOld Where to store the old value.
2893 *
2894 * @remarks x86: Requires a 486 or later.
2895 */
2896#define ASMAtomicSubSize(pu, uNew, puOld) \
2897 do { \
2898 switch (sizeof(*(pu))) { \
2899 case 4: *(uint32_t *)(puOld) = ASMAtomicSubU32((volatile uint32_t *)(void *)(pu), (uint32_t)(uNew)); break; \
2900 case 8: *(uint64_t *)(puOld) = ASMAtomicSubU64((volatile uint64_t *)(void *)(pu), (uint64_t)(uNew)); break; \
2901 default: AssertMsgFailed(("ASMAtomicSubSize: size %d is not supported\n", sizeof(*(pu)))); \
2902 } \
2903 } while (0)
2904
2905
2906
2907/**
2908 * Atomically increment a 16-bit value, ordered.
2909 *
2910 * @returns The new value.
2911 * @param pu16 Pointer to the value to increment.
2912 * @remarks Not implemented. Just to make 16-bit code happy.
2913 *
2914 * @remarks x86: Requires a 486 or later.
2915 */
2916DECLASM(uint16_t) ASMAtomicIncU16(uint16_t volatile *pu16);
2917
2918
2919/**
2920 * Atomically increment a 32-bit value, ordered.
2921 *
2922 * @returns The new value.
2923 * @param pu32 Pointer to the value to increment.
2924 *
2925 * @remarks x86: Requires a 486 or later.
2926 */
2927#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2928DECLASM(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32);
2929#else
2930DECLINLINE(uint32_t) ASMAtomicIncU32(uint32_t volatile *pu32)
2931{
2932 uint32_t u32;
2933# if RT_INLINE_ASM_USES_INTRIN
2934 u32 = _InterlockedIncrement((long *)pu32);
2935 return u32;
2936
2937# elif RT_INLINE_ASM_GNU_STYLE
2938 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
2939 : "=r" (u32),
2940 "=m" (*pu32)
2941 : "0" (1),
2942 "m" (*pu32)
2943 : "memory");
2944 return u32+1;
2945# else
2946 __asm
2947 {
2948 mov eax, 1
2949# ifdef RT_ARCH_AMD64
2950 mov rdx, [pu32]
2951 lock xadd [rdx], eax
2952# else
2953 mov edx, [pu32]
2954 lock xadd [edx], eax
2955# endif
2956 mov u32, eax
2957 }
2958 return u32+1;
2959# endif
2960}
2961#endif
2962
2963
2964/**
2965 * Atomically increment a signed 32-bit value, ordered.
2966 *
2967 * @returns The new value.
2968 * @param pi32 Pointer to the value to increment.
2969 *
2970 * @remarks x86: Requires a 486 or later.
2971 */
2972DECLINLINE(int32_t) ASMAtomicIncS32(int32_t volatile *pi32)
2973{
2974 return (int32_t)ASMAtomicIncU32((uint32_t volatile *)pi32);
2975}
2976
2977
2978/**
2979 * Atomically increment a 64-bit value, ordered.
2980 *
2981 * @returns The new value.
2982 * @param pu64 Pointer to the value to increment.
2983 *
2984 * @remarks x86: Requires a Pentium or later.
2985 */
2986#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
2987DECLASM(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64);
2988#else
2989DECLINLINE(uint64_t) ASMAtomicIncU64(uint64_t volatile *pu64)
2990{
2991# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
2992 uint64_t u64;
2993 u64 = _InterlockedIncrement64((__int64 *)pu64);
2994 return u64;
2995
2996# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
2997 uint64_t u64;
2998 __asm__ __volatile__("lock; xaddq %0, %1\n\t"
2999 : "=r" (u64),
3000 "=m" (*pu64)
3001 : "0" (1),
3002 "m" (*pu64)
3003 : "memory");
3004 return u64 + 1;
3005# else
3006 return ASMAtomicAddU64(pu64, 1) + 1;
3007# endif
3008}
3009#endif
3010
3011
3012/**
3013 * Atomically increment a signed 64-bit value, ordered.
3014 *
3015 * @returns The new value.
3016 * @param pi64 Pointer to the value to increment.
3017 *
3018 * @remarks x86: Requires a Pentium or later.
3019 */
3020DECLINLINE(int64_t) ASMAtomicIncS64(int64_t volatile *pi64)
3021{
3022 return (int64_t)ASMAtomicIncU64((uint64_t volatile *)pi64);
3023}
3024
3025
3026/**
3027 * Atomically increment a size_t value, ordered.
3028 *
3029 * @returns The new value.
3030 * @param pcb Pointer to the value to increment.
3031 *
3032 * @remarks x86: Requires a 486 or later.
3033 */
3034DECLINLINE(int64_t) ASMAtomicIncZ(size_t volatile *pcb)
3035{
3036#if ARCH_BITS == 64
3037 return ASMAtomicIncU64((uint64_t volatile *)pcb);
3038#elif ARCH_BITS == 32
3039 return ASMAtomicIncU32((uint32_t volatile *)pcb);
3040#elif ARCH_BITS == 16
3041 return ASMAtomicIncU16((uint16_t volatile *)pcb);
3042#else
3043# error "Unsupported ARCH_BITS value"
3044#endif
3045}
3046
3047
3048
3049/**
3050 * Atomically decrement an unsigned 32-bit value, ordered.
3051 *
3052 * @returns The new value.
3053 * @param pu16 Pointer to the value to decrement.
3054 * @remarks Not implemented. Just to make 16-bit code happy.
3055 *
3056 * @remarks x86: Requires a 486 or later.
3057 */
3058DECLASM(uint32_t) ASMAtomicDecU16(uint16_t volatile *pu16);
3059
3060
3061/**
3062 * Atomically decrement an unsigned 32-bit value, ordered.
3063 *
3064 * @returns The new value.
3065 * @param pu32 Pointer to the value to decrement.
3066 *
3067 * @remarks x86: Requires a 486 or later.
3068 */
3069#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3070DECLASM(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32);
3071#else
3072DECLINLINE(uint32_t) ASMAtomicDecU32(uint32_t volatile *pu32)
3073{
3074 uint32_t u32;
3075# if RT_INLINE_ASM_USES_INTRIN
3076 u32 = _InterlockedDecrement((long *)pu32);
3077 return u32;
3078
3079# elif RT_INLINE_ASM_GNU_STYLE
3080 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3081 : "=r" (u32),
3082 "=m" (*pu32)
3083 : "0" (-1),
3084 "m" (*pu32)
3085 : "memory");
3086 return u32-1;
3087# else
3088 __asm
3089 {
3090 mov eax, -1
3091# ifdef RT_ARCH_AMD64
3092 mov rdx, [pu32]
3093 lock xadd [rdx], eax
3094# else
3095 mov edx, [pu32]
3096 lock xadd [edx], eax
3097# endif
3098 mov u32, eax
3099 }
3100 return u32-1;
3101# endif
3102}
3103#endif
3104
3105
3106/**
3107 * Atomically decrement a signed 32-bit value, ordered.
3108 *
3109 * @returns The new value.
3110 * @param pi32 Pointer to the value to decrement.
3111 *
3112 * @remarks x86: Requires a 486 or later.
3113 */
3114DECLINLINE(int32_t) ASMAtomicDecS32(int32_t volatile *pi32)
3115{
3116 return (int32_t)ASMAtomicDecU32((uint32_t volatile *)pi32);
3117}
3118
3119
3120/**
3121 * Atomically decrement an unsigned 64-bit value, ordered.
3122 *
3123 * @returns The new value.
3124 * @param pu64 Pointer to the value to decrement.
3125 *
3126 * @remarks x86: Requires a Pentium or later.
3127 */
3128#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3129DECLASM(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64);
3130#else
3131DECLINLINE(uint64_t) ASMAtomicDecU64(uint64_t volatile *pu64)
3132{
3133# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3134 uint64_t u64 = _InterlockedDecrement64((__int64 volatile *)pu64);
3135 return u64;
3136
3137# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3138 uint64_t u64;
3139 __asm__ __volatile__("lock; xaddq %q0, %1\n\t"
3140 : "=r" (u64),
3141 "=m" (*pu64)
3142 : "0" (~(uint64_t)0),
3143 "m" (*pu64)
3144 : "memory");
3145 return u64-1;
3146# else
3147 return ASMAtomicAddU64(pu64, UINT64_MAX) - 1;
3148# endif
3149}
3150#endif
3151
3152
3153/**
3154 * Atomically decrement a signed 64-bit value, ordered.
3155 *
3156 * @returns The new value.
3157 * @param pi64 Pointer to the value to decrement.
3158 *
3159 * @remarks x86: Requires a Pentium or later.
3160 */
3161DECLINLINE(int64_t) ASMAtomicDecS64(int64_t volatile *pi64)
3162{
3163 return (int64_t)ASMAtomicDecU64((uint64_t volatile *)pi64);
3164}
3165
3166
3167/**
3168 * Atomically decrement a size_t value, ordered.
3169 *
3170 * @returns The new value.
3171 * @param pcb Pointer to the value to decrement.
3172 *
3173 * @remarks x86: Requires a 486 or later.
3174 */
3175DECLINLINE(int64_t) ASMAtomicDecZ(size_t volatile *pcb)
3176{
3177#if ARCH_BITS == 64
3178 return ASMAtomicDecU64((uint64_t volatile *)pcb);
3179#elif ARCH_BITS == 32
3180 return ASMAtomicDecU32((uint32_t volatile *)pcb);
3181#elif ARCH_BITS == 16
3182 return ASMAtomicDecU16((uint16_t volatile *)pcb);
3183#else
3184# error "Unsupported ARCH_BITS value"
3185#endif
3186}
3187
3188
3189/**
3190 * Atomically Or an unsigned 32-bit value, ordered.
3191 *
3192 * @param pu32 Pointer to the pointer variable to OR u32 with.
3193 * @param u32 The value to OR *pu32 with.
3194 *
3195 * @remarks x86: Requires a 386 or later.
3196 */
3197#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3198DECLASM(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32);
3199#else
3200DECLINLINE(void) ASMAtomicOrU32(uint32_t volatile *pu32, uint32_t u32)
3201{
3202# if RT_INLINE_ASM_USES_INTRIN
3203 _InterlockedOr((long volatile *)pu32, (long)u32);
3204
3205# elif RT_INLINE_ASM_GNU_STYLE
3206 __asm__ __volatile__("lock; orl %1, %0\n\t"
3207 : "=m" (*pu32)
3208 : "ir" (u32),
3209 "m" (*pu32));
3210# else
3211 __asm
3212 {
3213 mov eax, [u32]
3214# ifdef RT_ARCH_AMD64
3215 mov rdx, [pu32]
3216 lock or [rdx], eax
3217# else
3218 mov edx, [pu32]
3219 lock or [edx], eax
3220# endif
3221 }
3222# endif
3223}
3224#endif
3225
3226
3227/**
3228 * Atomically Or a signed 32-bit value, ordered.
3229 *
3230 * @param pi32 Pointer to the pointer variable to OR u32 with.
3231 * @param i32 The value to OR *pu32 with.
3232 *
3233 * @remarks x86: Requires a 386 or later.
3234 */
3235DECLINLINE(void) ASMAtomicOrS32(int32_t volatile *pi32, int32_t i32)
3236{
3237 ASMAtomicOrU32((uint32_t volatile *)pi32, i32);
3238}
3239
3240
3241/**
3242 * Atomically Or an unsigned 64-bit value, ordered.
3243 *
3244 * @param pu64 Pointer to the pointer variable to OR u64 with.
3245 * @param u64 The value to OR *pu64 with.
3246 *
3247 * @remarks x86: Requires a Pentium or later.
3248 */
3249#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3250DECLASM(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64);
3251#else
3252DECLINLINE(void) ASMAtomicOrU64(uint64_t volatile *pu64, uint64_t u64)
3253{
3254# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3255 _InterlockedOr64((__int64 volatile *)pu64, (__int64)u64);
3256
3257# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3258 __asm__ __volatile__("lock; orq %1, %q0\n\t"
3259 : "=m" (*pu64)
3260 : "r" (u64),
3261 "m" (*pu64));
3262# else
3263 for (;;)
3264 {
3265 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3266 uint64_t u64New = u64Old | u64;
3267 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3268 break;
3269 ASMNopPause();
3270 }
3271# endif
3272}
3273#endif
3274
3275
3276/**
3277 * Atomically Or a signed 64-bit value, ordered.
3278 *
3279 * @param pi64 Pointer to the pointer variable to OR u64 with.
3280 * @param i64 The value to OR *pu64 with.
3281 *
3282 * @remarks x86: Requires a Pentium or later.
3283 */
3284DECLINLINE(void) ASMAtomicOrS64(int64_t volatile *pi64, int64_t i64)
3285{
3286 ASMAtomicOrU64((uint64_t volatile *)pi64, i64);
3287}
3288
3289
3290/**
3291 * Atomically And an unsigned 32-bit value, ordered.
3292 *
3293 * @param pu32 Pointer to the pointer variable to AND u32 with.
3294 * @param u32 The value to AND *pu32 with.
3295 *
3296 * @remarks x86: Requires a 386 or later.
3297 */
3298#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3299DECLASM(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32);
3300#else
3301DECLINLINE(void) ASMAtomicAndU32(uint32_t volatile *pu32, uint32_t u32)
3302{
3303# if RT_INLINE_ASM_USES_INTRIN
3304 _InterlockedAnd((long volatile *)pu32, u32);
3305
3306# elif RT_INLINE_ASM_GNU_STYLE
3307 __asm__ __volatile__("lock; andl %1, %0\n\t"
3308 : "=m" (*pu32)
3309 : "ir" (u32),
3310 "m" (*pu32));
3311# else
3312 __asm
3313 {
3314 mov eax, [u32]
3315# ifdef RT_ARCH_AMD64
3316 mov rdx, [pu32]
3317 lock and [rdx], eax
3318# else
3319 mov edx, [pu32]
3320 lock and [edx], eax
3321# endif
3322 }
3323# endif
3324}
3325#endif
3326
3327
3328/**
3329 * Atomically And a signed 32-bit value, ordered.
3330 *
3331 * @param pi32 Pointer to the pointer variable to AND i32 with.
3332 * @param i32 The value to AND *pi32 with.
3333 *
3334 * @remarks x86: Requires a 386 or later.
3335 */
3336DECLINLINE(void) ASMAtomicAndS32(int32_t volatile *pi32, int32_t i32)
3337{
3338 ASMAtomicAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3339}
3340
3341
3342/**
3343 * Atomically And an unsigned 64-bit value, ordered.
3344 *
3345 * @param pu64 Pointer to the pointer variable to AND u64 with.
3346 * @param u64 The value to AND *pu64 with.
3347 *
3348 * @remarks x86: Requires a Pentium or later.
3349 */
3350#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3351DECLASM(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64);
3352#else
3353DECLINLINE(void) ASMAtomicAndU64(uint64_t volatile *pu64, uint64_t u64)
3354{
3355# if RT_INLINE_ASM_USES_INTRIN && defined(RT_ARCH_AMD64)
3356 _InterlockedAnd64((__int64 volatile *)pu64, u64);
3357
3358# elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3359 __asm__ __volatile__("lock; andq %1, %0\n\t"
3360 : "=m" (*pu64)
3361 : "r" (u64),
3362 "m" (*pu64));
3363# else
3364 for (;;)
3365 {
3366 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3367 uint64_t u64New = u64Old & u64;
3368 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3369 break;
3370 ASMNopPause();
3371 }
3372# endif
3373}
3374#endif
3375
3376
3377/**
3378 * Atomically And a signed 64-bit value, ordered.
3379 *
3380 * @param pi64 Pointer to the pointer variable to AND i64 with.
3381 * @param i64 The value to AND *pi64 with.
3382 *
3383 * @remarks x86: Requires a Pentium or later.
3384 */
3385DECLINLINE(void) ASMAtomicAndS64(int64_t volatile *pi64, int64_t i64)
3386{
3387 ASMAtomicAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3388}
3389
3390
3391/**
3392 * Atomically OR an unsigned 32-bit value, unordered but interrupt safe.
3393 *
3394 * @param pu32 Pointer to the pointer variable to OR u32 with.
3395 * @param u32 The value to OR *pu32 with.
3396 *
3397 * @remarks x86: Requires a 386 or later.
3398 */
3399#if RT_INLINE_ASM_EXTERNAL
3400DECLASM(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32);
3401#else
3402DECLINLINE(void) ASMAtomicUoOrU32(uint32_t volatile *pu32, uint32_t u32)
3403{
3404# if RT_INLINE_ASM_GNU_STYLE
3405 __asm__ __volatile__("orl %1, %0\n\t"
3406 : "=m" (*pu32)
3407 : "ir" (u32),
3408 "m" (*pu32));
3409# else
3410 __asm
3411 {
3412 mov eax, [u32]
3413# ifdef RT_ARCH_AMD64
3414 mov rdx, [pu32]
3415 or [rdx], eax
3416# else
3417 mov edx, [pu32]
3418 or [edx], eax
3419# endif
3420 }
3421# endif
3422}
3423#endif
3424
3425
3426/**
3427 * Atomically OR a signed 32-bit value, unordered.
3428 *
3429 * @param pi32 Pointer to the pointer variable to OR u32 with.
3430 * @param i32 The value to OR *pu32 with.
3431 *
3432 * @remarks x86: Requires a 386 or later.
3433 */
3434DECLINLINE(void) ASMAtomicUoOrS32(int32_t volatile *pi32, int32_t i32)
3435{
3436 ASMAtomicUoOrU32((uint32_t volatile *)pi32, i32);
3437}
3438
3439
3440/**
3441 * Atomically OR an unsigned 64-bit value, unordered.
3442 *
3443 * @param pu64 Pointer to the pointer variable to OR u64 with.
3444 * @param u64 The value to OR *pu64 with.
3445 *
3446 * @remarks x86: Requires a Pentium or later.
3447 */
3448#if RT_INLINE_ASM_EXTERNAL
3449DECLASM(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64);
3450#else
3451DECLINLINE(void) ASMAtomicUoOrU64(uint64_t volatile *pu64, uint64_t u64)
3452{
3453# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3454 __asm__ __volatile__("orq %1, %q0\n\t"
3455 : "=m" (*pu64)
3456 : "r" (u64),
3457 "m" (*pu64));
3458# else
3459 for (;;)
3460 {
3461 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3462 uint64_t u64New = u64Old | u64;
3463 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3464 break;
3465 ASMNopPause();
3466 }
3467# endif
3468}
3469#endif
3470
3471
3472/**
3473 * Atomically Or a signed 64-bit value, unordered.
3474 *
3475 * @param pi64 Pointer to the pointer variable to OR u64 with.
3476 * @param i64 The value to OR *pu64 with.
3477 *
3478 * @remarks x86: Requires a Pentium or later.
3479 */
3480DECLINLINE(void) ASMAtomicUoOrS64(int64_t volatile *pi64, int64_t i64)
3481{
3482 ASMAtomicUoOrU64((uint64_t volatile *)pi64, i64);
3483}
3484
3485
3486/**
3487 * Atomically And an unsigned 32-bit value, unordered.
3488 *
3489 * @param pu32 Pointer to the pointer variable to AND u32 with.
3490 * @param u32 The value to AND *pu32 with.
3491 *
3492 * @remarks x86: Requires a 386 or later.
3493 */
3494#if RT_INLINE_ASM_EXTERNAL
3495DECLASM(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32);
3496#else
3497DECLINLINE(void) ASMAtomicUoAndU32(uint32_t volatile *pu32, uint32_t u32)
3498{
3499# if RT_INLINE_ASM_GNU_STYLE
3500 __asm__ __volatile__("andl %1, %0\n\t"
3501 : "=m" (*pu32)
3502 : "ir" (u32),
3503 "m" (*pu32));
3504# else
3505 __asm
3506 {
3507 mov eax, [u32]
3508# ifdef RT_ARCH_AMD64
3509 mov rdx, [pu32]
3510 and [rdx], eax
3511# else
3512 mov edx, [pu32]
3513 and [edx], eax
3514# endif
3515 }
3516# endif
3517}
3518#endif
3519
3520
3521/**
3522 * Atomically And a signed 32-bit value, unordered.
3523 *
3524 * @param pi32 Pointer to the pointer variable to AND i32 with.
3525 * @param i32 The value to AND *pi32 with.
3526 *
3527 * @remarks x86: Requires a 386 or later.
3528 */
3529DECLINLINE(void) ASMAtomicUoAndS32(int32_t volatile *pi32, int32_t i32)
3530{
3531 ASMAtomicUoAndU32((uint32_t volatile *)pi32, (uint32_t)i32);
3532}
3533
3534
3535/**
3536 * Atomically And an unsigned 64-bit value, unordered.
3537 *
3538 * @param pu64 Pointer to the pointer variable to AND u64 with.
3539 * @param u64 The value to AND *pu64 with.
3540 *
3541 * @remarks x86: Requires a Pentium or later.
3542 */
3543#if RT_INLINE_ASM_EXTERNAL
3544DECLASM(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64);
3545#else
3546DECLINLINE(void) ASMAtomicUoAndU64(uint64_t volatile *pu64, uint64_t u64)
3547{
3548# if RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
3549 __asm__ __volatile__("andq %1, %0\n\t"
3550 : "=m" (*pu64)
3551 : "r" (u64),
3552 "m" (*pu64));
3553# else
3554 for (;;)
3555 {
3556 uint64_t u64Old = ASMAtomicUoReadU64(pu64);
3557 uint64_t u64New = u64Old & u64;
3558 if (ASMAtomicCmpXchgU64(pu64, u64New, u64Old))
3559 break;
3560 ASMNopPause();
3561 }
3562# endif
3563}
3564#endif
3565
3566
3567/**
3568 * Atomically And a signed 64-bit value, unordered.
3569 *
3570 * @param pi64 Pointer to the pointer variable to AND i64 with.
3571 * @param i64 The value to AND *pi64 with.
3572 *
3573 * @remarks x86: Requires a Pentium or later.
3574 */
3575DECLINLINE(void) ASMAtomicUoAndS64(int64_t volatile *pi64, int64_t i64)
3576{
3577 ASMAtomicUoAndU64((uint64_t volatile *)pi64, (uint64_t)i64);
3578}
3579
3580
3581/**
3582 * Atomically increment an unsigned 32-bit value, unordered.
3583 *
3584 * @returns the new value.
3585 * @param pu32 Pointer to the variable to increment.
3586 *
3587 * @remarks x86: Requires a 486 or later.
3588 */
3589#if RT_INLINE_ASM_EXTERNAL
3590DECLASM(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32);
3591#else
3592DECLINLINE(uint32_t) ASMAtomicUoIncU32(uint32_t volatile *pu32)
3593{
3594 uint32_t u32;
3595# if RT_INLINE_ASM_GNU_STYLE
3596 __asm__ __volatile__("xaddl %0, %1\n\t"
3597 : "=r" (u32),
3598 "=m" (*pu32)
3599 : "0" (1),
3600 "m" (*pu32)
3601 : "memory");
3602 return u32 + 1;
3603# else
3604 __asm
3605 {
3606 mov eax, 1
3607# ifdef RT_ARCH_AMD64
3608 mov rdx, [pu32]
3609 xadd [rdx], eax
3610# else
3611 mov edx, [pu32]
3612 xadd [edx], eax
3613# endif
3614 mov u32, eax
3615 }
3616 return u32 + 1;
3617# endif
3618}
3619#endif
3620
3621
3622/**
3623 * Atomically decrement an unsigned 32-bit value, unordered.
3624 *
3625 * @returns the new value.
3626 * @param pu32 Pointer to the variable to decrement.
3627 *
3628 * @remarks x86: Requires a 486 or later.
3629 */
3630#if RT_INLINE_ASM_EXTERNAL
3631DECLASM(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32);
3632#else
3633DECLINLINE(uint32_t) ASMAtomicUoDecU32(uint32_t volatile *pu32)
3634{
3635 uint32_t u32;
3636# if RT_INLINE_ASM_GNU_STYLE
3637 __asm__ __volatile__("lock; xaddl %0, %1\n\t"
3638 : "=r" (u32),
3639 "=m" (*pu32)
3640 : "0" (-1),
3641 "m" (*pu32)
3642 : "memory");
3643 return u32 - 1;
3644# else
3645 __asm
3646 {
3647 mov eax, -1
3648# ifdef RT_ARCH_AMD64
3649 mov rdx, [pu32]
3650 xadd [rdx], eax
3651# else
3652 mov edx, [pu32]
3653 xadd [edx], eax
3654# endif
3655 mov u32, eax
3656 }
3657 return u32 - 1;
3658# endif
3659}
3660#endif
3661
3662
3663/** @def RT_ASM_PAGE_SIZE
3664 * We try avoid dragging in iprt/param.h here.
3665 * @internal
3666 */
3667#if defined(RT_ARCH_SPARC64)
3668# define RT_ASM_PAGE_SIZE 0x2000
3669# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3670# if PAGE_SIZE != 0x2000
3671# error "PAGE_SIZE is not 0x2000!"
3672# endif
3673# endif
3674#else
3675# define RT_ASM_PAGE_SIZE 0x1000
3676# if defined(PAGE_SIZE) && !defined(NT_INCLUDED)
3677# if PAGE_SIZE != 0x1000
3678# error "PAGE_SIZE is not 0x1000!"
3679# endif
3680# endif
3681#endif
3682
3683/**
3684 * Zeros a 4K memory page.
3685 *
3686 * @param pv Pointer to the memory block. This must be page aligned.
3687 */
3688#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3689DECLASM(void) ASMMemZeroPage(volatile void *pv);
3690# else
3691DECLINLINE(void) ASMMemZeroPage(volatile void *pv)
3692{
3693# if RT_INLINE_ASM_USES_INTRIN
3694# ifdef RT_ARCH_AMD64
3695 __stosq((unsigned __int64 *)pv, 0, RT_ASM_PAGE_SIZE / 8);
3696# else
3697 __stosd((unsigned long *)pv, 0, RT_ASM_PAGE_SIZE / 4);
3698# endif
3699
3700# elif RT_INLINE_ASM_GNU_STYLE
3701 RTCCUINTREG uDummy;
3702# ifdef RT_ARCH_AMD64
3703 __asm__ __volatile__("rep stosq"
3704 : "=D" (pv),
3705 "=c" (uDummy)
3706 : "0" (pv),
3707 "c" (RT_ASM_PAGE_SIZE >> 3),
3708 "a" (0)
3709 : "memory");
3710# else
3711 __asm__ __volatile__("rep stosl"
3712 : "=D" (pv),
3713 "=c" (uDummy)
3714 : "0" (pv),
3715 "c" (RT_ASM_PAGE_SIZE >> 2),
3716 "a" (0)
3717 : "memory");
3718# endif
3719# else
3720 __asm
3721 {
3722# ifdef RT_ARCH_AMD64
3723 xor rax, rax
3724 mov ecx, 0200h
3725 mov rdi, [pv]
3726 rep stosq
3727# else
3728 xor eax, eax
3729 mov ecx, 0400h
3730 mov edi, [pv]
3731 rep stosd
3732# endif
3733 }
3734# endif
3735}
3736# endif
3737
3738
3739/**
3740 * Zeros a memory block with a 32-bit aligned size.
3741 *
3742 * @param pv Pointer to the memory block.
3743 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3744 */
3745#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3746DECLASM(void) ASMMemZero32(volatile void *pv, size_t cb);
3747#else
3748DECLINLINE(void) ASMMemZero32(volatile void *pv, size_t cb)
3749{
3750# if RT_INLINE_ASM_USES_INTRIN
3751# ifdef RT_ARCH_AMD64
3752 if (!(cb & 7))
3753 __stosq((unsigned __int64 *)pv, 0, cb / 8);
3754 else
3755# endif
3756 __stosd((unsigned long *)pv, 0, cb / 4);
3757
3758# elif RT_INLINE_ASM_GNU_STYLE
3759 __asm__ __volatile__("rep stosl"
3760 : "=D" (pv),
3761 "=c" (cb)
3762 : "0" (pv),
3763 "1" (cb >> 2),
3764 "a" (0)
3765 : "memory");
3766# else
3767 __asm
3768 {
3769 xor eax, eax
3770# ifdef RT_ARCH_AMD64
3771 mov rcx, [cb]
3772 shr rcx, 2
3773 mov rdi, [pv]
3774# else
3775 mov ecx, [cb]
3776 shr ecx, 2
3777 mov edi, [pv]
3778# endif
3779 rep stosd
3780 }
3781# endif
3782}
3783#endif
3784
3785
3786/**
3787 * Fills a memory block with a 32-bit aligned size.
3788 *
3789 * @param pv Pointer to the memory block.
3790 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3791 * @param u32 The value to fill with.
3792 */
3793#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
3794DECLASM(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32);
3795#else
3796DECLINLINE(void) ASMMemFill32(volatile void *pv, size_t cb, uint32_t u32)
3797{
3798# if RT_INLINE_ASM_USES_INTRIN
3799# ifdef RT_ARCH_AMD64
3800 if (!(cb & 7))
3801 __stosq((unsigned __int64 *)pv, RT_MAKE_U64(u32, u32), cb / 8);
3802 else
3803# endif
3804 __stosd((unsigned long *)pv, u32, cb / 4);
3805
3806# elif RT_INLINE_ASM_GNU_STYLE
3807 __asm__ __volatile__("rep stosl"
3808 : "=D" (pv),
3809 "=c" (cb)
3810 : "0" (pv),
3811 "1" (cb >> 2),
3812 "a" (u32)
3813 : "memory");
3814# else
3815 __asm
3816 {
3817# ifdef RT_ARCH_AMD64
3818 mov rcx, [cb]
3819 shr rcx, 2
3820 mov rdi, [pv]
3821# else
3822 mov ecx, [cb]
3823 shr ecx, 2
3824 mov edi, [pv]
3825# endif
3826 mov eax, [u32]
3827 rep stosd
3828 }
3829# endif
3830}
3831#endif
3832
3833
3834/**
3835 * Checks if a memory block is all zeros.
3836 *
3837 * @returns Pointer to the first non-zero byte.
3838 * @returns NULL if all zero.
3839 *
3840 * @param pv Pointer to the memory block.
3841 * @param cb Number of bytes in the block.
3842 *
3843 * @todo Fix name, it is a predicate function but it's not returning boolean!
3844 */
3845#if !defined(RT_OS_LINUX) || !defined(__KERNEL__)
3846DECLASM(void *) ASMMemFirstNonZero(void const *pv, size_t cb);
3847#else
3848DECLINLINE(void *) ASMMemFirstNonZero(void const *pv, size_t cb)
3849{
3850 uint8_t const *pb = (uint8_t const *)pv;
3851 for (; cb; cb--, pb++)
3852 if (RT_LIKELY(*pb == 0))
3853 { /* likely */ }
3854 else
3855 return (void *)pb;
3856 return NULL;
3857}
3858#endif
3859
3860
3861/**
3862 * Checks if a memory block is all zeros.
3863 *
3864 * @returns true if zero, false if not.
3865 *
3866 * @param pv Pointer to the memory block.
3867 * @param cb Number of bytes in the block.
3868 *
3869 * @sa ASMMemFirstNonZero
3870 */
3871DECLINLINE(bool) ASMMemIsZero(void const *pv, size_t cb)
3872{
3873 return ASMMemFirstNonZero(pv, cb) == NULL;
3874}
3875
3876
3877/**
3878 * Checks if a memory page is all zeros.
3879 *
3880 * @returns true / false.
3881 *
3882 * @param pvPage Pointer to the page. Must be aligned on 16 byte
3883 * boundary
3884 */
3885DECLINLINE(bool) ASMMemIsZeroPage(void const *pvPage)
3886{
3887# if 0 /*RT_INLINE_ASM_GNU_STYLE - this is actually slower... */
3888 union { RTCCUINTREG r; bool f; } uAX;
3889 RTCCUINTREG xCX, xDI;
3890 Assert(!((uintptr_t)pvPage & 15));
3891 __asm__ __volatile__("repe; "
3892# ifdef RT_ARCH_AMD64
3893 "scasq\n\t"
3894# else
3895 "scasl\n\t"
3896# endif
3897 "setnc %%al\n\t"
3898 : "=&c" (xCX),
3899 "=&D" (xDI),
3900 "=&a" (uAX.r)
3901 : "mr" (pvPage),
3902# ifdef RT_ARCH_AMD64
3903 "0" (RT_ASM_PAGE_SIZE/8),
3904# else
3905 "0" (RT_ASM_PAGE_SIZE/4),
3906# endif
3907 "1" (pvPage),
3908 "2" (0));
3909 return uAX.f;
3910# else
3911 uintptr_t const *puPtr = (uintptr_t const *)pvPage;
3912 int cLeft = RT_ASM_PAGE_SIZE / sizeof(uintptr_t) / 8;
3913 Assert(!((uintptr_t)pvPage & 15));
3914 for (;;)
3915 {
3916 if (puPtr[0]) return false;
3917 if (puPtr[4]) return false;
3918
3919 if (puPtr[2]) return false;
3920 if (puPtr[6]) return false;
3921
3922 if (puPtr[1]) return false;
3923 if (puPtr[5]) return false;
3924
3925 if (puPtr[3]) return false;
3926 if (puPtr[7]) return false;
3927
3928 if (!--cLeft)
3929 return true;
3930 puPtr += 8;
3931 }
3932 return true;
3933# endif
3934}
3935
3936
3937/**
3938 * Checks if a memory block is filled with the specified byte, returning the
3939 * first mismatch.
3940 *
3941 * This is sort of an inverted memchr.
3942 *
3943 * @returns Pointer to the byte which doesn't equal u8.
3944 * @returns NULL if all equal to u8.
3945 *
3946 * @param pv Pointer to the memory block.
3947 * @param cb Number of bytes in the block.
3948 * @param u8 The value it's supposed to be filled with.
3949 *
3950 * @remarks No alignment requirements.
3951 */
3952#if !defined(RT_OS_LINUX) || !defined(__KERNEL__)
3953DECLASM(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8);
3954#else
3955DECLINLINE(void *) ASMMemFirstMismatchingU8(void const *pv, size_t cb, uint8_t u8)
3956{
3957 uint8_t const *pb = (uint8_t const *)pv;
3958 for (; cb; cb--, pb++)
3959 if (RT_LIKELY(*pb == u8))
3960 { /* likely */ }
3961 else
3962 return (void *)pb;
3963 return NULL;
3964}
3965#endif
3966
3967
3968/**
3969 * Checks if a memory block is filled with the specified byte.
3970 *
3971 * @returns true if all matching, false if not.
3972 *
3973 * @param pv Pointer to the memory block.
3974 * @param cb Number of bytes in the block.
3975 * @param u8 The value it's supposed to be filled with.
3976 *
3977 * @remarks No alignment requirements.
3978 */
3979DECLINLINE(bool) ASMMemIsAllU8(void const *pv, size_t cb, uint8_t u8)
3980{
3981 return ASMMemFirstMismatchingU8(pv, cb, u8) == NULL;
3982}
3983
3984
3985/**
3986 * Checks if a memory block is filled with the specified 32-bit value.
3987 *
3988 * This is a sort of inverted memchr.
3989 *
3990 * @returns Pointer to the first value which doesn't equal u32.
3991 * @returns NULL if all equal to u32.
3992 *
3993 * @param pv Pointer to the memory block.
3994 * @param cb Number of bytes in the block. This MUST be aligned on 32-bit!
3995 * @param u32 The value it's supposed to be filled with.
3996 */
3997DECLINLINE(uint32_t *) ASMMemFirstMismatchingU32(void const *pv, size_t cb, uint32_t u32)
3998{
3999/** @todo rewrite this in inline assembly? */
4000 uint32_t const *pu32 = (uint32_t const *)pv;
4001 for (; cb; cb -= 4, pu32++)
4002 if (RT_LIKELY(*pu32 == u32))
4003 { /* likely */ }
4004 else
4005 return (uint32_t *)pu32;
4006 return NULL;
4007}
4008
4009
4010/**
4011 * Probes a byte pointer for read access.
4012 *
4013 * While the function will not fault if the byte is not read accessible,
4014 * the idea is to do this in a safe place like before acquiring locks
4015 * and such like.
4016 *
4017 * Also, this functions guarantees that an eager compiler is not going
4018 * to optimize the probing away.
4019 *
4020 * @param pvByte Pointer to the byte.
4021 */
4022#if RT_INLINE_ASM_EXTERNAL
4023DECLASM(uint8_t) ASMProbeReadByte(const void *pvByte);
4024#else
4025DECLINLINE(uint8_t) ASMProbeReadByte(const void *pvByte)
4026{
4027 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4028 uint8_t u8;
4029# if RT_INLINE_ASM_GNU_STYLE
4030 __asm__ __volatile__("movb (%1), %0\n\t"
4031 : "=r" (u8)
4032 : "r" (pvByte));
4033# else
4034 __asm
4035 {
4036# ifdef RT_ARCH_AMD64
4037 mov rax, [pvByte]
4038 mov al, [rax]
4039# else
4040 mov eax, [pvByte]
4041 mov al, [eax]
4042# endif
4043 mov [u8], al
4044 }
4045# endif
4046 return u8;
4047}
4048#endif
4049
4050/**
4051 * Probes a buffer for read access page by page.
4052 *
4053 * While the function will fault if the buffer is not fully read
4054 * accessible, the idea is to do this in a safe place like before
4055 * acquiring locks and such like.
4056 *
4057 * Also, this functions guarantees that an eager compiler is not going
4058 * to optimize the probing away.
4059 *
4060 * @param pvBuf Pointer to the buffer.
4061 * @param cbBuf The size of the buffer in bytes. Must be >= 1.
4062 */
4063DECLINLINE(void) ASMProbeReadBuffer(const void *pvBuf, size_t cbBuf)
4064{
4065 /** @todo verify that the compiler actually doesn't optimize this away. (intel & gcc) */
4066 /* the first byte */
4067 const uint8_t *pu8 = (const uint8_t *)pvBuf;
4068 ASMProbeReadByte(pu8);
4069
4070 /* the pages in between pages. */
4071 while (cbBuf > RT_ASM_PAGE_SIZE)
4072 {
4073 ASMProbeReadByte(pu8);
4074 cbBuf -= RT_ASM_PAGE_SIZE;
4075 pu8 += RT_ASM_PAGE_SIZE;
4076 }
4077
4078 /* the last byte */
4079 ASMProbeReadByte(pu8 + cbBuf - 1);
4080}
4081
4082
4083
4084/** @defgroup grp_inline_bits Bit Operations
4085 * @{
4086 */
4087
4088
4089/**
4090 * Sets a bit in a bitmap.
4091 *
4092 * @param pvBitmap Pointer to the bitmap. This should be 32-bit aligned.
4093 * @param iBit The bit to set.
4094 *
4095 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4096 * However, doing so will yield better performance as well as avoiding
4097 * traps accessing the last bits in the bitmap.
4098 */
4099#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4100DECLASM(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit);
4101#else
4102DECLINLINE(void) ASMBitSet(volatile void *pvBitmap, int32_t iBit)
4103{
4104# if RT_INLINE_ASM_USES_INTRIN
4105 _bittestandset((long *)pvBitmap, iBit);
4106
4107# elif RT_INLINE_ASM_GNU_STYLE
4108 __asm__ __volatile__("btsl %1, %0"
4109 : "=m" (*(volatile long *)pvBitmap)
4110 : "Ir" (iBit),
4111 "m" (*(volatile long *)pvBitmap)
4112 : "memory");
4113# else
4114 __asm
4115 {
4116# ifdef RT_ARCH_AMD64
4117 mov rax, [pvBitmap]
4118 mov edx, [iBit]
4119 bts [rax], edx
4120# else
4121 mov eax, [pvBitmap]
4122 mov edx, [iBit]
4123 bts [eax], edx
4124# endif
4125 }
4126# endif
4127}
4128#endif
4129
4130
4131/**
4132 * Atomically sets a bit in a bitmap, ordered.
4133 *
4134 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4135 * the memory access isn't atomic!
4136 * @param iBit The bit to set.
4137 *
4138 * @remarks x86: Requires a 386 or later.
4139 */
4140#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4141DECLASM(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit);
4142#else
4143DECLINLINE(void) ASMAtomicBitSet(volatile void *pvBitmap, int32_t iBit)
4144{
4145 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4146# if RT_INLINE_ASM_USES_INTRIN
4147 _interlockedbittestandset((long *)pvBitmap, iBit);
4148# elif RT_INLINE_ASM_GNU_STYLE
4149 __asm__ __volatile__("lock; btsl %1, %0"
4150 : "=m" (*(volatile long *)pvBitmap)
4151 : "Ir" (iBit),
4152 "m" (*(volatile long *)pvBitmap)
4153 : "memory");
4154# else
4155 __asm
4156 {
4157# ifdef RT_ARCH_AMD64
4158 mov rax, [pvBitmap]
4159 mov edx, [iBit]
4160 lock bts [rax], edx
4161# else
4162 mov eax, [pvBitmap]
4163 mov edx, [iBit]
4164 lock bts [eax], edx
4165# endif
4166 }
4167# endif
4168}
4169#endif
4170
4171
4172/**
4173 * Clears a bit in a bitmap.
4174 *
4175 * @param pvBitmap Pointer to the bitmap.
4176 * @param iBit The bit to clear.
4177 *
4178 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4179 * However, doing so will yield better performance as well as avoiding
4180 * traps accessing the last bits in the bitmap.
4181 */
4182#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4183DECLASM(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit);
4184#else
4185DECLINLINE(void) ASMBitClear(volatile void *pvBitmap, int32_t iBit)
4186{
4187# if RT_INLINE_ASM_USES_INTRIN
4188 _bittestandreset((long *)pvBitmap, iBit);
4189
4190# elif RT_INLINE_ASM_GNU_STYLE
4191 __asm__ __volatile__("btrl %1, %0"
4192 : "=m" (*(volatile long *)pvBitmap)
4193 : "Ir" (iBit),
4194 "m" (*(volatile long *)pvBitmap)
4195 : "memory");
4196# else
4197 __asm
4198 {
4199# ifdef RT_ARCH_AMD64
4200 mov rax, [pvBitmap]
4201 mov edx, [iBit]
4202 btr [rax], edx
4203# else
4204 mov eax, [pvBitmap]
4205 mov edx, [iBit]
4206 btr [eax], edx
4207# endif
4208 }
4209# endif
4210}
4211#endif
4212
4213
4214/**
4215 * Atomically clears a bit in a bitmap, ordered.
4216 *
4217 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4218 * the memory access isn't atomic!
4219 * @param iBit The bit to toggle set.
4220 *
4221 * @remarks No memory barrier, take care on smp.
4222 * @remarks x86: Requires a 386 or later.
4223 */
4224#if RT_INLINE_ASM_EXTERNAL
4225DECLASM(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit);
4226#else
4227DECLINLINE(void) ASMAtomicBitClear(volatile void *pvBitmap, int32_t iBit)
4228{
4229 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4230# if RT_INLINE_ASM_GNU_STYLE
4231 __asm__ __volatile__("lock; btrl %1, %0"
4232 : "=m" (*(volatile long *)pvBitmap)
4233 : "Ir" (iBit),
4234 "m" (*(volatile long *)pvBitmap)
4235 : "memory");
4236# else
4237 __asm
4238 {
4239# ifdef RT_ARCH_AMD64
4240 mov rax, [pvBitmap]
4241 mov edx, [iBit]
4242 lock btr [rax], edx
4243# else
4244 mov eax, [pvBitmap]
4245 mov edx, [iBit]
4246 lock btr [eax], edx
4247# endif
4248 }
4249# endif
4250}
4251#endif
4252
4253
4254/**
4255 * Toggles a bit in a bitmap.
4256 *
4257 * @param pvBitmap Pointer to the bitmap.
4258 * @param iBit The bit to toggle.
4259 *
4260 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4261 * However, doing so will yield better performance as well as avoiding
4262 * traps accessing the last bits in the bitmap.
4263 */
4264#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4265DECLASM(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit);
4266#else
4267DECLINLINE(void) ASMBitToggle(volatile void *pvBitmap, int32_t iBit)
4268{
4269# if RT_INLINE_ASM_USES_INTRIN
4270 _bittestandcomplement((long *)pvBitmap, iBit);
4271# elif RT_INLINE_ASM_GNU_STYLE
4272 __asm__ __volatile__("btcl %1, %0"
4273 : "=m" (*(volatile long *)pvBitmap)
4274 : "Ir" (iBit),
4275 "m" (*(volatile long *)pvBitmap)
4276 : "memory");
4277# else
4278 __asm
4279 {
4280# ifdef RT_ARCH_AMD64
4281 mov rax, [pvBitmap]
4282 mov edx, [iBit]
4283 btc [rax], edx
4284# else
4285 mov eax, [pvBitmap]
4286 mov edx, [iBit]
4287 btc [eax], edx
4288# endif
4289 }
4290# endif
4291}
4292#endif
4293
4294
4295/**
4296 * Atomically toggles a bit in a bitmap, ordered.
4297 *
4298 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4299 * the memory access isn't atomic!
4300 * @param iBit The bit to test and set.
4301 *
4302 * @remarks x86: Requires a 386 or later.
4303 */
4304#if RT_INLINE_ASM_EXTERNAL
4305DECLASM(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit);
4306#else
4307DECLINLINE(void) ASMAtomicBitToggle(volatile void *pvBitmap, int32_t iBit)
4308{
4309 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4310# if RT_INLINE_ASM_GNU_STYLE
4311 __asm__ __volatile__("lock; btcl %1, %0"
4312 : "=m" (*(volatile long *)pvBitmap)
4313 : "Ir" (iBit),
4314 "m" (*(volatile long *)pvBitmap)
4315 : "memory");
4316# else
4317 __asm
4318 {
4319# ifdef RT_ARCH_AMD64
4320 mov rax, [pvBitmap]
4321 mov edx, [iBit]
4322 lock btc [rax], edx
4323# else
4324 mov eax, [pvBitmap]
4325 mov edx, [iBit]
4326 lock btc [eax], edx
4327# endif
4328 }
4329# endif
4330}
4331#endif
4332
4333
4334/**
4335 * Tests and sets a bit in a bitmap.
4336 *
4337 * @returns true if the bit was set.
4338 * @returns false if the bit was clear.
4339 *
4340 * @param pvBitmap Pointer to the bitmap.
4341 * @param iBit The bit to test and set.
4342 *
4343 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4344 * However, doing so will yield better performance as well as avoiding
4345 * traps accessing the last bits in the bitmap.
4346 */
4347#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4348DECLASM(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4349#else
4350DECLINLINE(bool) ASMBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4351{
4352 union { bool f; uint32_t u32; uint8_t u8; } rc;
4353# if RT_INLINE_ASM_USES_INTRIN
4354 rc.u8 = _bittestandset((long *)pvBitmap, iBit);
4355
4356# elif RT_INLINE_ASM_GNU_STYLE
4357 __asm__ __volatile__("btsl %2, %1\n\t"
4358 "setc %b0\n\t"
4359 "andl $1, %0\n\t"
4360 : "=q" (rc.u32),
4361 "=m" (*(volatile long *)pvBitmap)
4362 : "Ir" (iBit),
4363 "m" (*(volatile long *)pvBitmap)
4364 : "memory");
4365# else
4366 __asm
4367 {
4368 mov edx, [iBit]
4369# ifdef RT_ARCH_AMD64
4370 mov rax, [pvBitmap]
4371 bts [rax], edx
4372# else
4373 mov eax, [pvBitmap]
4374 bts [eax], edx
4375# endif
4376 setc al
4377 and eax, 1
4378 mov [rc.u32], eax
4379 }
4380# endif
4381 return rc.f;
4382}
4383#endif
4384
4385
4386/**
4387 * Atomically tests and sets a bit in a bitmap, ordered.
4388 *
4389 * @returns true if the bit was set.
4390 * @returns false if the bit was clear.
4391 *
4392 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4393 * the memory access isn't atomic!
4394 * @param iBit The bit to set.
4395 *
4396 * @remarks x86: Requires a 386 or later.
4397 */
4398#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4399DECLASM(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit);
4400#else
4401DECLINLINE(bool) ASMAtomicBitTestAndSet(volatile void *pvBitmap, int32_t iBit)
4402{
4403 union { bool f; uint32_t u32; uint8_t u8; } rc;
4404 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4405# if RT_INLINE_ASM_USES_INTRIN
4406 rc.u8 = _interlockedbittestandset((long *)pvBitmap, iBit);
4407# elif RT_INLINE_ASM_GNU_STYLE
4408 __asm__ __volatile__("lock; btsl %2, %1\n\t"
4409 "setc %b0\n\t"
4410 "andl $1, %0\n\t"
4411 : "=q" (rc.u32),
4412 "=m" (*(volatile long *)pvBitmap)
4413 : "Ir" (iBit),
4414 "m" (*(volatile long *)pvBitmap)
4415 : "memory");
4416# else
4417 __asm
4418 {
4419 mov edx, [iBit]
4420# ifdef RT_ARCH_AMD64
4421 mov rax, [pvBitmap]
4422 lock bts [rax], edx
4423# else
4424 mov eax, [pvBitmap]
4425 lock bts [eax], edx
4426# endif
4427 setc al
4428 and eax, 1
4429 mov [rc.u32], eax
4430 }
4431# endif
4432 return rc.f;
4433}
4434#endif
4435
4436
4437/**
4438 * Tests and clears a bit in a bitmap.
4439 *
4440 * @returns true if the bit was set.
4441 * @returns false if the bit was clear.
4442 *
4443 * @param pvBitmap Pointer to the bitmap.
4444 * @param iBit The bit to test and clear.
4445 *
4446 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4447 * However, doing so will yield better performance as well as avoiding
4448 * traps accessing the last bits in the bitmap.
4449 */
4450#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4451DECLASM(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4452#else
4453DECLINLINE(bool) ASMBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4454{
4455 union { bool f; uint32_t u32; uint8_t u8; } rc;
4456# if RT_INLINE_ASM_USES_INTRIN
4457 rc.u8 = _bittestandreset((long *)pvBitmap, iBit);
4458
4459# elif RT_INLINE_ASM_GNU_STYLE
4460 __asm__ __volatile__("btrl %2, %1\n\t"
4461 "setc %b0\n\t"
4462 "andl $1, %0\n\t"
4463 : "=q" (rc.u32),
4464 "=m" (*(volatile long *)pvBitmap)
4465 : "Ir" (iBit),
4466 "m" (*(volatile long *)pvBitmap)
4467 : "memory");
4468# else
4469 __asm
4470 {
4471 mov edx, [iBit]
4472# ifdef RT_ARCH_AMD64
4473 mov rax, [pvBitmap]
4474 btr [rax], edx
4475# else
4476 mov eax, [pvBitmap]
4477 btr [eax], edx
4478# endif
4479 setc al
4480 and eax, 1
4481 mov [rc.u32], eax
4482 }
4483# endif
4484 return rc.f;
4485}
4486#endif
4487
4488
4489/**
4490 * Atomically tests and clears a bit in a bitmap, ordered.
4491 *
4492 * @returns true if the bit was set.
4493 * @returns false if the bit was clear.
4494 *
4495 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4496 * the memory access isn't atomic!
4497 * @param iBit The bit to test and clear.
4498 *
4499 * @remarks No memory barrier, take care on smp.
4500 * @remarks x86: Requires a 386 or later.
4501 */
4502#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4503DECLASM(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit);
4504#else
4505DECLINLINE(bool) ASMAtomicBitTestAndClear(volatile void *pvBitmap, int32_t iBit)
4506{
4507 union { bool f; uint32_t u32; uint8_t u8; } rc;
4508 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4509# if RT_INLINE_ASM_USES_INTRIN
4510 rc.u8 = _interlockedbittestandreset((long *)pvBitmap, iBit);
4511
4512# elif RT_INLINE_ASM_GNU_STYLE
4513 __asm__ __volatile__("lock; btrl %2, %1\n\t"
4514 "setc %b0\n\t"
4515 "andl $1, %0\n\t"
4516 : "=q" (rc.u32),
4517 "=m" (*(volatile long *)pvBitmap)
4518 : "Ir" (iBit),
4519 "m" (*(volatile long *)pvBitmap)
4520 : "memory");
4521# else
4522 __asm
4523 {
4524 mov edx, [iBit]
4525# ifdef RT_ARCH_AMD64
4526 mov rax, [pvBitmap]
4527 lock btr [rax], edx
4528# else
4529 mov eax, [pvBitmap]
4530 lock btr [eax], edx
4531# endif
4532 setc al
4533 and eax, 1
4534 mov [rc.u32], eax
4535 }
4536# endif
4537 return rc.f;
4538}
4539#endif
4540
4541
4542/**
4543 * Tests and toggles a bit in a bitmap.
4544 *
4545 * @returns true if the bit was set.
4546 * @returns false if the bit was clear.
4547 *
4548 * @param pvBitmap Pointer to the bitmap.
4549 * @param iBit The bit to test and toggle.
4550 *
4551 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4552 * However, doing so will yield better performance as well as avoiding
4553 * traps accessing the last bits in the bitmap.
4554 */
4555#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4556DECLASM(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4557#else
4558DECLINLINE(bool) ASMBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4559{
4560 union { bool f; uint32_t u32; uint8_t u8; } rc;
4561# if RT_INLINE_ASM_USES_INTRIN
4562 rc.u8 = _bittestandcomplement((long *)pvBitmap, iBit);
4563
4564# elif RT_INLINE_ASM_GNU_STYLE
4565 __asm__ __volatile__("btcl %2, %1\n\t"
4566 "setc %b0\n\t"
4567 "andl $1, %0\n\t"
4568 : "=q" (rc.u32),
4569 "=m" (*(volatile long *)pvBitmap)
4570 : "Ir" (iBit),
4571 "m" (*(volatile long *)pvBitmap)
4572 : "memory");
4573# else
4574 __asm
4575 {
4576 mov edx, [iBit]
4577# ifdef RT_ARCH_AMD64
4578 mov rax, [pvBitmap]
4579 btc [rax], edx
4580# else
4581 mov eax, [pvBitmap]
4582 btc [eax], edx
4583# endif
4584 setc al
4585 and eax, 1
4586 mov [rc.u32], eax
4587 }
4588# endif
4589 return rc.f;
4590}
4591#endif
4592
4593
4594/**
4595 * Atomically tests and toggles a bit in a bitmap, ordered.
4596 *
4597 * @returns true if the bit was set.
4598 * @returns false if the bit was clear.
4599 *
4600 * @param pvBitmap Pointer to the bitmap. Must be 32-bit aligned, otherwise
4601 * the memory access isn't atomic!
4602 * @param iBit The bit to test and toggle.
4603 *
4604 * @remarks x86: Requires a 386 or later.
4605 */
4606#if RT_INLINE_ASM_EXTERNAL
4607DECLASM(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit);
4608#else
4609DECLINLINE(bool) ASMAtomicBitTestAndToggle(volatile void *pvBitmap, int32_t iBit)
4610{
4611 union { bool f; uint32_t u32; uint8_t u8; } rc;
4612 AssertMsg(!((uintptr_t)pvBitmap & 3), ("address %p not 32-bit aligned", pvBitmap));
4613# if RT_INLINE_ASM_GNU_STYLE
4614 __asm__ __volatile__("lock; btcl %2, %1\n\t"
4615 "setc %b0\n\t"
4616 "andl $1, %0\n\t"
4617 : "=q" (rc.u32),
4618 "=m" (*(volatile long *)pvBitmap)
4619 : "Ir" (iBit),
4620 "m" (*(volatile long *)pvBitmap)
4621 : "memory");
4622# else
4623 __asm
4624 {
4625 mov edx, [iBit]
4626# ifdef RT_ARCH_AMD64
4627 mov rax, [pvBitmap]
4628 lock btc [rax], edx
4629# else
4630 mov eax, [pvBitmap]
4631 lock btc [eax], edx
4632# endif
4633 setc al
4634 and eax, 1
4635 mov [rc.u32], eax
4636 }
4637# endif
4638 return rc.f;
4639}
4640#endif
4641
4642
4643/**
4644 * Tests if a bit in a bitmap is set.
4645 *
4646 * @returns true if the bit is set.
4647 * @returns false if the bit is clear.
4648 *
4649 * @param pvBitmap Pointer to the bitmap.
4650 * @param iBit The bit to test.
4651 *
4652 * @remarks The 32-bit aligning of pvBitmap is not a strict requirement.
4653 * However, doing so will yield better performance as well as avoiding
4654 * traps accessing the last bits in the bitmap.
4655 */
4656#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
4657DECLASM(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit);
4658#else
4659DECLINLINE(bool) ASMBitTest(const volatile void *pvBitmap, int32_t iBit)
4660{
4661 union { bool f; uint32_t u32; uint8_t u8; } rc;
4662# if RT_INLINE_ASM_USES_INTRIN
4663 rc.u32 = _bittest((long *)pvBitmap, iBit);
4664# elif RT_INLINE_ASM_GNU_STYLE
4665
4666 __asm__ __volatile__("btl %2, %1\n\t"
4667 "setc %b0\n\t"
4668 "andl $1, %0\n\t"
4669 : "=q" (rc.u32)
4670 : "m" (*(const volatile long *)pvBitmap),
4671 "Ir" (iBit)
4672 : "memory");
4673# else
4674 __asm
4675 {
4676 mov edx, [iBit]
4677# ifdef RT_ARCH_AMD64
4678 mov rax, [pvBitmap]
4679 bt [rax], edx
4680# else
4681 mov eax, [pvBitmap]
4682 bt [eax], edx
4683# endif
4684 setc al
4685 and eax, 1
4686 mov [rc.u32], eax
4687 }
4688# endif
4689 return rc.f;
4690}
4691#endif
4692
4693
4694/**
4695 * Clears a bit range within a bitmap.
4696 *
4697 * @param pvBitmap Pointer to the bitmap.
4698 * @param iBitStart The First bit to clear.
4699 * @param iBitEnd The first bit not to clear.
4700 */
4701DECLINLINE(void) ASMBitClearRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4702{
4703 if (iBitStart < iBitEnd)
4704 {
4705 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4706 int32_t iStart = iBitStart & ~31;
4707 int32_t iEnd = iBitEnd & ~31;
4708 if (iStart == iEnd)
4709 *pu32 &= ((UINT32_C(1) << (iBitStart & 31)) - 1) | ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4710 else
4711 {
4712 /* bits in first dword. */
4713 if (iBitStart & 31)
4714 {
4715 *pu32 &= (UINT32_C(1) << (iBitStart & 31)) - 1;
4716 pu32++;
4717 iBitStart = iStart + 32;
4718 }
4719
4720 /* whole dword. */
4721 if (iBitStart != iEnd)
4722 ASMMemZero32(pu32, (iEnd - iBitStart) >> 3);
4723
4724 /* bits in last dword. */
4725 if (iBitEnd & 31)
4726 {
4727 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4728 *pu32 &= ~((UINT32_C(1) << (iBitEnd & 31)) - 1);
4729 }
4730 }
4731 }
4732}
4733
4734
4735/**
4736 * Sets a bit range within a bitmap.
4737 *
4738 * @param pvBitmap Pointer to the bitmap.
4739 * @param iBitStart The First bit to set.
4740 * @param iBitEnd The first bit not to set.
4741 */
4742DECLINLINE(void) ASMBitSetRange(volatile void *pvBitmap, int32_t iBitStart, int32_t iBitEnd)
4743{
4744 if (iBitStart < iBitEnd)
4745 {
4746 volatile uint32_t *pu32 = (volatile uint32_t *)pvBitmap + (iBitStart >> 5);
4747 int32_t iStart = iBitStart & ~31;
4748 int32_t iEnd = iBitEnd & ~31;
4749 if (iStart == iEnd)
4750 *pu32 |= ((UINT32_C(1) << (iBitEnd - iBitStart)) - 1) << (iBitStart & 31);
4751 else
4752 {
4753 /* bits in first dword. */
4754 if (iBitStart & 31)
4755 {
4756 *pu32 |= ~((UINT32_C(1) << (iBitStart & 31)) - 1);
4757 pu32++;
4758 iBitStart = iStart + 32;
4759 }
4760
4761 /* whole dword. */
4762 if (iBitStart != iEnd)
4763 ASMMemFill32(pu32, (iEnd - iBitStart) >> 3, ~UINT32_C(0));
4764
4765 /* bits in last dword. */
4766 if (iBitEnd & 31)
4767 {
4768 pu32 = (volatile uint32_t *)pvBitmap + (iBitEnd >> 5);
4769 *pu32 |= (UINT32_C(1) << (iBitEnd & 31)) - 1;
4770 }
4771 }
4772 }
4773}
4774
4775
4776/**
4777 * Finds the first clear bit in a bitmap.
4778 *
4779 * @returns Index of the first zero bit.
4780 * @returns -1 if no clear bit was found.
4781 * @param pvBitmap Pointer to the bitmap.
4782 * @param cBits The number of bits in the bitmap. Multiple of 32.
4783 */
4784#if RT_INLINE_ASM_EXTERNAL
4785DECLASM(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits);
4786#else
4787DECLINLINE(int32_t) ASMBitFirstClear(const volatile void *pvBitmap, uint32_t cBits)
4788{
4789 if (cBits)
4790 {
4791 int32_t iBit;
4792# if RT_INLINE_ASM_GNU_STYLE
4793 RTCCUINTREG uEAX, uECX, uEDI;
4794 cBits = RT_ALIGN_32(cBits, 32);
4795 __asm__ __volatile__("repe; scasl\n\t"
4796 "je 1f\n\t"
4797# ifdef RT_ARCH_AMD64
4798 "lea -4(%%rdi), %%rdi\n\t"
4799 "xorl (%%rdi), %%eax\n\t"
4800 "subq %5, %%rdi\n\t"
4801# else
4802 "lea -4(%%edi), %%edi\n\t"
4803 "xorl (%%edi), %%eax\n\t"
4804 "subl %5, %%edi\n\t"
4805# endif
4806 "shll $3, %%edi\n\t"
4807 "bsfl %%eax, %%edx\n\t"
4808 "addl %%edi, %%edx\n\t"
4809 "1:\t\n"
4810 : "=d" (iBit),
4811 "=&c" (uECX),
4812 "=&D" (uEDI),
4813 "=&a" (uEAX)
4814 : "0" (0xffffffff),
4815 "mr" (pvBitmap),
4816 "1" (cBits >> 5),
4817 "2" (pvBitmap),
4818 "3" (0xffffffff));
4819# else
4820 cBits = RT_ALIGN_32(cBits, 32);
4821 __asm
4822 {
4823# ifdef RT_ARCH_AMD64
4824 mov rdi, [pvBitmap]
4825 mov rbx, rdi
4826# else
4827 mov edi, [pvBitmap]
4828 mov ebx, edi
4829# endif
4830 mov edx, 0ffffffffh
4831 mov eax, edx
4832 mov ecx, [cBits]
4833 shr ecx, 5
4834 repe scasd
4835 je done
4836
4837# ifdef RT_ARCH_AMD64
4838 lea rdi, [rdi - 4]
4839 xor eax, [rdi]
4840 sub rdi, rbx
4841# else
4842 lea edi, [edi - 4]
4843 xor eax, [edi]
4844 sub edi, ebx
4845# endif
4846 shl edi, 3
4847 bsf edx, eax
4848 add edx, edi
4849 done:
4850 mov [iBit], edx
4851 }
4852# endif
4853 return iBit;
4854 }
4855 return -1;
4856}
4857#endif
4858
4859
4860/**
4861 * Finds the next clear bit in a bitmap.
4862 *
4863 * @returns Index of the first zero bit.
4864 * @returns -1 if no clear bit was found.
4865 * @param pvBitmap Pointer to the bitmap.
4866 * @param cBits The number of bits in the bitmap. Multiple of 32.
4867 * @param iBitPrev The bit returned from the last search.
4868 * The search will start at iBitPrev + 1.
4869 */
4870#if RT_INLINE_ASM_EXTERNAL
4871DECLASM(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
4872#else
4873DECLINLINE(int) ASMBitNextClear(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
4874{
4875 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
4876 int iBit = ++iBitPrev & 31;
4877 if (iBit)
4878 {
4879 /*
4880 * Inspect the 32-bit word containing the unaligned bit.
4881 */
4882 uint32_t u32 = ~pau32Bitmap[iBitPrev / 32] >> iBit;
4883
4884# if RT_INLINE_ASM_USES_INTRIN
4885 unsigned long ulBit = 0;
4886 if (_BitScanForward(&ulBit, u32))
4887 return ulBit + iBitPrev;
4888# else
4889# if RT_INLINE_ASM_GNU_STYLE
4890 __asm__ __volatile__("bsf %1, %0\n\t"
4891 "jnz 1f\n\t"
4892 "movl $-1, %0\n\t"
4893 "1:\n\t"
4894 : "=r" (iBit)
4895 : "r" (u32));
4896# else
4897 __asm
4898 {
4899 mov edx, [u32]
4900 bsf eax, edx
4901 jnz done
4902 mov eax, 0ffffffffh
4903 done:
4904 mov [iBit], eax
4905 }
4906# endif
4907 if (iBit >= 0)
4908 return iBit + iBitPrev;
4909# endif
4910
4911 /*
4912 * Skip ahead and see if there is anything left to search.
4913 */
4914 iBitPrev |= 31;
4915 iBitPrev++;
4916 if (cBits <= (uint32_t)iBitPrev)
4917 return -1;
4918 }
4919
4920 /*
4921 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
4922 */
4923 iBit = ASMBitFirstClear(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
4924 if (iBit >= 0)
4925 iBit += iBitPrev;
4926 return iBit;
4927}
4928#endif
4929
4930
4931/**
4932 * Finds the first set bit in a bitmap.
4933 *
4934 * @returns Index of the first set bit.
4935 * @returns -1 if no clear bit was found.
4936 * @param pvBitmap Pointer to the bitmap.
4937 * @param cBits The number of bits in the bitmap. Multiple of 32.
4938 */
4939#if RT_INLINE_ASM_EXTERNAL
4940DECLASM(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits);
4941#else
4942DECLINLINE(int32_t) ASMBitFirstSet(const volatile void *pvBitmap, uint32_t cBits)
4943{
4944 if (cBits)
4945 {
4946 int32_t iBit;
4947# if RT_INLINE_ASM_GNU_STYLE
4948 RTCCUINTREG uEAX, uECX, uEDI;
4949 cBits = RT_ALIGN_32(cBits, 32);
4950 __asm__ __volatile__("repe; scasl\n\t"
4951 "je 1f\n\t"
4952# ifdef RT_ARCH_AMD64
4953 "lea -4(%%rdi), %%rdi\n\t"
4954 "movl (%%rdi), %%eax\n\t"
4955 "subq %5, %%rdi\n\t"
4956# else
4957 "lea -4(%%edi), %%edi\n\t"
4958 "movl (%%edi), %%eax\n\t"
4959 "subl %5, %%edi\n\t"
4960# endif
4961 "shll $3, %%edi\n\t"
4962 "bsfl %%eax, %%edx\n\t"
4963 "addl %%edi, %%edx\n\t"
4964 "1:\t\n"
4965 : "=d" (iBit),
4966 "=&c" (uECX),
4967 "=&D" (uEDI),
4968 "=&a" (uEAX)
4969 : "0" (0xffffffff),
4970 "mr" (pvBitmap),
4971 "1" (cBits >> 5),
4972 "2" (pvBitmap),
4973 "3" (0));
4974# else
4975 cBits = RT_ALIGN_32(cBits, 32);
4976 __asm
4977 {
4978# ifdef RT_ARCH_AMD64
4979 mov rdi, [pvBitmap]
4980 mov rbx, rdi
4981# else
4982 mov edi, [pvBitmap]
4983 mov ebx, edi
4984# endif
4985 mov edx, 0ffffffffh
4986 xor eax, eax
4987 mov ecx, [cBits]
4988 shr ecx, 5
4989 repe scasd
4990 je done
4991# ifdef RT_ARCH_AMD64
4992 lea rdi, [rdi - 4]
4993 mov eax, [rdi]
4994 sub rdi, rbx
4995# else
4996 lea edi, [edi - 4]
4997 mov eax, [edi]
4998 sub edi, ebx
4999# endif
5000 shl edi, 3
5001 bsf edx, eax
5002 add edx, edi
5003 done:
5004 mov [iBit], edx
5005 }
5006# endif
5007 return iBit;
5008 }
5009 return -1;
5010}
5011#endif
5012
5013
5014/**
5015 * Finds the next set bit in a bitmap.
5016 *
5017 * @returns Index of the next set bit.
5018 * @returns -1 if no set bit was found.
5019 * @param pvBitmap Pointer to the bitmap.
5020 * @param cBits The number of bits in the bitmap. Multiple of 32.
5021 * @param iBitPrev The bit returned from the last search.
5022 * The search will start at iBitPrev + 1.
5023 */
5024#if RT_INLINE_ASM_EXTERNAL
5025DECLASM(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev);
5026#else
5027DECLINLINE(int) ASMBitNextSet(const volatile void *pvBitmap, uint32_t cBits, uint32_t iBitPrev)
5028{
5029 const volatile uint32_t *pau32Bitmap = (const volatile uint32_t *)pvBitmap;
5030 int iBit = ++iBitPrev & 31;
5031 if (iBit)
5032 {
5033 /*
5034 * Inspect the 32-bit word containing the unaligned bit.
5035 */
5036 uint32_t u32 = pau32Bitmap[iBitPrev / 32] >> iBit;
5037
5038# if RT_INLINE_ASM_USES_INTRIN
5039 unsigned long ulBit = 0;
5040 if (_BitScanForward(&ulBit, u32))
5041 return ulBit + iBitPrev;
5042# else
5043# if RT_INLINE_ASM_GNU_STYLE
5044 __asm__ __volatile__("bsf %1, %0\n\t"
5045 "jnz 1f\n\t"
5046 "movl $-1, %0\n\t"
5047 "1:\n\t"
5048 : "=r" (iBit)
5049 : "r" (u32));
5050# else
5051 __asm
5052 {
5053 mov edx, [u32]
5054 bsf eax, edx
5055 jnz done
5056 mov eax, 0ffffffffh
5057 done:
5058 mov [iBit], eax
5059 }
5060# endif
5061 if (iBit >= 0)
5062 return iBit + iBitPrev;
5063# endif
5064
5065 /*
5066 * Skip ahead and see if there is anything left to search.
5067 */
5068 iBitPrev |= 31;
5069 iBitPrev++;
5070 if (cBits <= (uint32_t)iBitPrev)
5071 return -1;
5072 }
5073
5074 /*
5075 * 32-bit aligned search, let ASMBitFirstClear do the dirty work.
5076 */
5077 iBit = ASMBitFirstSet(&pau32Bitmap[iBitPrev / 32], cBits - iBitPrev);
5078 if (iBit >= 0)
5079 iBit += iBitPrev;
5080 return iBit;
5081}
5082#endif
5083
5084
5085/**
5086 * Finds the first bit which is set in the given 32-bit integer.
5087 * Bits are numbered from 1 (least significant) to 32.
5088 *
5089 * @returns index [1..32] of the first set bit.
5090 * @returns 0 if all bits are cleared.
5091 * @param u32 Integer to search for set bits.
5092 * @remarks Similar to ffs() in BSD.
5093 */
5094#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5095DECLASM(unsigned) ASMBitFirstSetU32(uint32_t u32);
5096#else
5097DECLINLINE(unsigned) ASMBitFirstSetU32(uint32_t u32)
5098{
5099# if RT_INLINE_ASM_USES_INTRIN
5100 unsigned long iBit;
5101 if (_BitScanForward(&iBit, u32))
5102 iBit++;
5103 else
5104 iBit = 0;
5105# elif RT_INLINE_ASM_GNU_STYLE
5106 uint32_t iBit;
5107 __asm__ __volatile__("bsf %1, %0\n\t"
5108 "jnz 1f\n\t"
5109 "xorl %0, %0\n\t"
5110 "jmp 2f\n"
5111 "1:\n\t"
5112 "incl %0\n"
5113 "2:\n\t"
5114 : "=r" (iBit)
5115 : "rm" (u32));
5116# else
5117 uint32_t iBit;
5118 _asm
5119 {
5120 bsf eax, [u32]
5121 jnz found
5122 xor eax, eax
5123 jmp done
5124 found:
5125 inc eax
5126 done:
5127 mov [iBit], eax
5128 }
5129# endif
5130 return iBit;
5131}
5132#endif
5133
5134
5135/**
5136 * Finds the first bit which is set in the given 32-bit integer.
5137 * Bits are numbered from 1 (least significant) to 32.
5138 *
5139 * @returns index [1..32] of the first set bit.
5140 * @returns 0 if all bits are cleared.
5141 * @param i32 Integer to search for set bits.
5142 * @remark Similar to ffs() in BSD.
5143 */
5144DECLINLINE(unsigned) ASMBitFirstSetS32(int32_t i32)
5145{
5146 return ASMBitFirstSetU32((uint32_t)i32);
5147}
5148
5149
5150/**
5151 * Finds the first bit which is set in the given 64-bit integer.
5152 *
5153 * Bits are numbered from 1 (least significant) to 64.
5154 *
5155 * @returns index [1..64] of the first set bit.
5156 * @returns 0 if all bits are cleared.
5157 * @param u64 Integer to search for set bits.
5158 * @remarks Similar to ffs() in BSD.
5159 */
5160#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5161DECLASM(unsigned) ASMBitFirstSetU64(uint64_t u64);
5162#else
5163DECLINLINE(unsigned) ASMBitFirstSetU64(uint64_t u64)
5164{
5165# if RT_INLINE_ASM_USES_INTRIN
5166 unsigned long iBit;
5167# if ARCH_BITS == 64
5168 if (_BitScanForward64(&iBit, u64))
5169 iBit++;
5170 else
5171 iBit = 0;
5172# else
5173 if (_BitScanForward(&iBit, (uint32_t)u64))
5174 iBit++;
5175 else if (_BitScanForward(&iBit, (uint32_t)(u64 >> 32)))
5176 iBit += 33;
5177 else
5178 iBit = 0;
5179# endif
5180# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5181 uint64_t iBit;
5182 __asm__ __volatile__("bsfq %1, %0\n\t"
5183 "jnz 1f\n\t"
5184 "xorl %0, %0\n\t"
5185 "jmp 2f\n"
5186 "1:\n\t"
5187 "incl %0\n"
5188 "2:\n\t"
5189 : "=r" (iBit)
5190 : "rm" (u64));
5191# else
5192 unsigned iBit = ASMBitFirstSetU32((uint32_t)u64);
5193 if (!iBit)
5194 {
5195 iBit = ASMBitFirstSetU32((uint32_t)(u64 >> 32));
5196 if (iBit)
5197 iBit += 32;
5198 }
5199# endif
5200 return (unsigned)iBit;
5201}
5202#endif
5203
5204
5205/**
5206 * Finds the first bit which is set in the given 16-bit integer.
5207 *
5208 * Bits are numbered from 1 (least significant) to 16.
5209 *
5210 * @returns index [1..16] of the first set bit.
5211 * @returns 0 if all bits are cleared.
5212 * @param u16 Integer to search for set bits.
5213 * @remarks For 16-bit bs3kit code.
5214 */
5215#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5216DECLASM(unsigned) ASMBitFirstSetU16(uint16_t u16);
5217#else
5218DECLINLINE(unsigned) ASMBitFirstSetU16(uint16_t u16)
5219{
5220 return ASMBitFirstSetU32((uint32_t)u16);
5221}
5222#endif
5223
5224
5225/**
5226 * Finds the last bit which is set in the given 32-bit integer.
5227 * Bits are numbered from 1 (least significant) to 32.
5228 *
5229 * @returns index [1..32] of the last set bit.
5230 * @returns 0 if all bits are cleared.
5231 * @param u32 Integer to search for set bits.
5232 * @remark Similar to fls() in BSD.
5233 */
5234#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5235DECLASM(unsigned) ASMBitLastSetU32(uint32_t u32);
5236#else
5237DECLINLINE(unsigned) ASMBitLastSetU32(uint32_t u32)
5238{
5239# if RT_INLINE_ASM_USES_INTRIN
5240 unsigned long iBit;
5241 if (_BitScanReverse(&iBit, u32))
5242 iBit++;
5243 else
5244 iBit = 0;
5245# elif RT_INLINE_ASM_GNU_STYLE
5246 uint32_t iBit;
5247 __asm__ __volatile__("bsrl %1, %0\n\t"
5248 "jnz 1f\n\t"
5249 "xorl %0, %0\n\t"
5250 "jmp 2f\n"
5251 "1:\n\t"
5252 "incl %0\n"
5253 "2:\n\t"
5254 : "=r" (iBit)
5255 : "rm" (u32));
5256# else
5257 uint32_t iBit;
5258 _asm
5259 {
5260 bsr eax, [u32]
5261 jnz found
5262 xor eax, eax
5263 jmp done
5264 found:
5265 inc eax
5266 done:
5267 mov [iBit], eax
5268 }
5269# endif
5270 return iBit;
5271}
5272#endif
5273
5274
5275/**
5276 * Finds the last bit which is set in the given 32-bit integer.
5277 * Bits are numbered from 1 (least significant) to 32.
5278 *
5279 * @returns index [1..32] of the last set bit.
5280 * @returns 0 if all bits are cleared.
5281 * @param i32 Integer to search for set bits.
5282 * @remark Similar to fls() in BSD.
5283 */
5284DECLINLINE(unsigned) ASMBitLastSetS32(int32_t i32)
5285{
5286 return ASMBitLastSetU32((uint32_t)i32);
5287}
5288
5289
5290/**
5291 * Finds the last bit which is set in the given 64-bit integer.
5292 *
5293 * Bits are numbered from 1 (least significant) to 64.
5294 *
5295 * @returns index [1..64] of the last set bit.
5296 * @returns 0 if all bits are cleared.
5297 * @param u64 Integer to search for set bits.
5298 * @remark Similar to fls() in BSD.
5299 */
5300#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5301DECLASM(unsigned) ASMBitLastSetU64(uint64_t u64);
5302#else
5303DECLINLINE(unsigned) ASMBitLastSetU64(uint64_t u64)
5304{
5305# if RT_INLINE_ASM_USES_INTRIN
5306 unsigned long iBit;
5307# if ARCH_BITS == 64
5308 if (_BitScanReverse64(&iBit, u64))
5309 iBit++;
5310 else
5311 iBit = 0;
5312# else
5313 if (_BitScanReverse(&iBit, (uint32_t)(u64 >> 32)))
5314 iBit += 33;
5315 else if (_BitScanReverse(&iBit, (uint32_t)u64))
5316 iBit++;
5317 else
5318 iBit = 0;
5319# endif
5320# elif RT_INLINE_ASM_GNU_STYLE && ARCH_BITS == 64
5321 uint64_t iBit;
5322 __asm__ __volatile__("bsrq %1, %0\n\t"
5323 "jnz 1f\n\t"
5324 "xorl %0, %0\n\t"
5325 "jmp 2f\n"
5326 "1:\n\t"
5327 "incl %0\n"
5328 "2:\n\t"
5329 : "=r" (iBit)
5330 : "rm" (u64));
5331# else
5332 unsigned iBit = ASMBitLastSetU32((uint32_t)(u64 >> 32));
5333 if (iBit)
5334 iBit += 32;
5335 else
5336 iBit = ASMBitLastSetU32((uint32_t)u64);
5337#endif
5338 return (unsigned)iBit;
5339}
5340#endif
5341
5342
5343/**
5344 * Finds the last bit which is set in the given 16-bit integer.
5345 *
5346 * Bits are numbered from 1 (least significant) to 16.
5347 *
5348 * @returns index [1..16] of the last set bit.
5349 * @returns 0 if all bits are cleared.
5350 * @param u16 Integer to search for set bits.
5351 * @remarks For 16-bit bs3kit code.
5352 */
5353#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5354DECLASM(unsigned) ASMBitLastSetU16(uint16_t u16);
5355#else
5356DECLINLINE(unsigned) ASMBitLastSetU16(uint16_t u16)
5357{
5358 return ASMBitLastSetU32((uint32_t)u16);
5359}
5360#endif
5361
5362
5363/**
5364 * Reverse the byte order of the given 16-bit integer.
5365 *
5366 * @returns Revert
5367 * @param u16 16-bit integer value.
5368 */
5369#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5370DECLASM(uint16_t) ASMByteSwapU16(uint16_t u16);
5371#else
5372DECLINLINE(uint16_t) ASMByteSwapU16(uint16_t u16)
5373{
5374# if RT_INLINE_ASM_USES_INTRIN
5375 u16 = _byteswap_ushort(u16);
5376# elif RT_INLINE_ASM_GNU_STYLE
5377 __asm__ ("rorw $8, %0" : "=r" (u16) : "0" (u16));
5378# else
5379 _asm
5380 {
5381 mov ax, [u16]
5382 ror ax, 8
5383 mov [u16], ax
5384 }
5385# endif
5386 return u16;
5387}
5388#endif
5389
5390
5391/**
5392 * Reverse the byte order of the given 32-bit integer.
5393 *
5394 * @returns Revert
5395 * @param u32 32-bit integer value.
5396 */
5397#if RT_INLINE_ASM_EXTERNAL && !RT_INLINE_ASM_USES_INTRIN
5398DECLASM(uint32_t) ASMByteSwapU32(uint32_t u32);
5399#else
5400DECLINLINE(uint32_t) ASMByteSwapU32(uint32_t u32)
5401{
5402# if RT_INLINE_ASM_USES_INTRIN
5403 u32 = _byteswap_ulong(u32);
5404# elif RT_INLINE_ASM_GNU_STYLE
5405 __asm__ ("bswapl %0" : "=r" (u32) : "0" (u32));
5406# else
5407 _asm
5408 {
5409 mov eax, [u32]
5410 bswap eax
5411 mov [u32], eax
5412 }
5413# endif
5414 return u32;
5415}
5416#endif
5417
5418
5419/**
5420 * Reverse the byte order of the given 64-bit integer.
5421 *
5422 * @returns Revert
5423 * @param u64 64-bit integer value.
5424 */
5425DECLINLINE(uint64_t) ASMByteSwapU64(uint64_t u64)
5426{
5427#if defined(RT_ARCH_AMD64) && RT_INLINE_ASM_USES_INTRIN
5428 u64 = _byteswap_uint64(u64);
5429#else
5430 u64 = (uint64_t)ASMByteSwapU32((uint32_t)u64) << 32
5431 | (uint64_t)ASMByteSwapU32((uint32_t)(u64 >> 32));
5432#endif
5433 return u64;
5434}
5435
5436
5437/**
5438 * Rotate 32-bit unsigned value to the left by @a cShift.
5439 *
5440 * @returns Rotated value.
5441 * @param u32 The value to rotate.
5442 * @param cShift How many bits to rotate by.
5443 */
5444#ifdef __WATCOMC__
5445DECLASM(uint32_t) ASMRotateLeftU32(uint32_t u32, unsigned cShift);
5446#else
5447DECLINLINE(uint32_t) ASMRotateLeftU32(uint32_t u32, uint32_t cShift)
5448{
5449# if RT_INLINE_ASM_USES_INTRIN
5450 return _rotl(u32, cShift);
5451# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5452 __asm__ __volatile__("roll %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5453 return u32;
5454# else
5455 cShift &= 31;
5456 return (u32 << cShift) | (u32 >> (32 - cShift));
5457# endif
5458}
5459#endif
5460
5461
5462/**
5463 * Rotate 32-bit unsigned value to the right by @a cShift.
5464 *
5465 * @returns Rotated value.
5466 * @param u32 The value to rotate.
5467 * @param cShift How many bits to rotate by.
5468 */
5469#ifdef __WATCOMC__
5470DECLASM(uint32_t) ASMRotateRightU32(uint32_t u32, unsigned cShift);
5471#else
5472DECLINLINE(uint32_t) ASMRotateRightU32(uint32_t u32, uint32_t cShift)
5473{
5474# if RT_INLINE_ASM_USES_INTRIN
5475 return _rotr(u32, cShift);
5476# elif RT_INLINE_ASM_GNU_STYLE && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
5477 __asm__ __volatile__("rorl %b1, %0" : "=g" (u32) : "Ic" (cShift), "0" (u32));
5478 return u32;
5479# else
5480 cShift &= 31;
5481 return (u32 >> cShift) | (u32 << (32 - cShift));
5482# endif
5483}
5484#endif
5485
5486
5487/**
5488 * Rotate 64-bit unsigned value to the left by @a cShift.
5489 *
5490 * @returns Rotated value.
5491 * @param u64 The value to rotate.
5492 * @param cShift How many bits to rotate by.
5493 */
5494DECLINLINE(uint64_t) ASMRotateLeftU64(uint64_t u64, uint32_t cShift)
5495{
5496#if RT_INLINE_ASM_USES_INTRIN
5497 return _rotl64(u64, cShift);
5498#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5499 __asm__ __volatile__("rolq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5500 return u64;
5501#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5502 uint32_t uSpill;
5503 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5504 "jz 1f\n\t"
5505 "xchgl %%eax, %%edx\n\t"
5506 "1:\n\t"
5507 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5508 "jz 2f\n\t"
5509 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5510 "shldl %%cl,%%eax,%%edx\n\t" /* shift the hi value left, feeding MSBits from the low value. */
5511 "shldl %%cl,%2,%%eax\n\t" /* shift the lo value left, feeding MSBits from the saved hi value. */
5512 "2:\n\t" /* } */
5513 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5514 : "0" (u64),
5515 "1" (cShift));
5516 return u64;
5517#else
5518 cShift &= 63;
5519 return (u64 << cShift) | (u64 >> (64 - cShift));
5520#endif
5521}
5522
5523
5524/**
5525 * Rotate 64-bit unsigned value to the right by @a cShift.
5526 *
5527 * @returns Rotated value.
5528 * @param u64 The value to rotate.
5529 * @param cShift How many bits to rotate by.
5530 */
5531DECLINLINE(uint64_t) ASMRotateRightU64(uint64_t u64, uint32_t cShift)
5532{
5533#if RT_INLINE_ASM_USES_INTRIN
5534 return _rotr64(u64, cShift);
5535#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_AMD64)
5536 __asm__ __volatile__("rorq %b1, %0" : "=g" (u64) : "Jc" (cShift), "0" (u64));
5537 return u64;
5538#elif RT_INLINE_ASM_GNU_STYLE && defined(RT_ARCH_X86)
5539 uint32_t uSpill;
5540 __asm__ __volatile__("testb $0x20, %%cl\n\t" /* if (cShift >= 0x20) { swap(u64.hi, u64lo); cShift -= 0x20; } */
5541 "jz 1f\n\t"
5542 "xchgl %%eax, %%edx\n\t"
5543 "1:\n\t"
5544 "andb $0x1f, %%cl\n\t" /* if (cShift & 0x1f) { */
5545 "jz 2f\n\t"
5546 "movl %%edx, %2\n\t" /* save the hi value in %3. */
5547 "shrdl %%cl,%%eax,%%edx\n\t" /* shift the hi value right, feeding LSBits from the low value. */
5548 "shrdl %%cl,%2,%%eax\n\t" /* shift the lo value right, feeding LSBits from the saved hi value. */
5549 "2:\n\t" /* } */
5550 : "=A" (u64), "=c" (cShift), "=r" (uSpill)
5551 : "0" (u64),
5552 "1" (cShift));
5553 return u64;
5554#else
5555 cShift &= 63;
5556 return (u64 >> cShift) | (u64 << (64 - cShift));
5557#endif
5558}
5559
5560/** @} */
5561
5562
5563/** @} */
5564
5565#endif
5566
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette