VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 103922

Last change on this file since 103922 was 103919, checked in by vboxsync, 12 months ago

VMM/IEM: Implement native emitter for IEM_MC_STORE_YREG_U128_ZX_VLMAX() and fixes for IEM_MC_STORE_XREG_U128(), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 374.8 KB
 
1/* $Id: IEMAllN8veRecompFuncs.h 103919 2024-03-19 13:52:22Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value, they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
117 if ( enmClass == kIemNativeGstRegRef_XReg
118 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
119 {
120 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
121 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
122 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
123
124 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
125 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
126 }
127#endif
128 RT_NOREF(pReNative, enmClass, idxReg);
129 return off;
130}
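/*
 * Illustrative sketch (not part of the original file): the general shape of the
 * delayed-writeback scheme the flush above participates in.  Guest registers that
 * are shadowed in host registers carry a dirty bit; flushing copies the shadow
 * value back to the context structure and clears the bit.  All names and types
 * below are hypothetical and only demonstrate the concept.
 */
typedef struct EXAMPLEREGCACHE
{
    uint64_t aGstShadowValues[16];  /* shadow copies held in "host registers"             */
    uint64_t aCtxValues[16];        /* the CPUMCTX-like backing store                     */
    uint16_t bmDirty;               /* bit N set => guest register N has a pending write  */
} EXAMPLEREGCACHE;

DECLINLINE(void) exampleFlushPendingSpecificWrite(EXAMPLEREGCACHE *pCache, uint8_t idxReg)
{
    uint16_t const fMask = (uint16_t)(1U << idxReg);    /* caller guarantees idxReg < 16 */
    if (pCache->bmDirty & fMask)
    {
        pCache->aCtxValues[idxReg] = pCache->aGstShadowValues[idxReg]; /* write back      */
        pCache->bmDirty &= (uint16_t)~fMask;                           /* clear dirty bit */
    }
}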
131
132
133
134/*********************************************************************************************************************************
135* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
136*********************************************************************************************************************************/
137
138#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
139 { \
140 Assert(pReNative->Core.bmVars == 0); \
141 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
142 Assert(pReNative->Core.bmStack == 0); \
143 pReNative->fMc = (a_fMcFlags); \
144 pReNative->fCImpl = (a_fCImplFlags); \
145 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
146
147/** We have to get to the end in recompilation mode, as otherwise we won't
148 * generate code for all the IEM_MC_IF_XXX branches. */
149#define IEM_MC_END() \
150 iemNativeVarFreeAll(pReNative); \
151 } return off
152
153
154
155/*********************************************************************************************************************************
156* Native Emitter Support. *
157*********************************************************************************************************************************/
158
159#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
160
161#define IEM_MC_NATIVE_ELSE() } else {
162
163#define IEM_MC_NATIVE_ENDIF() } ((void)0)
164
165
166#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
167 off = a_fnEmitter(pReNative, off)
168
169#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
170 off = a_fnEmitter(pReNative, off, (a0))
171
172#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
173 off = a_fnEmitter(pReNative, off, (a0), (a1))
174
175#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
176 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
177
178#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
179 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
180
181#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
182 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
183
184#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
185 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
186
187#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
189
190#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
192
193
194
195/*********************************************************************************************************************************
196* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
197*********************************************************************************************************************************/
198
199#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
200 pReNative->fMc = 0; \
201 pReNative->fCImpl = (a_fFlags); \
202 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
203
204
205#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
206 pReNative->fMc = 0; \
207 pReNative->fCImpl = (a_fFlags); \
208 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
209
210DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
211 uint8_t idxInstr, uint64_t a_fGstShwFlush,
212 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
213{
214 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
215}
216
217
218#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
219 pReNative->fMc = 0; \
220 pReNative->fCImpl = (a_fFlags); \
221 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
222 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
223
224DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
225 uint8_t idxInstr, uint64_t a_fGstShwFlush,
226 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
227{
228 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
229}
230
231
232#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
233 pReNative->fMc = 0; \
234 pReNative->fCImpl = (a_fFlags); \
235 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
236 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
237
238DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
239 uint8_t idxInstr, uint64_t a_fGstShwFlush,
240 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
241 uint64_t uArg2)
242{
243 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
244}
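/*
 * Illustrative sketch (not part of the original file): the deferral pattern used by the
 * IEM_MC_DEFER_TO_CIMPL_X_RET_THREADED macros above.  Instead of recompiling a complex
 * instruction, the recompiler emits a call to the instruction's C implementation, passing
 * the instruction length and up to three decoded operands.  The direct call below only
 * demonstrates the calling shape; the names and the int return type are hypothetical.
 */
typedef int FNEXAMPLECIMPL2(void *pvVCpu, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1);

DECLINLINE(int) exampleDeferToCImpl2(void *pvVCpu, uint8_t cbInstr, FNEXAMPLECIMPL2 *pfnCImpl,
                                     uint64_t uArg0, uint64_t uArg1)
{
    /* The real emitter generates host code that performs this call at execution time. */
    return pfnCImpl(pvVCpu, cbInstr, uArg0, uArg1);
}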
245
246
247
248/*********************************************************************************************************************************
249* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
250*********************************************************************************************************************************/
251
252/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
253 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
254DECL_INLINE_THROW(uint32_t)
255iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
256{
257 /*
258 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
259 * return with a special status code and make the execution loop deal with
260 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
261 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
262 * could continue w/o interruption, it probably will drop into the
263 * debugger, so it is not worth the effort of trying to service it here; we
264 * just lump it in with the handling of the others.
265 *
266 * To simplify the code and the register state management even more (wrt
267 * the immediate in the AND operation), we always update the flags and skip
268 * the conditional jump associated with the extra check.
269 */
270 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
271 <= UINT32_MAX);
272#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
273 AssertMsg( pReNative->idxCurCall == 0
274 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
275 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
276#endif
277
278 /*
279 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
280 * any pending register writes must be flushed.
281 */
282 off = iemNativeRegFlushPendingWrites(pReNative, off);
283
284 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
285 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
286 true /*fSkipLivenessAssert*/);
287 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
288 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
289 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
290 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
291 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
292
293 /* Free but don't flush the EFLAGS register. */
294 iemNativeRegFreeTmp(pReNative, idxEflReg);
295
296 return off;
297}
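/*
 * Illustrative sketch (not part of the original file): the guest-visible logic the code
 * emitted above implements, written as plain C.  The function name and the bool return
 * convention are hypothetical; the constants are the ones used by the emitter.
 */
DECLINLINE(bool) exampleFinishInstructionFlagsCheck(uint32_t *pfEFlags)
{
    /* Anything beyond RF/interrupt-shadow pending?  Then leave the TB and let the
       execution loop (or the exception machinery) deal with it. */
    if (*pfEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        return false;                                   /* -> ReturnWithFlags label   */
    /* Otherwise just clear RF and the interrupt shadow and keep going. */
    *pfEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
    return true;                                        /* keep executing the TB      */
}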
298
299
300/** The VINF_SUCCESS dummy. */
301template<int const a_rcNormal>
302DECL_FORCE_INLINE(uint32_t)
303iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
304{
305 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
306 if (a_rcNormal != VINF_SUCCESS)
307 {
308#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
309 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
310#else
311 RT_NOREF_PV(idxInstr);
312#endif
313
314 /* As this code returns from the TB any pending register writes must be flushed. */
315 off = iemNativeRegFlushPendingWrites(pReNative, off);
316
317 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
318 }
319 return off;
320}
321
322
323#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
324 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
325 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
326
327#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
328 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
329 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
330 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
331
332/** Same as iemRegAddToRip64AndFinishingNoFlags. */
333DECL_INLINE_THROW(uint32_t)
334iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
335{
336#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
337# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
338 if (!pReNative->Core.offPc)
339 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
340# endif
341
342 /* Allocate a temporary PC register. */
343 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
344
345 /* Perform the addition and store the result. */
346 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
347 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
348
349 /* Free but don't flush the PC register. */
350 iemNativeRegFreeTmp(pReNative, idxPcReg);
351#endif
352
353#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
354 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
355
356 pReNative->Core.offPc += cbInstr;
357# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
358 off = iemNativePcAdjustCheck(pReNative, off);
359# endif
360 if (pReNative->cCondDepth)
361 off = iemNativeEmitPcWriteback(pReNative, off);
362 else
363 pReNative->Core.cInstrPcUpdateSkipped++;
364#endif
365
366 return off;
367}
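/*
 * Illustrative sketch (not part of the original file): what IEMNATIVE_WITH_DELAYED_PC_UPDATING
 * buys in the function above.  Instead of storing RIP after every instruction, the advance is
 * accumulated (Core.offPc) and written back in one go when it becomes necessary, e.g. before
 * entering a conditional block.  The names below are hypothetical.
 */
DECLINLINE(uint64_t) exampleAdvanceRipDelayed(uint64_t uGstRip, uint64_t *poffPcPending,
                                              uint8_t cbInstr, bool fWritebackNow)
{
    *poffPcPending += cbInstr;          /* just account for the advance (no store emitted) */
    if (fWritebackNow)
    {
        uGstRip += *poffPcPending;      /* single combined RIP update                      */
        *poffPcPending = 0;
    }
    return uGstRip;
}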
368
369
370#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
371 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
372 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
373
374#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
375 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
376 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
377 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
378
379/** Same as iemRegAddToEip32AndFinishingNoFlags. */
380DECL_INLINE_THROW(uint32_t)
381iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
382{
383#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
384# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
385 if (!pReNative->Core.offPc)
386 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
387# endif
388
389 /* Allocate a temporary PC register. */
390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
391
392 /* Perform the addition and store the result. */
393 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
394 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395
396 /* Free but don't flush the PC register. */
397 iemNativeRegFreeTmp(pReNative, idxPcReg);
398#endif
399
400#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
401 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
402
403 pReNative->Core.offPc += cbInstr;
404# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
405 off = iemNativePcAdjustCheck(pReNative, off);
406# endif
407 if (pReNative->cCondDepth)
408 off = iemNativeEmitPcWriteback(pReNative, off);
409 else
410 pReNative->Core.cInstrPcUpdateSkipped++;
411#endif
412
413 return off;
414}
415
416
417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
418 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
419 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
420
421#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
422 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
423 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
424 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
425
426/** Same as iemRegAddToIp16AndFinishingNoFlags. */
427DECL_INLINE_THROW(uint32_t)
428iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
429{
430#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
431# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
432 if (!pReNative->Core.offPc)
433 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
434# endif
435
436 /* Allocate a temporary PC register. */
437 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
438
439 /* Perform the addition and store the result. */
440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
441 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
442 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
443
444 /* Free but don't flush the PC register. */
445 iemNativeRegFreeTmp(pReNative, idxPcReg);
446#endif
447
448#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
449 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
450
451 pReNative->Core.offPc += cbInstr;
452# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
453 off = iemNativePcAdjustCheck(pReNative, off);
454# endif
455 if (pReNative->cCondDepth)
456 off = iemNativeEmitPcWriteback(pReNative, off);
457 else
458 pReNative->Core.cInstrPcUpdateSkipped++;
459#endif
460
461 return off;
462}
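/*
 * Illustrative sketch (not part of the original file): the value the pre-386 variant above
 * computes.  The addition is done as a 32-bit operation and the result is then truncated to
 * 16 bits (iemNativeEmitClear16UpGpr), so IP wraps within the 64KB segment.  The function
 * name is hypothetical.
 */
DECLINLINE(uint64_t) exampleAdvanceIp16(uint64_t uGstRip, uint8_t cbInstr)
{
    uint32_t const uNewEip = (uint32_t)uGstRip + cbInstr;   /* 32-bit add, high RIP bits dropped */
    return uNewEip & UINT32_C(0xffff);                      /* keep only the 16-bit IP           */
}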
463
464
465
466/*********************************************************************************************************************************
467* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
468*********************************************************************************************************************************/
469
470#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
471 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
472 (a_enmEffOpSize), pCallEntry->idxInstr); \
473 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
474
475#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
476 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
477 (a_enmEffOpSize), pCallEntry->idxInstr); \
478 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
479 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
480
481#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
482 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
483 IEMMODE_16BIT, pCallEntry->idxInstr); \
484 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
485
486#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
487 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
488 IEMMODE_16BIT, pCallEntry->idxInstr); \
489 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
490 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
491
492#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
493 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
494 IEMMODE_64BIT, pCallEntry->idxInstr); \
495 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
496
497#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
498 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
499 IEMMODE_64BIT, pCallEntry->idxInstr); \
500 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
501 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
502
503/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
504 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
505 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
506DECL_INLINE_THROW(uint32_t)
507iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
508 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
509{
510 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
511
512 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
513 off = iemNativeRegFlushPendingWrites(pReNative, off);
514
515#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
516 Assert(pReNative->Core.offPc == 0);
517
518 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
519#endif
520
521 /* Allocate a temporary PC register. */
522 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
523
524 /* Perform the addition. */
525 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
526
527 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
528 {
529 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
530 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
531 }
532 else
533 {
534 /* Just truncate the result to 16-bit IP. */
535 Assert(enmEffOpSize == IEMMODE_16BIT);
536 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
537 }
538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
539
540 /* Free but don't flush the PC register. */
541 iemNativeRegFreeTmp(pReNative, idxPcReg);
542
543 return off;
544}
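/*
 * Illustrative sketch (not part of the original file): the target calculation and the
 * canonical check performed by the emitted 64-bit path above.  An address is canonical when
 * bits 63:48 are a sign extension of bit 47; a non-canonical new RIP takes the RaiseGp0 path
 * and exits the TB.  With a 16-bit operand size the result is instead truncated to IP.  The
 * name below is hypothetical.
 */
DECLINLINE(bool) exampleRip64RelJumpOk(uint64_t uGstRip, uint8_t cbInstr, int32_t offDisp, uint64_t *puNewRip)
{
    uint64_t const uNewRip = uGstRip + cbInstr + (int64_t)offDisp;
    *puNewRip = uNewRip;
    return (int64_t)uNewRip == ((int64_t)(uNewRip << 16) >> 16);  /* canonical <=> sign-extended from bit 47 */
}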
545
546
547#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
548 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
549 (a_enmEffOpSize), pCallEntry->idxInstr); \
550 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
551
552#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
553 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
554 (a_enmEffOpSize), pCallEntry->idxInstr); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
557
558#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
559 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
560 IEMMODE_16BIT, pCallEntry->idxInstr); \
561 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
562
563#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
564 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
565 IEMMODE_16BIT, pCallEntry->idxInstr); \
566 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
567 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
568
569#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
570 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
571 IEMMODE_32BIT, pCallEntry->idxInstr); \
572 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
573
574#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
575 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
576 IEMMODE_32BIT, pCallEntry->idxInstr); \
577 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
578 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
579
580/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
581 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
582 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
583DECL_INLINE_THROW(uint32_t)
584iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
585 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
586{
587 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
588
589 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
590 off = iemNativeRegFlushPendingWrites(pReNative, off);
591
592#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
593 Assert(pReNative->Core.offPc == 0);
594
595 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
596#endif
597
598 /* Allocate a temporary PC register. */
599 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
600
601 /* Perform the addition. */
602 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
603
604 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
605 if (enmEffOpSize == IEMMODE_16BIT)
606 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
607
608 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
609/** @todo we can skip this in 32-bit FLAT mode. */
610 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
611
612 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
613
614 /* Free but don't flush the PC register. */
615 iemNativeRegFreeTmp(pReNative, idxPcReg);
616
617 return off;
618}
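/*
 * Illustrative sketch (not part of the original file): the protected-mode calculation the
 * emitted code above performs - a 32-bit wrap-around add, optional truncation to IP for a
 * 16-bit operand size, then the CS limit check that feeds the RaiseGp0 path.  The name is
 * hypothetical.
 */
DECLINLINE(bool) exampleEip32RelJumpOk(uint32_t uGstEip, uint8_t cbInstr, int32_t offDisp,
                                       bool f16BitOpSize, uint32_t uCsLimit, uint32_t *puNewEip)
{
    uint32_t uNewEip = uGstEip + cbInstr + (uint32_t)offDisp;   /* 32-bit wrap-around add   */
    if (f16BitOpSize)
        uNewEip &= UINT32_C(0xffff);                            /* truncate to 16-bit IP    */
    *puNewEip = uNewEip;
    return uNewEip <= uCsLimit;                                 /* false -> #GP(0), TB exit */
}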
619
620
621#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
623 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
624
625#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
626 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
627 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
628 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
629
630#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
631 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
632 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
633
634#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
635 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
636 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
637 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
638
639#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
640 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
641 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
642
643#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
644 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
645 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
646 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
647
648/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
649DECL_INLINE_THROW(uint32_t)
650iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
651 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
652{
653 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
654 off = iemNativeRegFlushPendingWrites(pReNative, off);
655
656#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
657 Assert(pReNative->Core.offPc == 0);
658
659 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
660#endif
661
662 /* Allocate a temporary PC register. */
663 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
664
665 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
666 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
667 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
668 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
669 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
670
671 /* Free but don't flush the PC register. */
672 iemNativeRegFreeTmp(pReNative, idxPcReg);
673
674 return off;
675}
676
677
678
679/*********************************************************************************************************************************
680* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
681*********************************************************************************************************************************/
682
683/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
684#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
685 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
686
687/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
688#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
689 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
690
691/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
692#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
693 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
694
695/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
696 * clears flags. */
697#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
698 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
699 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
700
701/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
702 * clears flags. */
703#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
704 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
705 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
706
707/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
708 * clears flags. */
709#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
710 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
711 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
712
713#undef IEM_MC_SET_RIP_U16_AND_FINISH
714
715
716/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
717#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
718 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
719
720/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
721#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
722 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
723
724/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
725 * clears flags. */
726#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
727 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
728 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
729
730/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
731 * and clears flags. */
732#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
733 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
734 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
735
736#undef IEM_MC_SET_RIP_U32_AND_FINISH
737
738
739/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
740#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
741 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
742
743/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
744 * and clears flags. */
745#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
746 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
747 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
748
749#undef IEM_MC_SET_RIP_U64_AND_FINISH
750
751
752/** Same as iemRegRipJumpU16AndFinishNoFlags,
753 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
754DECL_INLINE_THROW(uint32_t)
755iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
756 uint8_t idxInstr, uint8_t cbVar)
757{
758 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
759 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
760
761 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
762 off = iemNativeRegFlushPendingWrites(pReNative, off);
763
764#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
765 Assert(pReNative->Core.offPc == 0);
766
767 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
768#endif
769
770 /* Get a register with the new PC loaded from idxVarPc.
771 Note! This ASSUMES that the high bits of the GPR are zeroed. */
772 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
773
774 /* Check limit (may #GP(0) + exit TB). */
775 if (!f64Bit)
776/** @todo we can skip this test in FLAT 32-bit mode. */
777 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
778 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
779 else if (cbVar > sizeof(uint32_t))
780 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
781
782 /* Store the result. */
783 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
784
785 iemNativeVarRegisterRelease(pReNative, idxVarPc);
786 /** @todo implicitly free the variable? */
787
788 return off;
789}
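/*
 * Illustrative sketch (not part of the original file): the validation applied to an indirect
 * jump target by the function above.  16/32-bit code checks the new PC against the CS limit;
 * 64-bit code checks that the address is canonical (a 32-bit target with zero-extended high
 * bits needs no check at all).  The name is hypothetical.
 */
DECLINLINE(bool) exampleIndirectJumpTargetOk(uint64_t uNewPc, bool f64Bit, uint32_t uCsLimit)
{
    if (!f64Bit)
        return (uint32_t)uNewPc <= uCsLimit;                        /* CS limit check, else #GP(0)  */
    return (int64_t)uNewPc == ((int64_t)(uNewPc << 16) >> 16);      /* canonical check, else #GP(0) */
}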
790
791
792
793/*********************************************************************************************************************************
794* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
795*********************************************************************************************************************************/
796
797#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
798 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
799
800/**
801 * Emits code to check if a \#NM exception should be raised.
802 *
803 * @returns New code buffer offset, UINT32_MAX on failure.
804 * @param pReNative The native recompile state.
805 * @param off The code buffer offset.
806 * @param idxInstr The current instruction.
807 */
808DECL_INLINE_THROW(uint32_t)
809iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
810{
811#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
812 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
813
814 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
815 {
816#endif
817 /*
818 * Make sure we don't have any outstanding guest register writes as we may
819 * raise an #NM and all guest registers must be up to date in CPUMCTX.
820 */
821 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
822 off = iemNativeRegFlushPendingWrites(pReNative, off);
823
824#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
825 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
826#else
827 RT_NOREF(idxInstr);
828#endif
829
830 /* Allocate a temporary CR0 register. */
831 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
832 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
833
834 /*
835 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
836 * return raisexcpt();
837 */
838 /* Test and jump. */
839 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
840
841 /* Free but don't flush the CR0 register. */
842 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
843
844#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
845 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
846 }
847 else
848 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
849#endif
850
851 return off;
852}
853
854
855#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
856 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
857
858/**
859 * Emits code to check if a \#MF exception should be raised.
860 *
861 * @returns New code buffer offset, UINT32_MAX on failure.
862 * @param pReNative The native recompile state.
863 * @param off The code buffer offset.
864 * @param idxInstr The current instruction.
865 */
866DECL_INLINE_THROW(uint32_t)
867iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
868{
869 /*
870 * Make sure we don't have any outstanding guest register writes as we may
871 * raise an #MF and all guest registers must be up to date in CPUMCTX.
872 */
873 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
874 off = iemNativeRegFlushPendingWrites(pReNative, off);
875
876#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
877 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
878#else
879 RT_NOREF(idxInstr);
880#endif
881
882 /* Allocate a temporary FSW register. */
883 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
884 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
885
886 /*
887 * if ((FSW & X86_FSW_ES) != 0)
888 * return raisexcpt();
889 */
890 /* Test and jump. */
891 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
892
893 /* Free but don't flush the FSW register. */
894 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
895
896 return off;
897}
898
899
900#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
901 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
902
903/**
904 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
905 *
906 * @returns New code buffer offset, UINT32_MAX on failure.
907 * @param pReNative The native recompile state.
908 * @param off The code buffer offset.
909 * @param idxInstr The current instruction.
910 */
911DECL_INLINE_THROW(uint32_t)
912iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
913{
914#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
915 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
916
917 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
918 {
919#endif
920 /*
921 * Make sure we don't have any outstanding guest register writes as we may
922 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
923 */
924 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
925 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
926
927#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
928 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
929#else
930 RT_NOREF(idxInstr);
931#endif
932
933 /* Allocate a temporary CR0 and CR4 register. */
934 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
935 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
936 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
937 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
938
939 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
940#ifdef RT_ARCH_AMD64
941 /*
942 * We do a modified test here:
943 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
944 * else { goto RaiseSseRelated; }
945 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
946 * all targets except the 386, and since the 386 doesn't support SSE,
947 * this should be a safe assumption.
948 */
949 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
950 //pCodeBuf[off++] = 0xcc;
951 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
952 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
953 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
954 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
955 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
956 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
957
958#elif defined(RT_ARCH_ARM64)
959 /*
960 * We do a modified test here:
961 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
962 * else { goto RaiseSseRelated; }
963 */
964 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
965 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
966 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
967 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
968 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
969 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
970 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
971 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
972 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
973 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
974 idxLabelRaiseSseRelated);
975
976#else
977# error "Port me!"
978#endif
979
980 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
981 iemNativeRegFreeTmp(pReNative, idxTmpReg);
982 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
983 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
984
985#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
986 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
987 }
988 else
989 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
990#endif
991
992 return off;
993}
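/*
 * Illustrative sketch (not part of the original file): the straightforward form of the SSE
 * availability check, followed by the folded variant the AMD64 sequence above implements.
 * The fold relies on CR0 bit 9 being zero (as noted in the comment above), so the three
 * interesting bits can be merged into one register and compared in a single step.  The
 * names are hypothetical.
 */
DECLINLINE(bool) exampleSseXcptCheckFails(uint32_t uCr0, uint32_t uCr4)
{
    return (uCr0 & (X86_CR0_EM | X86_CR0_TS)) != 0      /* #UD (EM) or #NM (TS)                 */
        || (uCr4 & X86_CR4_OSFXSR) == 0;                /* #UD when the OS hasn't enabled FXSR  */
}

DECLINLINE(bool) exampleSseXcptCheckFailsFolded(uint32_t uCr0, uint32_t uCr4)
{
    uint32_t uTmp = (uCr4 & X86_CR4_OSFXSR) | uCr0;                 /* merge the bits of interest   */
    uTmp &= X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR;               /* isolate them                 */
    return (uTmp ^ X86_CR4_OSFXSR) != 0;                            /* fast path: OSFXSR=1, EM=TS=0 */
}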
994
995
996#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
997 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
998
999/**
1000 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1001 *
1002 * @returns New code buffer offset, UINT32_MAX on failure.
1003 * @param pReNative The native recompile state.
1004 * @param off The code buffer offset.
1005 * @param idxInstr The current instruction.
1006 */
1007DECL_INLINE_THROW(uint32_t)
1008iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1009{
1010#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1011 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1012
1013 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1014 {
1015#endif
1016 /*
1017 * Make sure we don't have any outstanding guest register writes as we may
1018 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1019 */
1020 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1021 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
1022
1023#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1024 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1025#else
1026 RT_NOREF(idxInstr);
1027#endif
1028
1029 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1030 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1031 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1032 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1033 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1034 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1035
1036 /*
1037 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1038 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1039 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1040 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1041 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1042 * { likely }
1043 * else { goto RaiseAvxRelated; }
1044 */
1045#ifdef RT_ARCH_AMD64
1046 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1047 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1048 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1049 ^ 0x1a) ) { likely }
1050 else { goto RaiseAvxRelated; } */
1051 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1052 //pCodeBuf[off++] = 0xcc;
1053 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1054 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1055 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1056 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1057 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1058 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1059 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1060 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1061 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1062 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1063 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1064
1065#elif defined(RT_ARCH_ARM64)
1066 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1067 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1068 else { goto RaiseAvxRelated; } */
1069 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1070 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1071 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1072 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1073 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1074 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1075 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1076 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1077 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1078 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1079 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1080 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1081 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1082 idxLabelRaiseAvxRelated);
1083
1084#else
1085# error "Port me!"
1086#endif
1087
1088 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1089 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1090 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1091 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1092#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1093 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1094 }
1095 else
1096 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1097#endif
1098
1099 return off;
1100}
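/*
 * Illustrative sketch (not part of the original file): the unfolded form of the AVX
 * availability check that the combined AMD64/ARM64 sequences above implement.  Both
 * XCR0.SSE and XCR0.YMM must be enabled and CR4.OSXSAVE must be set (otherwise #UD),
 * and CR0.TS must be clear (otherwise #NM).  The name is hypothetical.
 */
DECLINLINE(bool) exampleAvxXcptCheckFails(uint32_t uCr0, uint32_t uCr4, uint64_t uXcr0)
{
    return (uXcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)  /* #UD */
        || (uCr4 & RT_BIT_32(X86_CR4_OSXSAVE_BIT)) == 0                          /* #UD */
        || (uCr0 & X86_CR0_TS) != 0;                                             /* #NM */
}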
1101
1102
1103#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1104 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
1105
1106/**
1107 * Emits code to raise a SIMD floating-point exception (either \#UD or \#XF).
1108 *
1109 * @returns New code buffer offset, UINT32_MAX on failure.
1110 * @param pReNative The native recompile state.
1111 * @param off The code buffer offset.
1112 * @param idxInstr The current instruction.
1113 */
1114DECL_INLINE_THROW(uint32_t)
1115iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1116{
1117 /*
1118 * Make sure we don't have any outstanding guest register writes as we may
1119 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1120 */
1121 off = iemNativeRegFlushPendingWrites(pReNative, off);
1122
1123#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1124 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1125#else
1126 RT_NOREF(idxInstr);
1127#endif
1128
1129 /* Allocate a temporary CR4 register. */
1130 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
1131 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
1132 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
1133
1134 /*
1135 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
1136 * return raisexcpt();
1137 */
1138 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
1139
1140 /* raise \#UD exception unconditionally. */
1141 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
1142
1143 /* Free but don't flush the CR4 register. */
1144 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1145
1146 return off;
1147}
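/*
 * Illustrative sketch (not part of the original file): the architectural rule behind the
 * emitter above.  When an unmasked SSE/AVX floating-point exception is pending, #XF is the
 * exception to deliver if the OS has opted in via CR4.OSXMMEXCPT; otherwise the instruction
 * raises #UD.  The name is hypothetical.
 */
DECLINLINE(bool) exampleSimdFpXcptIsXf(uint32_t uCr4)
{
    return (uCr4 & RT_BIT_32(X86_CR4_OSXMMEEXCPT_BIT)) != 0;   /* true -> #XF, false -> #UD */
}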
1148
1149
1150#define IEM_MC_RAISE_DIVIDE_ERROR() \
1151 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1152
1153/**
1154 * Emits code to raise a \#DE.
1155 *
1156 * @returns New code buffer offset, UINT32_MAX on failure.
1157 * @param pReNative The native recompile state.
1158 * @param off The code buffer offset.
1159 * @param idxInstr The current instruction.
1160 */
1161DECL_INLINE_THROW(uint32_t)
1162iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1163{
1164 /*
1165 * Make sure we don't have any outstanding guest register writes as we may
1166 * raise a \#DE and all guest registers must be up to date in CPUMCTX. */
1167 off = iemNativeRegFlushPendingWrites(pReNative, off);
1168
1169#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1170 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1171#else
1172 RT_NOREF(idxInstr);
1173#endif
1174
1175 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1176
1177 /* raise \#DE exception unconditionally. */
1178 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1179
1180 return off;
1181}
1182
1183
1184/*********************************************************************************************************************************
1185* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1186*********************************************************************************************************************************/
1187
1188/**
1189 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1190 *
1191 * @returns Pointer to the condition stack entry on success, NULL on failure
1192 * (too many nestings)
1193 */
1194DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1195{
1196#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1197 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1198#endif
1199
1200 uint32_t const idxStack = pReNative->cCondDepth;
1201 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1202
1203 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1204 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1205
1206 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1207 pEntry->fInElse = false;
1208 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1209 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1210
1211 return pEntry;
1212}
1213
1214
1215/**
1216 * Start of the if-block, snapshotting the register and variable state.
1217 */
1218DECL_INLINE_THROW(void)
1219iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1220{
1221 Assert(offIfBlock != UINT32_MAX);
1222 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1223 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1224 Assert(!pEntry->fInElse);
1225
1226 /* Define the start of the IF block if requested or for disassembly purposes. */
1227 if (idxLabelIf != UINT32_MAX)
1228 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1229#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1230 else
1231 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1232#else
1233 RT_NOREF(offIfBlock);
1234#endif
1235
1236#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1237 Assert(pReNative->Core.offPc == 0);
1238#endif
1239
1240 /* Copy the initial state so we can restore it in the 'else' block. */
1241 pEntry->InitialState = pReNative->Core;
1242}
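/*
 * For orientation, a full IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF sequence
 * produces native code shaped roughly like this (sketch only):
 *
 *      <evaluate condition, jump to Else if it is false>   ; IEM_MC_IF_XXX worker
 *    If:                                                   ; iemNativeCondStartIfBlock
 *      <if-block statements>
 *      jmp     Endif                                       ; iemNativeEmitElse
 *    Else:
 *      <else-block statements>
 *    Endif:                                                ; iemNativeEmitEndIf
 *
 * Without an IEM_MC_ELSE the Else label is simply defined at the same offset
 * as Endif when the block is closed.
 */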
1243
1244
1245#define IEM_MC_ELSE() } while (0); \
1246 off = iemNativeEmitElse(pReNative, off); \
1247 do {
1248
1249/** Emits code related to IEM_MC_ELSE. */
1250DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1251{
1252 /* Check sanity and get the conditional stack entry. */
1253 Assert(off != UINT32_MAX);
1254 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1255 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1256 Assert(!pEntry->fInElse);
1257
1258 /* Jump to the endif */
1259 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1260
1261 /* Define the else label and enter the else part of the condition. */
1262 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1263 pEntry->fInElse = true;
1264
1265#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1266 Assert(pReNative->Core.offPc == 0);
1267#endif
1268
1269 /* Snapshot the core state so we can do a merge at the endif and restore
1270 the snapshot we took at the start of the if-block. */
1271 pEntry->IfFinalState = pReNative->Core;
1272 pReNative->Core = pEntry->InitialState;
1273
1274 return off;
1275}
1276
1277
1278#define IEM_MC_ENDIF() } while (0); \
1279 off = iemNativeEmitEndIf(pReNative, off)
1280
1281/** Emits code related to IEM_MC_ENDIF. */
1282DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1283{
1284 /* Check sanity and get the conditional stack entry. */
1285 Assert(off != UINT32_MAX);
1286 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1287 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1288
1289#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1290 Assert(pReNative->Core.offPc == 0);
1291#endif
1292
1293 /*
1294 * Now we have to find common ground between the core state at the end of the
1295 * if/else block and the state we entered the block with. Use the smallest
1296 * common denominator and just drop anything that isn't the same in both states.
1297 */
1298 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1299 * which is why we're doing this at the end of the else-block.
1300 * But we'd need more info about the future for that to be worth the effort. */
1301 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1302 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1303 {
1304 /* shadow guest stuff first. */
1305 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1306 if (fGstRegs)
1307 {
1308 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1309 do
1310 {
1311 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1312 fGstRegs &= ~RT_BIT_64(idxGstReg);
1313
1314 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1315 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1316 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1317 {
1318 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1319 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1320 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1321 }
1322 } while (fGstRegs);
1323 }
1324 else
1325 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1326
1327 /* Check variables next. For now we must require them to be identical
1328 or stuff we can recreate. */
1329 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1330 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1331 if (fVars)
1332 {
1333 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1334 do
1335 {
1336 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1337 fVars &= ~RT_BIT_32(idxVar);
1338
1339 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1340 {
1341 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1342 continue;
1343 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1344 {
1345 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1346 if (idxHstReg != UINT8_MAX)
1347 {
1348 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1349 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1350 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1351 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1352 }
1353 continue;
1354 }
1355 }
1356 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1357 continue;
1358
1359 /* Irreconcilable, so drop it. */
1360 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1361 if (idxHstReg != UINT8_MAX)
1362 {
1363 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1364 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1365 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1366 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1367 }
1368 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1369 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1370 } while (fVars);
1371 }
1372
1373 /* Finally, check that the host register allocations matches. */
1374 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1375 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1376 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1377 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1378 }
1379
1380 /*
1381 * Define the endif label and maybe the else one if we're still in the 'if' part.
1382 */
1383 if (!pEntry->fInElse)
1384 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1385 else
1386 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1387 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1388
1389 /* Pop the conditional stack. */
1390 pReNative->cCondDepth -= 1;
1391
1392 return off;
1393}
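/*
 * Illustration of the reconciliation above (register names made up): if the
 * if-block loaded the guest RAX shadow into host register r10 but the
 * else-block never touched RAX, the shadowing is dropped at the endif so that
 * both paths leave the register allocator in the same state.
 */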
1394
1395
1396#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1397 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1398 do {
1399
1400/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1401DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1402{
1403 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1404 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1405
1406 /* Get the eflags. */
1407 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1408 kIemNativeGstRegUse_ReadOnly);
1409
1410 /* Test and jump. */
1411 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1412
1413 /* Free but don't flush the EFlags register. */
1414 iemNativeRegFreeTmp(pReNative, idxEflReg);
1415
1416 /* Make a copy of the core state now as we start the if-block. */
1417 iemNativeCondStartIfBlock(pReNative, off);
1418
1419 return off;
1420}
1421
1422
1423#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1424 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1425 do {
1426
1427/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1428DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1429{
1430 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1431 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1432
1433 /* Get the eflags. */
1434 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1435 kIemNativeGstRegUse_ReadOnly);
1436
1437 /* Test and jump. */
1438 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1439
1440 /* Free but don't flush the EFlags register. */
1441 iemNativeRegFreeTmp(pReNative, idxEflReg);
1442
1443 /* Make a copy of the core state now as we start the if-block. */
1444 iemNativeCondStartIfBlock(pReNative, off);
1445
1446 return off;
1447}
1448
1449
1450#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1451 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1452 do {
1453
1454/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1455DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1456{
1457 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1458 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1459
1460 /* Get the eflags. */
1461 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1462 kIemNativeGstRegUse_ReadOnly);
1463
1464 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1465 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1466
1467 /* Test and jump. */
1468 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1469
1470 /* Free but don't flush the EFlags register. */
1471 iemNativeRegFreeTmp(pReNative, idxEflReg);
1472
1473 /* Make a copy of the core state now as we start the if-block. */
1474 iemNativeCondStartIfBlock(pReNative, off);
1475
1476 return off;
1477}
1478
1479
1480#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1481 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1482 do {
1483
1484/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1485DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1486{
1487 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1488 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1489
1490 /* Get the eflags. */
1491 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1492 kIemNativeGstRegUse_ReadOnly);
1493
1494 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1495 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1496
1497 /* Test and jump. */
1498 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1499
1500 /* Free but don't flush the EFlags register. */
1501 iemNativeRegFreeTmp(pReNative, idxEflReg);
1502
1503 /* Make a copy of the core state now as we start the if-block. */
1504 iemNativeCondStartIfBlock(pReNative, off);
1505
1506 return off;
1507}
1508
1509
1510#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1511 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1512 do {
1513
1514#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1515 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1516 do {
1517
1518/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1519DECL_INLINE_THROW(uint32_t)
1520iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1521 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1522{
1523 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1524 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1525
1526 /* Get the eflags. */
1527 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1528 kIemNativeGstRegUse_ReadOnly);
1529
1530 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1531 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1532
1533 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1534 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1535 Assert(iBitNo1 != iBitNo2);
1536
1537#ifdef RT_ARCH_AMD64
1538 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1539
1540 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1541 if (iBitNo1 > iBitNo2)
1542 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1543 else
1544 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1545 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1546
1547#elif defined(RT_ARCH_ARM64)
1548 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1549 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1550
1551 /* and tmpreg, eflreg, #1<<iBitNo1 */
1552 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1553
1554 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1555 if (iBitNo1 > iBitNo2)
1556 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1557 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1558 else
1559 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1560 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1561
1562 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1563
1564#else
1565# error "Port me"
1566#endif
1567
1568 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1569 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1570 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1571
1572 /* Free but don't flush the EFlags and tmp registers. */
1573 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1574 iemNativeRegFreeTmp(pReNative, idxEflReg);
1575
1576 /* Make a copy of the core state now as we start the if-block. */
1577 iemNativeCondStartIfBlock(pReNative, off);
1578
1579 return off;
1580}
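/*
 * The bit trick above, spelled out as C-ish pseudo code (sketch):
 *
 *      tmp  = efl & fBit1InEfl;                    // isolate the first flag
 *      tmp  = iBitNo1 > iBitNo2                    // move it onto the second flag's bit position
 *           ? tmp >> (iBitNo1 - iBitNo2)
 *           : tmp << (iBitNo2 - iBitNo1);
 *      tmp ^= efl;                                 // bit iBitNo2 is now set iff the two flags differ
 *
 * Both the AMD64 and ARM64 variants implement this same computation.
 */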
1581
1582
1583#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1584 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1585 do {
1586
1587#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1588 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1589 do {
1590
1591/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1592 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1593DECL_INLINE_THROW(uint32_t)
1594iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1595 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1596{
1597 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1598 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1599
1600 /* We need an if-block label for the non-inverted variant. */
1601 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1602 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1603
1604 /* Get the eflags. */
1605 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1606 kIemNativeGstRegUse_ReadOnly);
1607
1608 /* Translate the flag masks to bit numbers. */
1609 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1610 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1611
1612 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1613 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1614 Assert(iBitNo1 != iBitNo);
1615
1616 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1617 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1618 Assert(iBitNo2 != iBitNo);
1619 Assert(iBitNo2 != iBitNo1);
1620
1621#ifdef RT_ARCH_AMD64
1622 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1623#elif defined(RT_ARCH_ARM64)
1624 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1625#endif
1626
1627 /* Check for the lone bit first. */
1628 if (!fInverted)
1629 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1630 else
1631 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1632
1633 /* Then extract and compare the other two bits. */
1634#ifdef RT_ARCH_AMD64
1635 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1636 if (iBitNo1 > iBitNo2)
1637 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1638 else
1639 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1640 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1641
1642#elif defined(RT_ARCH_ARM64)
1643 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1644
1645 /* and tmpreg, eflreg, #1<<iBitNo1 */
1646 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1647
1648 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1649 if (iBitNo1 > iBitNo2)
1650 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1651 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1652 else
1653 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1654 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1655
1656 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1657
1658#else
1659# error "Port me"
1660#endif
1661
1662 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1663 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1664 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1665
1666 /* Free but don't flush the EFlags and tmp registers. */
1667 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1668 iemNativeRegFreeTmp(pReNative, idxEflReg);
1669
1670 /* Make a copy of the core state now as we start the if-block. */
1671 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1672
1673 return off;
1674}
1675
1676
1677#define IEM_MC_IF_CX_IS_NZ() \
1678 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1679 do {
1680
1681/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1682DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1683{
1684 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1685
1686 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1687 kIemNativeGstRegUse_ReadOnly);
1688 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1689 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1690
1691 iemNativeCondStartIfBlock(pReNative, off);
1692 return off;
1693}
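/*
 * Note that the UINT16_MAX mask above restricts the test to the low 16 bits
 * of RCX, i.e. CX, which is exactly what IEM_MC_IF_CX_IS_NZ calls for.
 */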
1694
1695
1696#define IEM_MC_IF_ECX_IS_NZ() \
1697 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1698 do {
1699
1700#define IEM_MC_IF_RCX_IS_NZ() \
1701 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1702 do {
1703
1704/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1705DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1706{
1707 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1708
1709 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1710 kIemNativeGstRegUse_ReadOnly);
1711 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1712 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1713
1714 iemNativeCondStartIfBlock(pReNative, off);
1715 return off;
1716}
1717
1718
1719#define IEM_MC_IF_CX_IS_NOT_ONE() \
1720 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1721 do {
1722
1723/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1724DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1725{
1726 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1727
1728 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1729 kIemNativeGstRegUse_ReadOnly);
1730#ifdef RT_ARCH_AMD64
1731 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1732#else
1733 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1734 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1735 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1736#endif
1737 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1738
1739 iemNativeCondStartIfBlock(pReNative, off);
1740 return off;
1741}
1742
1743
1744#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1745 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1746 do {
1747
1748#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1749 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1750 do {
1751
1752/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1753DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1754{
1755 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1756
1757 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1758 kIemNativeGstRegUse_ReadOnly);
1759 if (f64Bit)
1760 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1761 else
1762 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1763 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1764
1765 iemNativeCondStartIfBlock(pReNative, off);
1766 return off;
1767}
1768
1769
1770#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1771 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1772 do {
1773
1774#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1775 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1776 do {
1777
1778/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1779 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1782{
1783 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1784 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1785
1786 /* We have to load both RCX and EFLAGS before we can start branching,
1787 otherwise we'll end up in the else-block with an inconsistent
1788 register allocator state.
1789 Doing EFLAGS first as it's more likely to be loaded, right? */
1790 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1791 kIemNativeGstRegUse_ReadOnly);
1792 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1793 kIemNativeGstRegUse_ReadOnly);
1794
1795 /** @todo we could reduce this to a single branch instruction by spending a
1796 * temporary register and some setnz stuff. Not sure if loops are
1797 * worth it. */
1798 /* Check CX. */
1799#ifdef RT_ARCH_AMD64
1800 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1801#else
1802 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1803 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1804 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1805#endif
1806
1807 /* Check the EFlags bit. */
1808 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1809 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1810 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1811 !fCheckIfSet /*fJmpIfSet*/);
1812
1813 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1814 iemNativeRegFreeTmp(pReNative, idxEflReg);
1815
1816 iemNativeCondStartIfBlock(pReNative, off);
1817 return off;
1818}
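/*
 * In C terms the two checks above implement roughly (sketch):
 *
 *      if (CX != 1 && (fCheckIfSet ? (EFLAGS & a_fBit) : !(EFLAGS & a_fBit)))
 *      { <if-block> }
 *
 * with either failed check branching straight to the else label.
 */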
1819
1820
1821#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1822 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1823 do {
1824
1825#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1826 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1827 do {
1828
1829#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1830 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1831 do {
1832
1833#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1834 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1835 do {
1836
1837/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
1838 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
1839 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
1840 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1841DECL_INLINE_THROW(uint32_t)
1842iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1843 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
1844{
1845 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1846 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1847
1848 /* We have to load both RCX and EFLAGS before we can start branching,
1849 otherwise we'll end up in the else-block with an inconsistent
1850 register allocator state.
1851 Doing EFLAGS first as it's more likely to be loaded, right? */
1852 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1853 kIemNativeGstRegUse_ReadOnly);
1854 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1855 kIemNativeGstRegUse_ReadOnly);
1856
1857 /** @todo we could reduce this to a single branch instruction by spending a
1858 * temporary register and some setnz stuff. Not sure if loops are
1859 * worth it. */
1860 /* Check RCX/ECX. */
1861 if (f64Bit)
1862 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1863 else
1864 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1865
1866 /* Check the EFlags bit. */
1867 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1868 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1869 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1870 !fCheckIfSet /*fJmpIfSet*/);
1871
1872 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1873 iemNativeRegFreeTmp(pReNative, idxEflReg);
1874
1875 iemNativeCondStartIfBlock(pReNative, off);
1876 return off;
1877}
1878
1879
1880#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
1881 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
1882 do {
1883
1884/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
1885DECL_INLINE_THROW(uint32_t)
1886iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
1887{
1888 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1889
1890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
1891 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
1892 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
1893 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
1894
1895 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
1896
1897 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
1898
1899 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
1900
1901 iemNativeCondStartIfBlock(pReNative, off);
1902 return off;
1903}
1904
1905
1906#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1907
1908#define IEM_MC_IF_MXCSR_XCPT_PENDING() \
1909 off = iemNativeEmitIfMxcsrXcptPending(pReNative, off); \
1910 do {
1911
1912/** Emits code for IEM_MC_IF_MXCSR_XCPT_PENDING. */
1913DECL_INLINE_THROW(uint32_t)
1914iemNativeEmitIfMxcsrXcptPending(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1915{
1916 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1917
1918 uint8_t const idxGstMxcsrReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
1919 kIemNativeGstRegUse_Calculation);
1920 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1921
1922 /* mov tmp0, mxcsr */
1923 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
1924 /* tmp0 &= X86_MXCSR_XCPT_FLAGS */
1925 off = iemNativeEmitAndGprByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
1926 /* mxcsr &= X86_MXCSR_XCPT_MASK */
1927 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK);
1928 /* mxcsr = ~mxcsr */
1929 off = iemNativeEmitInvBitsGpr(pReNative, off, idxGstMxcsrReg, idxGstMxcsrReg);
1930 /* mxcsr >>= X86_MXCSR_XCPT_MASK_SHIFT */
1931 off = iemNativeEmitShiftGprRight(pReNative, off, idxGstMxcsrReg, X86_MXCSR_XCPT_MASK_SHIFT);
1932 /* tmp0 &= mxcsr */
1933 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxGstMxcsrReg);
1934
1935 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegTmp, true /*f64Bit*/, pEntry->idxLabelElse);
1936 iemNativeRegFreeTmp(pReNative, idxGstMxcsrReg);
1937 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1938
1939 iemNativeCondStartIfBlock(pReNative, off);
1940 return off;
1941}
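/*
 * In C terms the emitted check computes roughly (sketch):
 *
 *      fXcptPending = (mxcsr & X86_MXCSR_XCPT_FLAGS)
 *                   & (~(mxcsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT);
 *
 * and takes the if-block when at least one unmasked exception flag is set.
 */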
1942
1943#endif
1944
1945
1946/*********************************************************************************************************************************
1947* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
1948*********************************************************************************************************************************/
1949
1950#define IEM_MC_NOREF(a_Name) \
1951 RT_NOREF_PV(a_Name)
1952
1953#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
1954 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
1955
1956#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
1957 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
1958
1959#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
1960 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
1961
1962#define IEM_MC_LOCAL(a_Type, a_Name) \
1963 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
1964
1965#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
1966 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
1967
1968
1969/**
1970 * Sets the host register for @a idxVar to @a idxReg.
1971 *
1972 * The register must not be allocated. Any guest register shadowing will be
1973 * implicitly dropped by this call.
1974 *
1975 * The variable must not have any register associated with it (causes
1976 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
1977 * implied.
1978 *
1979 * @returns idxReg
1980 * @param pReNative The recompiler state.
1981 * @param idxVar The variable.
1982 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
1983 * @param off For recording in debug info.
1984 *
1985 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
1986 */
1987DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
1988{
1989 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
1990 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
1991 Assert(!pVar->fRegAcquired);
1992 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
1993 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
1994 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
1995
1996 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
1997 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
1998
1999 iemNativeVarSetKindToStack(pReNative, idxVar);
2000 pVar->idxReg = idxReg;
2001
2002 return idxReg;
2003}
2004
2005
2006/**
2007 * A convenient helper function.
2008 */
2009DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2010 uint8_t idxReg, uint32_t *poff)
2011{
2012 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2013 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2014 return idxReg;
2015}
2016
2017
2018/**
2019 * This is called by IEM_MC_END() to clean up all variables.
2020 */
2021DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2022{
2023 uint32_t const bmVars = pReNative->Core.bmVars;
2024 if (bmVars != 0)
2025 iemNativeVarFreeAllSlow(pReNative, bmVars);
2026 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2027 Assert(pReNative->Core.bmStack == 0);
2028}
2029
2030
2031#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2032
2033/**
2034 * This is called by IEM_MC_FREE_LOCAL.
2035 */
2036DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2037{
2038 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2039 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2040 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2041}
2042
2043
2044#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2045
2046/**
2047 * This is called by IEM_MC_FREE_ARG.
2048 */
2049DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2050{
2051 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2052 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2053 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2054}
2055
2056
2057#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2058
2059/**
2060 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2061 */
2062DECL_INLINE_THROW(uint32_t)
2063iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2064{
2065 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2066 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2067 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2068 Assert( pVarDst->cbVar == sizeof(uint16_t)
2069 || pVarDst->cbVar == sizeof(uint32_t));
2070
2071 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2072 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2073 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2074 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2076
2077 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2078
2079 /*
2080 * Special case for immediates.
2081 */
2082 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2083 {
2084 switch (pVarDst->cbVar)
2085 {
2086 case sizeof(uint16_t):
2087 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2088 break;
2089 case sizeof(uint32_t):
2090 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2091 break;
2092 default: AssertFailed(); break;
2093 }
2094 }
2095 else
2096 {
2097 /*
2098 * The generic solution for now.
2099 */
2100 /** @todo optimize this by having the python script make sure the source
2101 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2102 * statement. Then we could just transfer the register assignments. */
2103 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2104 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2105 switch (pVarDst->cbVar)
2106 {
2107 case sizeof(uint16_t):
2108 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2109 break;
2110 case sizeof(uint32_t):
2111 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2112 break;
2113 default: AssertFailed(); break;
2114 }
2115 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2116 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2117 }
2118 return off;
2119}
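/*
 * Typical use (sketch): IEM_MC_ASSIGN_TO_SMALLER(u16Dst, u32Src) ends up here
 * and, unless the source is an immediate, is emitted as a plain 16-bit copy
 * between the two variables' host registers.
 */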
2120
2121
2122
2123/*********************************************************************************************************************************
2124* Emitters for IEM_MC_CALL_CIMPL_XXX *
2125*********************************************************************************************************************************/
2126
2127/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2128DECL_INLINE_THROW(uint32_t)
2129iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2130 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2131
2132{
2133 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2134
2135#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2136 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2137 when a call clobbers any of the relevant control registers. */
2138# if 1
2139 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2140 {
2141 /* Likely as long as call+ret are done via cimpl. */
2142 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2143 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2144 }
2145 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2146 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2147 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2148 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2149 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2150 else
2151 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2152 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2153 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2154
2155# else
2156 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2157 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2158 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2159 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2160 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2161 || pfnCImpl == (uintptr_t)iemCImpl_callf
2162 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2163 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2164 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2165 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2166 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2167# endif
2168#endif
2169
2170 /*
2171 * Do all the call setup and cleanup.
2172 */
2173 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2174
2175 /*
2176 * Load the two or three hidden arguments.
2177 */
2178#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2179 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2180 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2181 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2182#else
2183 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2184 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2185#endif
2186
2187 /*
2188 * Make the call and check the return code.
2189 *
2190 * Shadow PC copies are always flushed here, other stuff depends on flags.
2191 * Segment and general purpose registers are explicitly flushed via the
2192 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2193 * macros.
2194 */
2195 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2196#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2197 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2198#endif
2199 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2200 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2201 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2202 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2203
2204 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2205}
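/*
 * For reference, with the hidden arguments loaded above a CImpl call ends up
 * looking roughly like this (sketch):
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 *
 * with an additional by-reference rcStrict slot squeezed in first on
 * Windows/AMD64 builds when VBOXSTRICTRC_STRICT_ENABLED is defined, as the
 * conditional code above shows.
 */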
2206
2207
2208#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2209 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2210
2211/** Emits code for IEM_MC_CALL_CIMPL_1. */
2212DECL_INLINE_THROW(uint32_t)
2213iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2214 uintptr_t pfnCImpl, uint8_t idxArg0)
2215{
2216 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2217 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2218}
2219
2220
2221#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2222 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2223
2224/** Emits code for IEM_MC_CALL_CIMPL_2. */
2225DECL_INLINE_THROW(uint32_t)
2226iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2227 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2228{
2229 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2230 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2231 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2232}
2233
2234
2235#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2236 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2237 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2238
2239/** Emits code for IEM_MC_CALL_CIMPL_3. */
2240DECL_INLINE_THROW(uint32_t)
2241iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2242 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2243{
2244 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2246 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2247 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2248}
2249
2250
2251#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2252 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2253 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2254
2255/** Emits code for IEM_MC_CALL_CIMPL_4. */
2256DECL_INLINE_THROW(uint32_t)
2257iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2258 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2259{
2260 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2261 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2262 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2263 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2264 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2265}
2266
2267
2268#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2269 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2270 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2271
2272/** Emits code for IEM_MC_CALL_CIMPL_5. */
2273DECL_INLINE_THROW(uint32_t)
2274iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2275 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2276{
2277 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2278 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2279 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2280 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2281 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2282 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2283}
2284
2285
2286/** Recompiler debugging: Flush guest register shadow copies. */
2287#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2288
2289
2290
2291/*********************************************************************************************************************************
2292* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2293*********************************************************************************************************************************/
2294
2295/**
2296 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2297 */
2298DECL_INLINE_THROW(uint32_t)
2299iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2300 uintptr_t pfnAImpl, uint8_t cArgs)
2301{
2302 if (idxVarRc != UINT8_MAX)
2303 {
2304 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2305 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2306 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2307 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2308 }
2309
2310 /*
2311 * Do all the call setup and cleanup.
2312 */
2313 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
2314
2315 /*
2316 * Make the call and update the return code variable if we've got one.
2317 */
2318 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2319 if (idxVarRc != UINT8_MAX)
2320 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2321
2322 return off;
2323}
2324
2325
2326
2327#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2328 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2329
2330#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2331 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2332
2333/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2334DECL_INLINE_THROW(uint32_t)
2335iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2336{
2337 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2338}
2339
2340
2341#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2342 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2343
2344#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2345 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2346
2347/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2348DECL_INLINE_THROW(uint32_t)
2349iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2350{
2351 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2352 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2353}
2354
2355
2356#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2357 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2358
2359#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2360 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2361
2362/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2363DECL_INLINE_THROW(uint32_t)
2364iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2365 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2366{
2367 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2369 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2370}
2371
2372
2373#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2374 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2375
2376#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
2377 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2378
2379/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2380DECL_INLINE_THROW(uint32_t)
2381iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2382 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2383{
2384 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2386 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2387 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2388}
2389
2390
2391#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2392 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2393
2394#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
2395 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2396
2397/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2398DECL_INLINE_THROW(uint32_t)
2399iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2400 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2401{
2402 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2403 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2404 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2405 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2406 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2407}
2408
2409
2410
2411/*********************************************************************************************************************************
2412* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2413*********************************************************************************************************************************/
2414
2415#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2416 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2417
2418#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2419 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2420
2421#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2422 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2423
2424#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2425 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2426
2427
2428/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2429 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2430DECL_INLINE_THROW(uint32_t)
2431iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2432{
2433 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2434 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2435 Assert(iGRegEx < 20);
2436
2437 /* Same discussion as in iemNativeEmitFetchGregU16 */
2438 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2439 kIemNativeGstRegUse_ReadOnly);
2440
2441 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2442 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2443
2444 /* The value is zero-extended to the full 64-bit host register width. */
2445 if (iGRegEx < 16)
2446 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2447 else
2448 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2449
2450 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2451 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2452 return off;
2453}
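/*
 * Note: iGRegEx values 0..15 refer to the low byte of the corresponding GPR
 * (AL, CL, ..., R15L), while 16..19 refer to the legacy high byte registers
 * (AH, CH, DH, BH), hence the iGRegEx < 20 assertion and the Gpr8Hi path above.
 */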
2454
2455
2456#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2457 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2458
2459#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2460 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2461
2462#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2463 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2464
2465/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2466DECL_INLINE_THROW(uint32_t)
2467iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2468{
2469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2470 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2471 Assert(iGRegEx < 20);
2472
2473 /* Same discussion as in iemNativeEmitFetchGregU16 */
2474 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2475 kIemNativeGstRegUse_ReadOnly);
2476
2477 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2478 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2479
2480 if (iGRegEx < 16)
2481 {
2482 switch (cbSignExtended)
2483 {
2484 case sizeof(uint16_t):
2485 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2486 break;
2487 case sizeof(uint32_t):
2488 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2489 break;
2490 case sizeof(uint64_t):
2491 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2492 break;
2493 default: AssertFailed(); break;
2494 }
2495 }
2496 else
2497 {
2498 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2499 switch (cbSignExtended)
2500 {
2501 case sizeof(uint16_t):
2502 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2503 break;
2504 case sizeof(uint32_t):
2505 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2506 break;
2507 case sizeof(uint64_t):
2508 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2509 break;
2510 default: AssertFailed(); break;
2511 }
2512 }
2513
2514 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2515 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2516 return off;
2517}
2518
2519
2520
2521#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2522 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2523
2524#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2525 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2526
2527#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2528 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2529
2530/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2531DECL_INLINE_THROW(uint32_t)
2532iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2533{
2534 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2535 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2536 Assert(iGReg < 16);
2537
2538 /*
2539 * We can either just load the low 16-bit of the GPR into a host register
2540 * for the variable, or we can do so via a shadow copy host register. The
2541 * latter will avoid having to reload it if it's being stored later, but
2542 * will waste a host register if it isn't touched again. Since we don't
2543 * know what's going to happen, we choose the latter for now.
2544 */
2545 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2546 kIemNativeGstRegUse_ReadOnly);
2547
2548 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2549 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2550 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2551 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2552
2553 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2554 return off;
2555}
2556
2557
2558#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2559 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2560
2561#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2562 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2563
2564/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2565DECL_INLINE_THROW(uint32_t)
2566iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2567{
2568 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2569 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2570 Assert(iGReg < 16);
2571
2572 /*
2573 * We can either just load the low 16-bit of the GPR into a host register
2574 * for the variable, or we can do so via a shadow copy host register. The
2575 * latter will avoid having to reload it if it's being stored later, but
2576 * will waste a host register if it isn't touched again. Since we don't
2577 * know what's going to happen, we choose the latter for now.
2578 */
2579 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2580 kIemNativeGstRegUse_ReadOnly);
2581
2582 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2583 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2584 if (cbSignExtended == sizeof(uint32_t))
2585 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2586 else
2587 {
2588 Assert(cbSignExtended == sizeof(uint64_t));
2589 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2590 }
2591 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2592
2593 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2594 return off;
2595}
2596
2597
2598#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2599 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2600
2601#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2602 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2603
2604/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2605DECL_INLINE_THROW(uint32_t)
2606iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2607{
2608 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2609 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2610 Assert(iGReg < 16);
2611
2612 /*
2613 * We can either just load the low 32-bit of the GPR into a host register
2614 * for the variable, or we can do so via a shadow copy host register. The
2615 * latter will avoid having to reload it if it's being stored later, but
2616 * will waste a host register if it isn't touched again. Since we don't
2617 * know what's going to happen, we choose the latter for now.
2618 */
2619 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2620 kIemNativeGstRegUse_ReadOnly);
2621
2622 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2623 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2624 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2625 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2626
2627 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2628 return off;
2629}
2630
2631
2632#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2633 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2634
2635/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2636DECL_INLINE_THROW(uint32_t)
2637iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2638{
2639 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2640 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2641 Assert(iGReg < 16);
2642
2643 /*
2644 * We can either just load the low 32-bit of the GPR into a host register
2645 * for the variable, or we can do so via a shadow copy host register. The
2646 * latter will avoid having to reload it if it's being stored later, but
2647 * will waste a host register if it isn't touched again. Since we don't
2648 * know what's going to happen, we choose the latter for now.
2649 */
2650 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2651 kIemNativeGstRegUse_ReadOnly);
2652
2653 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2654 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2655 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2656 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2657
2658 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2659 return off;
2660}
2661
2662
2663#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2664 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2665
2666#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2667 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2668
2669/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2670 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2671DECL_INLINE_THROW(uint32_t)
2672iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2673{
2674 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2675 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2676 Assert(iGReg < 16);
2677
2678 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2679 kIemNativeGstRegUse_ReadOnly);
2680
2681 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2682 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2683 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2684 /** @todo name the register a shadow one already? */
2685 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2686
2687 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2688 return off;
2689}
2690
2691
2692
2693/*********************************************************************************************************************************
2694* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2695*********************************************************************************************************************************/
2696
2697#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2698 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2699
2700/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2701DECL_INLINE_THROW(uint32_t)
2702iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2703{
2704 Assert(iGRegEx < 20);
2705 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2706 kIemNativeGstRegUse_ForUpdate);
2707#ifdef RT_ARCH_AMD64
2708 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2709
2710 /* To the lowest byte of the register: mov r8, imm8 */
2711 if (iGRegEx < 16)
2712 {
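        /* Host registers 8..15 need REX.B, and 4..7 need a plain (empty) REX prefix so
           the encoding selects SPL/BPL/SIL/DIL rather than AH/CH/DH/BH. */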
2713 if (idxGstTmpReg >= 8)
2714 pbCodeBuf[off++] = X86_OP_REX_B;
2715 else if (idxGstTmpReg >= 4)
2716 pbCodeBuf[off++] = X86_OP_REX;
2717 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2718 pbCodeBuf[off++] = u8Value;
2719 }
2720 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
2721 else if (idxGstTmpReg < 4)
2722 {
2723 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2724 pbCodeBuf[off++] = u8Value;
2725 }
2726 else
2727 {
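        /* Only the first four host registers have directly addressable high-byte forms
           (AH/CH/DH/BH), so for the others we rotate the target byte down into bits 7:0,
           store it there, and rotate it back up again. */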
2728 /* ror reg64, 8 */
2729 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2730 pbCodeBuf[off++] = 0xc1;
2731 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2732 pbCodeBuf[off++] = 8;
2733
2734 /* mov reg8, imm8 */
2735 if (idxGstTmpReg >= 8)
2736 pbCodeBuf[off++] = X86_OP_REX_B;
2737 else if (idxGstTmpReg >= 4)
2738 pbCodeBuf[off++] = X86_OP_REX;
2739 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2740 pbCodeBuf[off++] = u8Value;
2741
2742 /* rol reg64, 8 */
2743 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2744 pbCodeBuf[off++] = 0xc1;
2745 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2746 pbCodeBuf[off++] = 8;
2747 }
2748
2749#elif defined(RT_ARCH_ARM64)
2750 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2751 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2752 if (iGRegEx < 16)
2753 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2754 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2755 else
2756 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2757 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2758 iemNativeRegFreeTmp(pReNative, idxImmReg);
2759
2760#else
2761# error "Port me!"
2762#endif
2763
2764 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2765
2766 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2767
2768 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2769 return off;
2770}
2771
2772
2773#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2774 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2775
2776/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2777DECL_INLINE_THROW(uint32_t)
2778iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2779{
2780 Assert(iGRegEx < 20);
2781 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2782
2783 /*
2784 * If it's a constant value (unlikely), we treat this as an
2785 * IEM_MC_STORE_GREG_U8_CONST statement.
2786 */
2787 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2788 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2789 { /* likely */ }
2790 else
2791 {
2792 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2793 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2794 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
2795 }
2796
2797 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2798 kIemNativeGstRegUse_ForUpdate);
2799 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2800
2801#ifdef RT_ARCH_AMD64
2802 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
2803 if (iGRegEx < 16)
2804 {
2805 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2806 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2807 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2808 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2809 pbCodeBuf[off++] = X86_OP_REX;
2810 pbCodeBuf[off++] = 0x8a;
2811 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2812 }
2813 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
2814 else if (idxGstTmpReg < 4 && idxVarReg < 4)
2815 {
2816 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
2817 pbCodeBuf[off++] = 0x8a;
2818 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
2819 }
2820 else
2821 {
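        /* Same rotate/store/rotate-back trick as in iemNativeEmitStoreGregU8Const: the
           high byte cannot be addressed directly on these host registers. */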
2822 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
2823
2824 /* ror reg64, 8 */
2825 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2826 pbCodeBuf[off++] = 0xc1;
2827 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2828 pbCodeBuf[off++] = 8;
2829
2830 /* mov reg8, reg8(r/m) */
2831 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2832 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2833 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2834 pbCodeBuf[off++] = X86_OP_REX;
2835 pbCodeBuf[off++] = 0x8a;
2836 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2837
2838 /* rol reg64, 8 */
2839 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2840 pbCodeBuf[off++] = 0xc1;
2841 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2842 pbCodeBuf[off++] = 8;
2843 }
2844
2845#elif defined(RT_ARCH_ARM64)
2846 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
2847 or
2848 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
2849 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2850 if (iGRegEx < 16)
2851 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
2852 else
2853 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
2854
2855#else
2856# error "Port me!"
2857#endif
2858 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2859
2860 iemNativeVarRegisterRelease(pReNative, idxValueVar);
2861
2862 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2863 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2864 return off;
2865}
2866
2867
2868
2869#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
2870 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
2871
2872/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
2873DECL_INLINE_THROW(uint32_t)
2874iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
2875{
2876 Assert(iGReg < 16);
2877 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2878 kIemNativeGstRegUse_ForUpdate);
2879#ifdef RT_ARCH_AMD64
2880 /* mov reg16, imm16 */
2881 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
2882 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2883 if (idxGstTmpReg >= 8)
2884 pbCodeBuf[off++] = X86_OP_REX_B;
2885 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
2886 pbCodeBuf[off++] = RT_BYTE1(uValue);
2887 pbCodeBuf[off++] = RT_BYTE2(uValue);
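    /* For instance, if the guest register happens to be shadowed by host register r10 and
       uValue is 0x1234, the bytes emitted above would be 66 41 BA 34 12 (mov r10w, 0x1234). */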
2888
2889#elif defined(RT_ARCH_ARM64)
2890 /* movk xdst, #uValue, lsl #0 */
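    /* movk replaces only bits 15:0 and leaves the rest of the destination untouched,
       which is exactly the 16-bit merge semantics we need here. */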
2891 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2892 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
2893
2894#else
2895# error "Port me!"
2896#endif
2897
2898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2899
2900 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
2901 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2902 return off;
2903}
2904
2905
2906#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
2907 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
2908
2909/** Emits code for IEM_MC_STORE_GREG_U16. */
2910DECL_INLINE_THROW(uint32_t)
2911iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
2912{
2913 Assert(iGReg < 16);
2914 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2915
2916 /*
2917 * If it's a constant value (unlikely), we treat this as an
2918 * IEM_MC_STORE_GREG_U16_CONST statement.
2919 */
2920 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2921 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2922 { /* likely */ }
2923 else
2924 {
2925 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2926 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2927 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
2928 }
2929
2930 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2931 kIemNativeGstRegUse_ForUpdate);
2932
2933#ifdef RT_ARCH_AMD64
2934 /* mov reg16, reg16 or [mem16] */
2935 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2936 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2937 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
2938 {
2939 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
2940 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
2941 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
2942 pbCodeBuf[off++] = 0x8b;
2943 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
2944 }
2945 else
2946 {
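        /* The value variable only lives in its stack slot, so load the 16-bit word
           directly from the frame via a BP-relative access. */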
2947 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
2948 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
2949 if (idxGstTmpReg >= 8)
2950 pbCodeBuf[off++] = X86_OP_REX_R;
2951 pbCodeBuf[off++] = 0x8b;
2952 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
2953 }
2954
2955#elif defined(RT_ARCH_ARM64)
2956 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
2957 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2958 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2959 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
2960 iemNativeVarRegisterRelease(pReNative, idxValueVar);
2961
2962#else
2963# error "Port me!"
2964#endif
2965
2966 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2967
2968 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
2969 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2970 return off;
2971}
2972
2973
2974#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
2975 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
2976
2977/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
2978DECL_INLINE_THROW(uint32_t)
2979iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
2980{
2981 Assert(iGReg < 16);
2982 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2983 kIemNativeGstRegUse_ForFullWrite);
2984 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
2985 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
2986 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2987 return off;
2988}
2989
2990
2991#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
2992 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
2993
2994/** Emits code for IEM_MC_STORE_GREG_U32. */
2995DECL_INLINE_THROW(uint32_t)
2996iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
2997{
2998 Assert(iGReg < 16);
2999 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3000
3001 /*
3002 * If it's a constant value (unlikely), we treat this as an
3003 * IEM_MC_STORE_GREG_U32_CONST statement.
3004 */
3005 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3006 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3007 { /* likely */ }
3008 else
3009 {
3010 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3011 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3012 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3013 }
3014
3015 /*
3016 * For the rest we allocate a guest register for the variable and write
3017 * it to the CPUMCTX structure.
3018 */
3019 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3020 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3021#ifdef VBOX_STRICT
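    /* Writing a 32-bit GPR is supposed to zero-extend into bits 63:32, so in strict
       builds verify that the variable's host register really has a clear upper half. */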
3022 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3023#endif
3024 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3025 return off;
3026}
3027
3028
3029#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3030 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3031
3032/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3033DECL_INLINE_THROW(uint32_t)
3034iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3035{
3036 Assert(iGReg < 16);
3037 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3038 kIemNativeGstRegUse_ForFullWrite);
3039 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3040 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3041 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3042 return off;
3043}
3044
3045
3046#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3047 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3048
3049/** Emits code for IEM_MC_STORE_GREG_U64. */
3050DECL_INLINE_THROW(uint32_t)
3051iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3052{
3053 Assert(iGReg < 16);
3054 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3055
3056 /*
3057 * If it's a constant value (unlikely), we treat this as an
3058 * IEM_MC_STORE_GREG_U64_CONST statement.
3059 */
3060 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3061 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3062 { /* likely */ }
3063 else
3064 {
3065 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3066 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3067 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3068 }
3069
3070 /*
3071 * For the rest we allocate a guest register for the variable and write
3072 * it to the CPUMCTX structure.
3073 */
3074 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3075 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3076 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3077 return off;
3078}
3079
3080
3081#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3082 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3083
3084/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3085DECL_INLINE_THROW(uint32_t)
3086iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3087{
3088 Assert(iGReg < 16);
3089 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3090 kIemNativeGstRegUse_ForUpdate);
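    /* A 32-bit register-to-itself move zero-extends the value and thus clears bits 63:32
       (mov r32, r32 on AMD64; mov w, w on ARM64). */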
3091 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3092 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3093 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3094 return off;
3095}
3096
3097
3098/*********************************************************************************************************************************
3099* General purpose register manipulation (add, sub). *
3100*********************************************************************************************************************************/
3101
3102#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
3103 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
3104
3105/** Emits code for IEM_MC_ADD_GREG_U16. */
3106DECL_INLINE_THROW(uint32_t)
3107iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3108{
3109 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3110 kIemNativeGstRegUse_ForUpdate);
3111
3112#ifdef RT_ARCH_AMD64
3113 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3114 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3115 if (idxGstTmpReg >= 8)
3116 pbCodeBuf[off++] = X86_OP_REX_B;
3117 if (uAddend == 1)
3118 {
3119 pbCodeBuf[off++] = 0xff; /* inc */
3120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3121 }
3122 else
3123 {
3124 pbCodeBuf[off++] = 0x81;
3125 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3126 pbCodeBuf[off++] = uAddend;
3127 pbCodeBuf[off++] = 0;
3128 }
3129
3130#else
3131 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3132 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3133
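    /* Do the 16-bit addition in a temporary and then merge only bits 15:0 back with bfi,
       so that bits 63:16 of the guest register are left untouched. */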
3134 /* add tmp, gstgrp, uAddend */
3135 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3136
3137 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg bits 15:0. */
3138 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3139
3140 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3141#endif
3142
3143 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3144
3145 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3146
3147 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3148 return off;
3149}
3150
3151
3152#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3153 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3154
3155#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3156 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3157
3158/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3159DECL_INLINE_THROW(uint32_t)
3160iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3161{
3162 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3163 kIemNativeGstRegUse_ForUpdate);
3164
3165#ifdef RT_ARCH_AMD64
3166 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3167 if (f64Bit)
3168 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3169 else if (idxGstTmpReg >= 8)
3170 pbCodeBuf[off++] = X86_OP_REX_B;
3171 if (uAddend == 1)
3172 {
3173 pbCodeBuf[off++] = 0xff; /* inc */
3174 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3175 }
3176 else if (uAddend < 128)
3177 {
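        /* The 0x83 form sign-extends its 8-bit immediate, so it is only usable for
           addends below 128. */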
3178 pbCodeBuf[off++] = 0x83; /* add */
3179 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3180 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3181 }
3182 else
3183 {
3184 pbCodeBuf[off++] = 0x81; /* add */
3185 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3186 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3187 pbCodeBuf[off++] = 0;
3188 pbCodeBuf[off++] = 0;
3189 pbCodeBuf[off++] = 0;
3190 }
3191
3192#else
3193 /* add gstgrp, gstgrp, uAddend */
3194 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3195 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3196
3197#endif
3198
3199 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3200
3201 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3202
3203 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3204 return off;
3205}
3206
3207
3208
3209#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3210 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3211
3212/** Emits code for IEM_MC_SUB_GREG_U16. */
3213DECL_INLINE_THROW(uint32_t)
3214iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3215{
3216 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3217 kIemNativeGstRegUse_ForUpdate);
3218
3219#ifdef RT_ARCH_AMD64
3220 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3221 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3222 if (idxGstTmpReg >= 8)
3223 pbCodeBuf[off++] = X86_OP_REX_B;
3224 if (uSubtrahend == 1)
3225 {
3226 pbCodeBuf[off++] = 0xff; /* dec */
3227 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3228 }
3229 else
3230 {
3231 pbCodeBuf[off++] = 0x81;
3232 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3233 pbCodeBuf[off++] = uSubtrahend;
3234 pbCodeBuf[off++] = 0;
3235 }
3236
3237#else
3238 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3239 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3240
3241 /* sub tmp, gstgrp, uSubtrahend */
3242 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3243
3244 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg bits 15:0. */
3245 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3246
3247 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3248#endif
3249
3250 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3251
3252 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3253
3254 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3255 return off;
3256}
3257
3258
3259#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3260 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3261
3262#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3263 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3264
3265/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3266DECL_INLINE_THROW(uint32_t)
3267iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3268{
3269 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3270 kIemNativeGstRegUse_ForUpdate);
3271
3272#ifdef RT_ARCH_AMD64
3273 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3274 if (f64Bit)
3275 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3276 else if (idxGstTmpReg >= 8)
3277 pbCodeBuf[off++] = X86_OP_REX_B;
3278 if (uSubtrahend == 1)
3279 {
3280 pbCodeBuf[off++] = 0xff; /* dec */
3281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3282 }
3283 else if (uSubtrahend < 128)
3284 {
3285 pbCodeBuf[off++] = 0x83; /* sub */
3286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3287 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3288 }
3289 else
3290 {
3291 pbCodeBuf[off++] = 0x81; /* sub */
3292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3293 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3294 pbCodeBuf[off++] = 0;
3295 pbCodeBuf[off++] = 0;
3296 pbCodeBuf[off++] = 0;
3297 }
3298
3299#else
3300 /* sub gstgrp, gstgrp, uSubtrahend */
3301 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3302 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3303
3304#endif
3305
3306 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3307
3308 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3309
3310 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3311 return off;
3312}
3313
3314
3315/*********************************************************************************************************************************
3316* Local variable manipulation (add, sub, and, or). *
3317*********************************************************************************************************************************/
3318
3319#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3320 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3321
3322#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3323 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3324
3325#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3326 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3327
3328#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3329 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3330
3331/** Emits code for AND'ing a local and a constant value. */
3332DECL_INLINE_THROW(uint32_t)
3333iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3334{
3335#ifdef VBOX_STRICT
3336 switch (cbMask)
3337 {
3338 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3339 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3340 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3341 case sizeof(uint64_t): break;
3342 default: AssertFailedBreak();
3343 }
3344#endif
3345
3346 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3347 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3348
3349 if (cbMask <= sizeof(uint32_t))
3350 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3351 else
3352 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3353
3354 iemNativeVarRegisterRelease(pReNative, idxVar);
3355 return off;
3356}
3357
3358
3359#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3360 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3361
3362#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3363 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3364
3365#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3366 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3367
3368#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3369 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3370
3371/** Emits code for OR'ing a local and a constant value. */
3372DECL_INLINE_THROW(uint32_t)
3373iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3374{
3375#ifdef VBOX_STRICT
3376 switch (cbMask)
3377 {
3378 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3379 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3380 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3381 case sizeof(uint64_t): break;
3382 default: AssertFailedBreak();
3383 }
3384#endif
3385
3386 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3387 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3388
3389 if (cbMask <= sizeof(uint32_t))
3390 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3391 else
3392 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3393
3394 iemNativeVarRegisterRelease(pReNative, idxVar);
3395 return off;
3396}
3397
3398
3399#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3400 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3401
3402#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3403 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3404
3405#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3406 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3407
3408/** Emits code for reversing the byte order in a local value. */
3409DECL_INLINE_THROW(uint32_t)
3410iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3411{
3412 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3413 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3414
3415 switch (cbLocal)
3416 {
3417 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3418 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3419 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3420 default: AssertFailedBreak();
3421 }
3422
3423 iemNativeVarRegisterRelease(pReNative, idxVar);
3424 return off;
3425}
3426
3427
3428
3429/*********************************************************************************************************************************
3430* EFLAGS *
3431*********************************************************************************************************************************/
3432
3433#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3434# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3435#else
3436# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3437 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3438
3439DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3440{
3441 if (fEflOutput)
3442 {
3443 PVMCPUCC const pVCpu = pReNative->pVCpu;
3444# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3445 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3446 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3447 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3448# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3449 if (fEflOutput & (a_fEfl)) \
3450 { \
3451 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3452 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3453 else \
3454 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3455 } else do { } while (0)
3456# else
3457 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
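    /* In other words (roughly): a flag that is only written - not read, not potentially
       needed by an exception or call, and not used otherwise - counts as clobbered and
       its calculation can be skipped, while a written flag that a potential exception or
       call may still consume merely counts as delayable. */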
3458 IEMLIVENESSBIT const LivenessClobbered =
3459 {
3460 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3461 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3462 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3463 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3464 };
3465 IEMLIVENESSBIT const LivenessDelayable =
3466 {
3467 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3468 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3469 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3470 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3471 };
3472# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3473 if (fEflOutput & (a_fEfl)) \
3474 { \
3475 if (LivenessClobbered.a_fLivenessMember) \
3476 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3477 else if (LivenessDelayable.a_fLivenessMember) \
3478 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
3479 else \
3480 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3481 } else do { } while (0)
3482# endif
3483 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
3484 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
3485 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
3486 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
3487 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
3488 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
3489 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
3490# undef CHECK_FLAG_AND_UPDATE_STATS
3491 }
3492 RT_NOREF(fEflInput);
3493}
3494#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3495
3496#undef IEM_MC_FETCH_EFLAGS /* should not be used */
3497#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3498 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
3499
3500/** Handles IEM_MC_FETCH_EFLAGS_EX. */
3501DECL_INLINE_THROW(uint32_t)
3502iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
3503 uint32_t fEflInput, uint32_t fEflOutput)
3504{
3505 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
3506 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3507 RT_NOREF(fEflInput, fEflOutput);
3508
3509#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3510# ifdef VBOX_STRICT
3511 if ( pReNative->idxCurCall != 0
3512 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
3513 {
3514 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
3515 uint32_t const fBoth = fEflInput | fEflOutput;
3516# define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
3517 AssertMsg( !(fBoth & (a_fEflConst)) \
3518 || (!(fEflInput & (a_fEflConst)) \
3519 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3520 : !(fEflOutput & (a_fEflConst)) \
3521 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3522 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
3523 ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
3524 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
3525 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
3526 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
3527 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
3528 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
3529 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
3530 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
3531# undef ASSERT_ONE_EFL
3532 }
3533# endif
3534#endif
3535
3536 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3537
3538 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
3539 * the existing shadow copy. */
3540 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
3541 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3542 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
3543 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3544 return off;
3545}
3546
3547
3548
3549/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
3550 * start using it with custom native code emission (inlining assembly
3551 * instruction helpers). */
3552#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
3553#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3554 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3555 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
3556
3557#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
3558#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3559 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3560 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
3561
3562/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
3563DECL_INLINE_THROW(uint32_t)
3564iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
3565 bool fUpdateSkipping)
3566{
3567 RT_NOREF(fEflOutput);
3568 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
3569 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3570
3571#ifdef VBOX_STRICT
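    /* Strict builds trap (brk 0x2001 / 0x2002) if the value being committed has the
       reserved-must-be-one bit clear or any reserved-must-be-zero bits set. */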
3572 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
3573 uint32_t offFixup = off;
3574 off = iemNativeEmitJnzToFixed(pReNative, off, off);
3575 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
3576 iemNativeFixupFixedJump(pReNative, offFixup, off);
3577
3578 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
3579 offFixup = off;
3580 off = iemNativeEmitJzToFixed(pReNative, off, off);
3581 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
3582 iemNativeFixupFixedJump(pReNative, offFixup, off);
3583
3584 /** @todo validate that only bits in the fEflOutput mask changed. */
3585#endif
3586
3587#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3588 if (fUpdateSkipping)
3589 {
3590 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
3591 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3592 else
3593 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
3594 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3595 }
3596#else
3597 RT_NOREF_PV(fUpdateSkipping);
3598#endif
3599
3600 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
3601 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
3602 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
3603 return off;
3604}
3605
3606
3607
3608/*********************************************************************************************************************************
3609* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
3610*********************************************************************************************************************************/
3611
3612#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
3613 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
3614
3615#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
3616 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
3617
3618#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
3619 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
3620
3621
3622/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
3623 * IEM_MC_FETCH_SREG_ZX_U64. */
3624DECL_INLINE_THROW(uint32_t)
3625iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
3626{
3627 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3628 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
3629 Assert(iSReg < X86_SREG_COUNT);
3630
3631 /*
3632 * For now, we will not create a shadow copy of a selector. The rationale
3633 * is that, since we do not recompile the popping and loading of segment
3634 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
3635 * pushing and moving to registers, there is only a small chance that the
3636 * shadow copy will be accessed again before the register is reloaded. One
3637 * scenario would be nested calls in 16-bit code, but I doubt it's worth
3638 * the extra register pressure atm.
3639 *
3640 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
3641 * and iemNativeVarRegisterAcquire for a load scenario. We've only got the
3642 * store scenario covered at present (r160730).
3643 */
3644 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3645 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3646 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
3647 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3648 return off;
3649}
3650
3651
3652
3653/*********************************************************************************************************************************
3654* Register references. *
3655*********************************************************************************************************************************/
3656
3657#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
3658 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
3659
3660#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
3661 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
3662
3663/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
3664DECL_INLINE_THROW(uint32_t)
3665iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
3666{
3667 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
3668 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3669 Assert(iGRegEx < 20);
3670
3671 if (iGRegEx < 16)
3672 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
3673 else
3674 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
3675
3676 /* If we've delayed writing back the register value, flush it now. */
3677 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
3678
3679 /* If it's not a const reference we need to flush the shadow copy of the register now. */
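    /* (A writable reference means the caller may modify the GPR through the pointer,
        which would leave any host-register shadow copy stale.) */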
3680 if (!fConst)
3681 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
3682
3683 return off;
3684}
3685
3686#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
3687 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
3688
3689#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
3690 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
3691
3692#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
3693 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
3694
3695#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
3696 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
3697
3698#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
3699 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
3700
3701#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
3702 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
3703
3704#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
3705 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
3706
3707#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
3708 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
3709
3710#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
3711 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
3712
3713#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
3714 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
3715
3716/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
3717DECL_INLINE_THROW(uint32_t)
3718iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
3719{
3720 Assert(iGReg < 16);
3721 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
3722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3723
3724 /* If we've delayed writing back the register value, flush it now. */
3725 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
3726
3727 /* If it's not a const reference we need to flush the shadow copy of the register now. */
3728 if (!fConst)
3729 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
3730
3731 return off;
3732}
3733
3734
3735#undef IEM_MC_REF_EFLAGS /* should not be used. */
3736#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
3737 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
3738 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
3739
3740/** Handles IEM_MC_REF_EFLAGS. */
3741DECL_INLINE_THROW(uint32_t)
3742iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
3743{
3744 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
3745 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3746
3747#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3748 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3749
3750 /* Updating the skipping according to the outputs is a little early, but
3751 we don't have any other hooks for references atm. */
3752 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
3753 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3754 else if (fEflOutput & X86_EFL_STATUS_BITS)
3755 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
3756 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
3757#else
3758 RT_NOREF(fEflInput, fEflOutput);
3759#endif
3760
3761 /* If we've delayed writing back the register value, flush it now. */
3762 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
3763
3764 /* If there is a shadow copy of guest EFLAGS, flush it now. */
3765 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
3766
3767 return off;
3768}
3769
3770
3771/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
3772 * different code from the threaded recompiler, maybe it would be helpful. For now
3773 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
3774#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
3775
3776
3777#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
3778 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
3779
3780#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
3781 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
3782
3783#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
3784 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
3785
3786/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
3787DECL_INLINE_THROW(uint32_t)
3788iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
3789{
3790 Assert(iXReg < 16);
3791 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
3792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3793
3794 /* If we've delayed writing back the register value, flush it now. */
3795 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
3796
3797#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3798 /* If it's not a const reference we need to flush the shadow copy of the register now. */
3799 if (!fConst)
3800 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
3801#else
3802 RT_NOREF(fConst);
3803#endif
3804
3805 return off;
3806}
3807
3808
3809#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
3810 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
3811
3812/** Handles IEM_MC_REF_MXCSR. */
3813DECL_INLINE_THROW(uint32_t)
3814iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
3815{
3816 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
3817 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
3818
3819 /* If we've delayed writing back the register value, flush it now. */
3820 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
3821
3822 /* If there is a shadow copy of guest MXCSR, flush it now. */
3823 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
3824
3825 return off;
3826}
3827
3828
3829
3830/*********************************************************************************************************************************
3831* Effective Address Calculation *
3832*********************************************************************************************************************************/
3833#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
3834 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
3835
3836/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
3837 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
3838DECL_INLINE_THROW(uint32_t)
3839iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3840 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
3841{
3842 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
3843
3844 /*
3845 * Handle the disp16 form with no registers first.
3846 *
3847 * Convert to an immediate value, as that'll delay the register allocation
3848 * and assignment till the memory access / call / whatever and we can use
3849 * a more appropriate register (or none at all).
3850 */
3851 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
3852 {
3853 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
3854 return off;
3855 }
3856
3857 /* Determine the displacement. */
3858 uint16_t u16EffAddr;
3859 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
3860 {
3861 case 0: u16EffAddr = 0; break;
3862 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
3863 case 2: u16EffAddr = u16Disp; break;
3864 default: AssertFailedStmt(u16EffAddr = 0);
3865 }
3866
3867 /* Determine the registers involved. */
3868 uint8_t idxGstRegBase;
3869 uint8_t idxGstRegIndex;
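 /* Standard 16-bit ModR/M r/m table: 0=BX+SI, 1=BX+DI, 2=BP+SI, 3=BP+DI, 4=SI, 5=DI, 6=BP, 7=BX
 (the mod=0/rm=6 disp16-only form was handled above). */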
3870 switch (bRm & X86_MODRM_RM_MASK)
3871 {
3872 case 0:
3873 idxGstRegBase = X86_GREG_xBX;
3874 idxGstRegIndex = X86_GREG_xSI;
3875 break;
3876 case 1:
3877 idxGstRegBase = X86_GREG_xBX;
3878 idxGstRegIndex = X86_GREG_xDI;
3879 break;
3880 case 2:
3881 idxGstRegBase = X86_GREG_xBP;
3882 idxGstRegIndex = X86_GREG_xSI;
3883 break;
3884 case 3:
3885 idxGstRegBase = X86_GREG_xBP;
3886 idxGstRegIndex = X86_GREG_xDI;
3887 break;
3888 case 4:
3889 idxGstRegBase = X86_GREG_xSI;
3890 idxGstRegIndex = UINT8_MAX;
3891 break;
3892 case 5:
3893 idxGstRegBase = X86_GREG_xDI;
3894 idxGstRegIndex = UINT8_MAX;
3895 break;
3896 case 6:
3897 idxGstRegBase = X86_GREG_xBP;
3898 idxGstRegIndex = UINT8_MAX;
3899 break;
3900#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
3901 default:
3902#endif
3903 case 7:
3904 idxGstRegBase = X86_GREG_xBX;
3905 idxGstRegIndex = UINT8_MAX;
3906 break;
3907 }
3908
3909 /*
3910 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
3911 */
3912 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
3913 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
3914 kIemNativeGstRegUse_ReadOnly);
3915 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
3916 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
3917 kIemNativeGstRegUse_ReadOnly)
3918 : UINT8_MAX;
3919#ifdef RT_ARCH_AMD64
3920 if (idxRegIndex == UINT8_MAX)
3921 {
3922 if (u16EffAddr == 0)
3923 {
3924 /* movzx ret, base */
3925 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
3926 }
3927 else
3928 {
3929 /* lea ret32, [base64 + disp32] */
3930 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
3931 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
3932 if (idxRegRet >= 8 || idxRegBase >= 8)
3933 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
3934 pbCodeBuf[off++] = 0x8d;
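 /* ModRM.rm=100b signals a trailing SIB byte, so R12 as base needs the explicit SIB form below
 (RSP was excluded by the assertion above). */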
3935 if (idxRegBase != X86_GREG_x12 /*SIB*/)
3936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
3937 else
3938 {
3939 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
3940 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
3941 }
3942 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
3943 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
3944 pbCodeBuf[off++] = 0;
3945 pbCodeBuf[off++] = 0;
3946 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3947
3948 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
3949 }
3950 }
3951 else
3952 {
3953 /* lea ret32, [index64 + base64 (+ disp32)] */
3954 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
3955 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
3956 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
3957 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
3958 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
3959 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
3960 pbCodeBuf[off++] = 0x8d;
3961 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
3962 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
3963 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
3964 if (bMod == X86_MOD_MEM4)
3965 {
3966 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
3967 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
3968 pbCodeBuf[off++] = 0;
3969 pbCodeBuf[off++] = 0;
3970 }
3971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3972 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
3973 }
3974
3975#elif defined(RT_ARCH_ARM64)
3976 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
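 /* ARM64: sum base [+ index] [+ disp], then UXTH to truncate the result to 16 bits. */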
3977 if (u16EffAddr == 0)
3978 {
3979 if (idxRegIndex == UINT8_MAX)
3980 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
3981 else
3982 {
3983 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
3984 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
3985 }
3986 }
3987 else
3988 {
3989 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
3990 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
3991 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
3992 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
3993 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
3994 else
3995 {
3996 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
3997 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
3998 }
3999 if (idxRegIndex != UINT8_MAX)
4000 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4001 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4002 }
4003
4004#else
4005# error "port me"
4006#endif
4007
4008 if (idxRegIndex != UINT8_MAX)
4009 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4010 iemNativeRegFreeTmp(pReNative, idxRegBase);
4011 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4012 return off;
4013}
4014
4015
4016#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4017 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4018
4019/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4020 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4021DECL_INLINE_THROW(uint32_t)
4022iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4023 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4024{
4025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4026
4027 /*
4028 * Handle the disp32 form with no registers first.
4029 *
4030 * Convert to an immediate value, as that'll delay the register allocation
4031 * and assignment till the memory access / call / whatever and we can use
4032 * a more appropriate register (or none at all).
4033 */
4034 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4035 {
4036 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4037 return off;
4038 }
4039
4040 /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
4041 uint32_t u32EffAddr = 0;
4042 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4043 {
4044 case 0: break;
4045 case 1: u32EffAddr = (int8_t)u32Disp; break;
4046 case 2: u32EffAddr = u32Disp; break;
4047 default: AssertFailed();
4048 }
4049
4050 /* Get the register (or SIB) value. */
4051 uint8_t idxGstRegBase = UINT8_MAX;
4052 uint8_t idxGstRegIndex = UINT8_MAX;
4053 uint8_t cShiftIndex = 0;
4054 switch (bRm & X86_MODRM_RM_MASK)
4055 {
4056 case 0: idxGstRegBase = X86_GREG_xAX; break;
4057 case 1: idxGstRegBase = X86_GREG_xCX; break;
4058 case 2: idxGstRegBase = X86_GREG_xDX; break;
4059 case 3: idxGstRegBase = X86_GREG_xBX; break;
4060 case 4: /* SIB */
4061 {
4062 /* index w/ scaling. */
4063 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4064 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4065 {
4066 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4067 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4068 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4069 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4070 case 4: cShiftIndex = 0; /*no index*/ break;
4071 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4072 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4073 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4074 }
4075
4076 /* base */
4077 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4078 {
4079 case 0: idxGstRegBase = X86_GREG_xAX; break;
4080 case 1: idxGstRegBase = X86_GREG_xCX; break;
4081 case 2: idxGstRegBase = X86_GREG_xDX; break;
4082 case 3: idxGstRegBase = X86_GREG_xBX; break;
4083 case 4:
4084 idxGstRegBase = X86_GREG_xSP;
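 /* Add the fixed ESP offset from uSibAndRspOffset (the pop [xSP] special case, see the 64-bit variant). */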
4085 u32EffAddr += uSibAndRspOffset >> 8;
4086 break;
4087 case 5:
4088 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4089 idxGstRegBase = X86_GREG_xBP;
4090 else
4091 {
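 /* mod=0 && base=5 -> disp32 only, no base register. */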
4092 Assert(u32EffAddr == 0);
4093 u32EffAddr = u32Disp;
4094 }
4095 break;
4096 case 6: idxGstRegBase = X86_GREG_xSI; break;
4097 case 7: idxGstRegBase = X86_GREG_xDI; break;
4098 }
4099 break;
4100 }
4101 case 5: idxGstRegBase = X86_GREG_xBP; break;
4102 case 6: idxGstRegBase = X86_GREG_xSI; break;
4103 case 7: idxGstRegBase = X86_GREG_xDI; break;
4104 }
4105
4106 /*
4107 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4108 * the start of the function.
4109 */
4110 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4111 {
4112 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4113 return off;
4114 }
4115
4116 /*
4117 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4118 */
4119 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4120 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4121 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4122 kIemNativeGstRegUse_ReadOnly);
4123 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4124 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4125 kIemNativeGstRegUse_ReadOnly);
4126
4127 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4128 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4129 {
4130 idxRegBase = idxRegIndex;
4131 idxRegIndex = UINT8_MAX;
4132 }
4133
4134#ifdef RT_ARCH_AMD64
4135 if (idxRegIndex == UINT8_MAX)
4136 {
4137 if (u32EffAddr == 0)
4138 {
4139 /* mov ret, base */
4140 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4141 }
4142 else
4143 {
4144 /* lea ret32, [base64 + disp32] */
4145 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4146 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4147 if (idxRegRet >= 8 || idxRegBase >= 8)
4148 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4149 pbCodeBuf[off++] = 0x8d;
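 /* Pick disp8 when the displacement sign-extends from 8 bits, disp32 otherwise. */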
4150 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4151 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4152 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4153 else
4154 {
4155 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4156 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4157 }
4158 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4159 if (bMod == X86_MOD_MEM4)
4160 {
4161 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4162 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4163 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4164 }
4165 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4166 }
4167 }
4168 else
4169 {
4170 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4171 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4172 if (idxRegBase == UINT8_MAX)
4173 {
4174 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4175 if (idxRegRet >= 8 || idxRegIndex >= 8)
4176 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4177 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4178 pbCodeBuf[off++] = 0x8d;
4179 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4180 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4181 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4182 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4183 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4184 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4185 }
4186 else
4187 {
4188 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4189 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4190 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4191 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4192 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4193 pbCodeBuf[off++] = 0x8d;
4194 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4195 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4196 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4197 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4198 if (bMod != X86_MOD_MEM0)
4199 {
4200 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4201 if (bMod == X86_MOD_MEM4)
4202 {
4203 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4204 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4205 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4206 }
4207 }
4208 }
4209 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4210 }
4211
4212#elif defined(RT_ARCH_ARM64)
4213 if (u32EffAddr == 0)
4214 {
4215 if (idxRegIndex == UINT8_MAX)
4216 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4217 else if (idxRegBase == UINT8_MAX)
4218 {
4219 if (cShiftIndex == 0)
4220 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4221 else
4222 {
4223 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4224 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4225 }
4226 }
4227 else
4228 {
4229 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4230 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4231 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4232 }
4233 }
4234 else
4235 {
4236 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4237 {
4238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4239 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4240 }
4241 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4242 {
4243 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4244 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4245 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4246 }
4247 else
4248 {
4249 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4250 if (idxRegBase != UINT8_MAX)
4251 {
4252 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4253 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4254 }
4255 }
4256 if (idxRegIndex != UINT8_MAX)
4257 {
4258 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4259 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4260 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4261 }
4262 }
4263
4264#else
4265# error "port me"
4266#endif
4267
4268 if (idxRegIndex != UINT8_MAX)
4269 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4270 if (idxRegBase != UINT8_MAX)
4271 iemNativeRegFreeTmp(pReNative, idxRegBase);
4272 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4273 return off;
4274}
4275
4276
4277#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4278 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4279 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4280
4281#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4282 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4283 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4284
4285#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4286 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4287 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4288
4289/**
4290 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4291 *
4292 * @returns New off.
4293 * @param pReNative The native recompile state.
4294 * @param off The current code buffer offset.
4295 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4296 * bit 4 to REX.X. The two bits are part of the
4297 * REG sub-field, which isn't needed in this
4298 * function.
4299 * @param uSibAndRspOffset Two parts:
4300 * - The first 8 bits make up the SIB byte.
4301 * - The next 8 bits are the fixed RSP/ESP offset
4302 * in case of a pop [xSP].
4303 * @param u32Disp The displacement byte/word/dword, if any.
4304 * @param cbInstr The size of the fully decoded instruction. Used
4305 * for RIP relative addressing.
4306 * @param idxVarRet The result variable number.
4307 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4308 * when calculating the address.
4309 *
4310 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4311 */
4312DECL_INLINE_THROW(uint32_t)
4313iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4314 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4315{
4316 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4317
4318 /*
4319 * Special case the rip + disp32 form first.
4320 */
4321 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4322 {
4323#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4324 /* Need to take the current PC offset into account for the displacement; no need to flush here
4325 * as the PC is only accessed read-only and no branching or helper calls are involved. */
4326 u32Disp += pReNative->Core.offPc;
4327#endif
4328
4329 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4330 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4331 kIemNativeGstRegUse_ReadOnly);
4332#ifdef RT_ARCH_AMD64
4333 if (f64Bit)
4334 {
4335 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4336 if ((int32_t)offFinalDisp == offFinalDisp)
4337 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4338 else
4339 {
4340 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4341 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4342 }
4343 }
4344 else
4345 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4346
4347#elif defined(RT_ARCH_ARM64)
4348 if (f64Bit)
4349 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4350 (int64_t)(int32_t)u32Disp + cbInstr);
4351 else
4352 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4353 (int32_t)u32Disp + cbInstr);
4354
4355#else
4356# error "Port me!"
4357#endif
4358 iemNativeRegFreeTmp(pReNative, idxRegPc);
4359 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4360 return off;
4361 }
4362
4363 /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
4364 int64_t i64EffAddr = 0;
4365 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4366 {
4367 case 0: break;
4368 case 1: i64EffAddr = (int8_t)u32Disp; break;
4369 case 2: i64EffAddr = (int32_t)u32Disp; break;
4370 default: AssertFailed();
4371 }
4372
4373 /* Get the register (or SIB) value. */
4374 uint8_t idxGstRegBase = UINT8_MAX;
4375 uint8_t idxGstRegIndex = UINT8_MAX;
4376 uint8_t cShiftIndex = 0;
4377 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4378 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4379 else /* SIB: */
4380 {
4381 /* index w/ scaling. */
4382 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4383 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4384 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4385 if (idxGstRegIndex == 4)
4386 {
4387 /* no index */
4388 cShiftIndex = 0;
4389 idxGstRegIndex = UINT8_MAX;
4390 }
4391
4392 /* base */
4393 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4394 if (idxGstRegBase == 4)
4395 {
4396 /* pop [rsp] hack */
4397 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4398 }
4399 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4400 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4401 {
4402 /* mod=0 and base=5 -> disp32, no base reg. */
4403 Assert(i64EffAddr == 0);
4404 i64EffAddr = (int32_t)u32Disp;
4405 idxGstRegBase = UINT8_MAX;
4406 }
4407 }
4408
4409 /*
4410 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4411 * the start of the function.
4412 */
4413 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4414 {
4415 if (f64Bit)
4416 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4417 else
4418 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4419 return off;
4420 }
4421
4422 /*
4423 * Now emit code that calculates:
4424 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4425 * or if !f64Bit:
4426 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4427 */
4428 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4429 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4430 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4431 kIemNativeGstRegUse_ReadOnly);
4432 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4433 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4434 kIemNativeGstRegUse_ReadOnly);
4435
4436 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4437 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4438 {
4439 idxRegBase = idxRegIndex;
4440 idxRegIndex = UINT8_MAX;
4441 }
4442
4443#ifdef RT_ARCH_AMD64
4444 uint8_t bFinalAdj;
4445 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
4446 bFinalAdj = 0; /* likely */
4447 else
4448 {
4449 /* pop [rsp] with a problematic disp32 value. Split out the
4450 RSP offset and add it separately afterwards (bFinalAdj). */
4451 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
4452 Assert(idxGstRegBase == X86_GREG_xSP);
4453 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
4454 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
4455 Assert(bFinalAdj != 0);
4456 i64EffAddr -= bFinalAdj;
4457 Assert((int32_t)i64EffAddr == i64EffAddr);
4458 }
4459 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
4460//pReNative->pInstrBuf[off++] = 0xcc;
4461
4462 if (idxRegIndex == UINT8_MAX)
4463 {
4464 if (u32EffAddr == 0)
4465 {
4466 /* mov ret, base */
4467 if (f64Bit)
4468 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
4469 else
4470 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4471 }
4472 else
4473 {
4474 /* lea ret, [base + disp32] */
4475 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4476 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4477 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
4478 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4479 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4480 | (f64Bit ? X86_OP_REX_W : 0);
4481 pbCodeBuf[off++] = 0x8d;
4482 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4483 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4484 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4485 else
4486 {
4487 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4488 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4489 }
4490 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4491 if (bMod == X86_MOD_MEM4)
4492 {
4493 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4494 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4495 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4496 }
4497 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4498 }
4499 }
4500 else
4501 {
4502 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4503 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4504 if (idxRegBase == UINT8_MAX)
4505 {
4506 /* lea ret, [(index64 << cShiftIndex) + disp32] */
4507 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
4508 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4509 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4510 | (f64Bit ? X86_OP_REX_W : 0);
4511 pbCodeBuf[off++] = 0x8d;
4512 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4513 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4514 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4515 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4516 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4517 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4518 }
4519 else
4520 {
4521 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4522 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4523 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4524 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4525 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
4526 | (f64Bit ? X86_OP_REX_W : 0);
4527 pbCodeBuf[off++] = 0x8d;
4528 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4529 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4530 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4531 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4532 if (bMod != X86_MOD_MEM0)
4533 {
4534 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4535 if (bMod == X86_MOD_MEM4)
4536 {
4537 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4538 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4539 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4540 }
4541 }
4542 }
4543 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4544 }
4545
4546 if (!bFinalAdj)
4547 { /* likely */ }
4548 else
4549 {
4550 Assert(f64Bit);
4551 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
4552 }
4553
4554#elif defined(RT_ARCH_ARM64)
4555 if (i64EffAddr == 0)
4556 {
4557 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4558 if (idxRegIndex == UINT8_MAX)
4559 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
4560 else if (idxRegBase != UINT8_MAX)
4561 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4562 f64Bit, false /*fSetFlags*/, cShiftIndex);
4563 else
4564 {
4565 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
4566 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
4567 }
4568 }
4569 else
4570 {
4571 if (f64Bit)
4572 { /* likely */ }
4573 else
4574 i64EffAddr = (int32_t)i64EffAddr;
4575
4576 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
4577 {
4578 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4579 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
4580 }
4581 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
4582 {
4583 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4584 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
4585 }
4586 else
4587 {
4588 if (f64Bit)
4589 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
4590 else
4591 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
4592 if (idxRegBase != UINT8_MAX)
4593 {
4594 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4595 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
4596 }
4597 }
4598 if (idxRegIndex != UINT8_MAX)
4599 {
4600 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4601 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4602 f64Bit, false /*fSetFlags*/, cShiftIndex);
4603 }
4604 }
4605
4606#else
4607# error "port me"
4608#endif
4609
4610 if (idxRegIndex != UINT8_MAX)
4611 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4612 if (idxRegBase != UINT8_MAX)
4613 iemNativeRegFreeTmp(pReNative, idxRegBase);
4614 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4615 return off;
4616}
4617
4618
4619/*********************************************************************************************************************************
4620* Memory fetches and stores common *
4621*********************************************************************************************************************************/
4622
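/** Memory access kind for iemNativeEmitMemFetchStoreDataCommon(); the _Zx_ and _Sx_
 * fetch variants zero- and sign-extend the loaded value to the indicated destination width. */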
4623typedef enum IEMNATIVEMITMEMOP
4624{
4625 kIemNativeEmitMemOp_Store = 0,
4626 kIemNativeEmitMemOp_Fetch,
4627 kIemNativeEmitMemOp_Fetch_Zx_U16,
4628 kIemNativeEmitMemOp_Fetch_Zx_U32,
4629 kIemNativeEmitMemOp_Fetch_Zx_U64,
4630 kIemNativeEmitMemOp_Fetch_Sx_U16,
4631 kIemNativeEmitMemOp_Fetch_Sx_U32,
4632 kIemNativeEmitMemOp_Fetch_Sx_U64
4633} IEMNATIVEMITMEMOP;
4634
4635/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
4636 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
4637 * (with iSegReg = UINT8_MAX). */
4638DECL_INLINE_THROW(uint32_t)
4639iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
4640 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
4641 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
4642{
4643 /*
4644 * Assert sanity.
4645 */
4646 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
4647 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
4648 Assert( enmOp != kIemNativeEmitMemOp_Store
4649 || pVarValue->enmKind == kIemNativeVarKind_Immediate
4650 || pVarValue->enmKind == kIemNativeVarKind_Stack);
4651 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
4652 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
4653 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
4654 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
4655 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4656 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
4657#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4658 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
4659 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
4660#else
4661 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
4662#endif
4663 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
4664#ifdef VBOX_STRICT
4665 if (iSegReg == UINT8_MAX)
4666 {
4667 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
4668 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
4669 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
4670 switch (cbMem)
4671 {
4672 case 1:
4673 Assert( pfnFunction
4674 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
4675 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4676 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4677 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4678 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
4679 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
4680 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
4681 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
4682 : UINT64_C(0xc000b000a0009000) ));
4683 break;
4684 case 2:
4685 Assert( pfnFunction
4686 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
4687 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4688 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4689 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
4690 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
4691 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
4692 : UINT64_C(0xc000b000a0009000) ));
4693 break;
4694 case 4:
4695 Assert( pfnFunction
4696 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
4697 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
4698 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
4699 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
4700 : UINT64_C(0xc000b000a0009000) ));
4701 break;
4702 case 8:
4703 Assert( pfnFunction
4704 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
4705 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
4706 : UINT64_C(0xc000b000a0009000) ));
4707 break;
4708#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4709 case sizeof(RTUINT128U):
4710 Assert(enmOp == kIemNativeEmitMemOp_Store || enmOp == kIemNativeEmitMemOp_Fetch);
4711 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
4712 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
4713 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
4714 || ( enmOp == kIemNativeEmitMemOp_Store
4715 && (pfnFunction == UINT64_C(0xc000b000a0009000))));
4716 break;
4717 case sizeof(RTUINT256U):
4718 Assert(enmOp == kIemNativeEmitMemOp_Store || enmOp == kIemNativeEmitMemOp_Fetch);
4719 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
4720 && (pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc))
4721 || ( enmOp == kIemNativeEmitMemOp_Store
4722 && (pfnFunction == UINT64_C(0xc000b000a0009000))));
4723 break;
4724#endif
4725 }
4726 }
4727 else
4728 {
4729 Assert(iSegReg < 6);
4730 switch (cbMem)
4731 {
4732 case 1:
4733 Assert( pfnFunction
4734 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
4735 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
4736 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4737 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4738 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
4739 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
4740 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
4741 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
4742 : UINT64_C(0xc000b000a0009000) ));
4743 break;
4744 case 2:
4745 Assert( pfnFunction
4746 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
4747 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
4748 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
4749 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
4750 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
4751 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
4752 : UINT64_C(0xc000b000a0009000) ));
4753 break;
4754 case 4:
4755 Assert( pfnFunction
4756 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
4757 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
4758 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
4759 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
4760 : UINT64_C(0xc000b000a0009000) ));
4761 break;
4762 case 8:
4763 Assert( pfnFunction
4764 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
4765 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
4766 : UINT64_C(0xc000b000a0009000) ));
4767 break;
4768#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4769 case sizeof(RTUINT128U):
4770 Assert(enmOp == kIemNativeEmitMemOp_Store || enmOp == kIemNativeEmitMemOp_Fetch);
4771 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
4772 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
4773 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
4774 || ( enmOp == kIemNativeEmitMemOp_Store
4775 && (pfnFunction == UINT64_C(0xc000b000a0009000))));
4776 break;
4777 case sizeof(RTUINT256U):
4778 Assert(enmOp == kIemNativeEmitMemOp_Store || enmOp == kIemNativeEmitMemOp_Fetch);
4779 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
4780 && (pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc))
4781 || ( enmOp == kIemNativeEmitMemOp_Store
4782 && (pfnFunction == UINT64_C(0xc000b000a0009000))));
4783 break;
4784#endif
4785 }
4786 }
4787#endif
4788
4789#ifdef VBOX_STRICT
4790 /*
4791 * Check that the fExec flags we've got make sense.
4792 */
4793 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
4794#endif
4795
4796 /*
4797 * To keep things simple we have to commit any pending writes first as we
4798 * may end up making calls.
4799 */
4800 /** @todo we could postpone this till we make the call and reload the
4801 * registers after returning from the call. Not sure if that's sensible or
4802 * not, though. */
4803#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4804 off = iemNativeRegFlushPendingWrites(pReNative, off);
4805#else
4806 /* The program counter is treated differently for now. */
4807 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
4808#endif
4809
4810#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4811 /*
4812 * Move/spill/flush stuff out of call-volatile registers.
4813 * This is the easy way out. We could contain this to the tlb-miss branch
4814 * by saving and restoring active stuff here.
4815 */
4816 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
4817#endif
4818
4819 /*
4820 * Define labels and allocate the result register (trying for the return
4821 * register if we can).
4822 */
4823 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
4824#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4825 uint8_t idxRegValueFetch = UINT8_MAX;
4826
4827 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
4828 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
4829 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
4830 else
4831 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
4832 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
4833 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
4834 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
4835#else
4836 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
4837 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
4838 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
4839 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
4840#endif
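 /* Gather the inline TLB lookup state; TlbState.fSkip is set when no inline lookup will be
 emitted and everything goes through the TlbMiss helper call below. */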
4841 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
4842 uint8_t const idxRegValueStore = !TlbState.fSkip
4843 && enmOp == kIemNativeEmitMemOp_Store
4844 && pVarValue->enmKind != kIemNativeVarKind_Immediate
4845 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
4846 : UINT8_MAX;
4847 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
4848 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
4849 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
4850 : UINT32_MAX;
4851
4852 /*
4853 * Jump to the TLB lookup code.
4854 */
4855 if (!TlbState.fSkip)
4856 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
4857
4858 /*
4859 * TlbMiss:
4860 *
4861 * Call helper to do the fetching.
4862 * We flush all guest register shadow copies here.
4863 */
4864 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
4865
4866#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
4867 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
4868#else
4869 RT_NOREF(idxInstr);
4870#endif
4871
4872#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4873 if (pReNative->Core.offPc)
4874 {
4875 /*
4876 * Update the program counter but restore it at the end of the TlbMiss branch.
4877 * This should allow delaying more program counter updates for the TlbLookup and hit paths
4878 * which are hopefully much more frequent, reducing the number of memory accesses.
4879 */
4880 /* Allocate a temporary PC register. */
4881 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4882
4883 /* Perform the addition and store the result. */
4884 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
4885 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4886
4887 /* Free and flush the PC register. */
4888 iemNativeRegFreeTmp(pReNative, idxPcReg);
4889 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
4890 }
4891#endif
4892
4893#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4894 /* Save variables in volatile registers. */
4895 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
4896 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
4897 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
4898 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
4899#endif
4900
4901 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
4902 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4903#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4904 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
4905 {
4906 /*
4907 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
4908 *
4909 * @note A host register was assigned to the variable for the TlbLookup case above
4910 * and must not be freed here, or the value will not be synced into that register
4911 * further down the road because the variable would no longer know it has a register assigned.
4912 *
4913 * @note For loads it is not required to sync what is in the assigned register with the stack slot
4914 * as it will be overwritten anyway.
4915 */
4916 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
4917 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
4918 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
4919 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
4920 }
4921 else
4922#endif
4923 if (enmOp == kIemNativeEmitMemOp_Store)
4924 {
4925 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
4926 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
4927#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4928 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
4929#else
4930 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
4931 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
4932#endif
4933 }
4934
4935 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
4936 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
4937#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4938 fVolGregMask);
4939#else
4940 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
4941#endif
4942
4943 if (iSegReg != UINT8_MAX)
4944 {
4945 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
4946 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
4947 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
4948 }
4949
4950 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
4951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4952
4953 /* Done setting up parameters, make the call. */
4954 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
4955
4956 /*
4957 * Put the result in the right register if this is a fetch.
4958 */
4959 if (enmOp != kIemNativeEmitMemOp_Store)
4960 {
4961#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4962 if ( cbMem == sizeof(RTUINT128U)
4963 || cbMem == sizeof(RTUINT256U))
4964 {
4965 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
4966
4967 /* Sync the value on the stack with the host register assigned to the variable. */
4968 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
4969 }
4970 else
4971#endif
4972 {
4973 Assert(idxRegValueFetch == pVarValue->idxReg);
4974 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
4975 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
4976 }
4977 }
4978
4979#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
4980 /* Restore variables and guest shadow registers to volatile registers. */
4981 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
4982 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
4983#endif
4984
4985#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4986 if (pReNative->Core.offPc)
4987 {
4988 /*
4989 * Time to restore the program counter to its original value.
4990 */
4991 /* Allocate a temporary PC register. */
4992 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
4993
4994 /* Restore the original value. */
4995 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
4996 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
4997
4998 /* Free and flush the PC register. */
4999 iemNativeRegFreeTmp(pReNative, idxPcReg);
5000 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5001 }
5002#endif
5003
5004#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5005 if (!TlbState.fSkip)
5006 {
5007 /* end of TlbMiss - Jump to the done label. */
5008 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5009 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5010
5011 /*
5012 * TlbLookup:
5013 */
5014 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5015 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5016 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5017
5018 /*
5019 * Emit code to do the actual storing / fetching.
5020 */
5021 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5022# ifdef VBOX_WITH_STATISTICS
5023 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5024 enmOp == kIemNativeEmitMemOp_Store
5025 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5026 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5027# endif
5028 switch (enmOp)
5029 {
5030 case kIemNativeEmitMemOp_Store:
5031 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5032 {
5033 switch (cbMem)
5034 {
5035 case 1:
5036 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5037 break;
5038 case 2:
5039 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5040 break;
5041 case 4:
5042 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5043 break;
5044 case 8:
5045 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5046 break;
5047 default:
5048 AssertFailed();
5049 }
5050 }
5051 else
5052 {
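 /* Immediate value: TlbState.idxReg1 is handed to the store emitters as a scratch register
 for cases where the immediate must be materialized in a register first. */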
5053 switch (cbMem)
5054 {
5055 case 1:
5056 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5057 idxRegMemResult, TlbState.idxReg1);
5058 break;
5059 case 2:
5060 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5061 idxRegMemResult, TlbState.idxReg1);
5062 break;
5063 case 4:
5064 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5065 idxRegMemResult, TlbState.idxReg1);
5066 break;
5067 case 8:
5068 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5069 idxRegMemResult, TlbState.idxReg1);
5070 break;
5071 default:
5072 AssertFailed();
5073 }
5074 }
5075 break;
5076
5077 case kIemNativeEmitMemOp_Fetch:
5078 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5079 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5080 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5081 switch (cbMem)
5082 {
5083 case 1:
5084 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5085 break;
5086 case 2:
5087 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5088 break;
5089 case 4:
5090 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5091 break;
5092 case 8:
5093 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5094 break;
5095#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5096 case sizeof(RTUINT128U):
5097 /*
5098 * No need to sync back the register with the stack, this is done by the generic variable handling
5099 * code if there is a register assigned to a variable and the stack must be accessed.
5100 */
5101 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5102 break;
5103 case sizeof(RTUINT256U):
5104 /*
5105 * No need to sync back the register with the stack, this is done by the generic variable handling
5106 * code if there is a register assigned to a variable and the stack must be accessed.
5107 */
5108 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5109 break;
5110#endif
5111 default:
5112 AssertFailed();
5113 }
5114 break;
5115
5116 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5117 Assert(cbMem == 1);
5118 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5119 break;
5120
5121 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5122 Assert(cbMem == 1 || cbMem == 2);
5123 if (cbMem == 1)
5124 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5125 else
5126 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5127 break;
5128
5129 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5130 switch (cbMem)
5131 {
5132 case 1:
5133 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5134 break;
5135 case 2:
5136 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5137 break;
5138 case 4:
5139 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5140 break;
5141 default:
5142 AssertFailed();
5143 }
5144 break;
5145
5146 default:
5147 AssertFailed();
5148 }
5149
5150 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5151
5152 /*
5153 * TlbDone:
5154 */
5155 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5156
5157 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5158
5159# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5160 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5161 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5162# endif
5163 }
5164#else
5165 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5166#endif
5167
5168 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5169 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5170 return off;
5171}
5172
5173
5174
5175/*********************************************************************************************************************************
5176* Memory fetches (IEM_MEM_FETCH_XXX). *
5177*********************************************************************************************************************************/
5178
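/* Each IEM_MC_FETCH_MEM_XXX variant below expands to a call to iemNativeEmitMemFetchStoreDataCommon, passing
   the access size, the natural alignment mask (0 for byte accesses), the fetch / zero-extend / sign-extend
   operation, and the helper invoked on the TlbMiss path; the _FLAT_ variants pass UINT8_MAX as the segment
   register. */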
5179/* 8-bit segmented: */
5180#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5181 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5182 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5183 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5184
5185#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5186 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5187 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5188 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5189
5190#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5191 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5192 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5193 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5194
5195#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5196 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5197 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5198 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5199
5200#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5201 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5202 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5203 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5204
5205#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5206 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5207 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5208 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5209
5210#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5211 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5212 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5213 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5214
5215/* 16-bit segmented: */
5216#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5217 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5218 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5219 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5220
5221#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5222 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5223 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5224 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5225
5226#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5227 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5228 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5229 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5230
5231#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5232 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5233 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5234 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5235
5236#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5237 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5238 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5239 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5240
5241#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5242 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5243 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5244 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5245
5246
5247/* 32-bit segmented: */
5248#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5249 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5250 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5251 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5252
5253#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5254 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5255 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5256 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5257
5258#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5259 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5260 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5261 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5262
5263#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5264 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5265 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5266 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5267
5268
5269/* 64-bit segmented: */
5270#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5271 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5272 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5273 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5274
5275
5276
5277/* 8-bit flat: */
5278#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5279 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5280 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5281 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5282
5283#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5284 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5285 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5286 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5287
5288#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5289 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5290 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5291 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5292
5293#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5294 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5295 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5296 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5297
5298#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5299 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5300 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5301 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5302
5303#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5304 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5305 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5306 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5307
5308#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5309 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5310 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5311 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5312
5313
5314/* 16-bit flat: */
5315#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5316 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5317 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5318 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5319
5320#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5321 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5322 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5323 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5324
5325#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5326 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5327 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5328 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5329
5330#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5331 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5332 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5333 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5334
5335#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5336 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5337 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5338 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5339
5340#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5341 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5342 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5343 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5344
5345/* 32-bit flat: */
5346#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5347 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5348 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5349 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5350
5351#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5352 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5353 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5354 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5355
5356#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5357 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5358 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5359 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5360
5361#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5362 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5363 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5364 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5365
5366/* 64-bit flat: */
5367#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
5368 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5369 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5370 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5371
5372#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5373/* 128-bit segmented: */
5374#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
5375 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5376 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5377 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5378
5379/* 128-bit flat: */
5380#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
5381 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5382 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5383 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5384
5385/* 128-bit segmented: */
5386#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
5387 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5388 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5389 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
5390
5391/* 128-bit flat: */
5392#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
5393 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5394 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5395 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
5396
5397/* 256-bit segmented: */
5398#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
5399 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
5400 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5401 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
5402
5403/* 256-bit flat: */
5404#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
5405 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
5406 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
5407 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
5408#endif
5409
5410
5411/*********************************************************************************************************************************
5412* Memory stores (IEM_MEM_STORE_XXX). *
5413*********************************************************************************************************************************/
5414
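/* The store variants use the same common emitter with kIemNativeEmitMemOp_Store; the _CONST variants go
   through iemNativeEmitMemStoreConstDataCommon below, which wraps the constant in a temporary variable
   before deferring to the common emitter. */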
5415#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
5416 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
5417 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5418 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5419
5420#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
5421 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
5422 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5423 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5424
5425#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
5426 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
5427 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5428 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5429
5430#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
5431 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
5432 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5433 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5434
5435
5436#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
5437 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
5438 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
5439 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5440
5441#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
5442 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
5443 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
5444 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5445
5446#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
5447 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
5448 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
5449 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5450
5451#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
5452 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
5453 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
5454 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5455
5456
5457#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
5458 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
5459 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
5460
5461#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
5462 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
5463 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
5464
5465#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
5466 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
5467 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
5468
5469#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
5470 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
5471 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
5472
5473
5474#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
5475 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
5476 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
5477
5478#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
5479 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
5480 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
5481
5482#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
5483 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
5484 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
5485
5486#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
5487 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
5488 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
5489
5490/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
5491 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
5492DECL_INLINE_THROW(uint32_t)
5493iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
5494 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
5495{
5496 /*
5497 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
5498 * to do the grunt work.
5499 */
5500 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
5501 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
5502 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
5503 pfnFunction, idxInstr);
5504 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
5505 return off;
5506}
5507
5508
5509
5510/*********************************************************************************************************************************
5511* Stack Accesses. *
5512*********************************************************************************************************************************/
5513/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
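/* Byte 0 = width of the pushed value in bits, byte 1 = width of the flat stack pointer in bits (0 = segmented),
   byte 2 = segment register push; e.g. IEM_MC_FLAT32_PUSH_U32_SREG passes RT_MAKE_U32_FROM_U8(32, 32, 1, 0).
   iemNativeEmitStackPush below unpacks this again via RT_BYTE1/RT_BYTE2/RT_BYTE3. */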
5514#define IEM_MC_PUSH_U16(a_u16Value) \
5515 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
5516 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
5517#define IEM_MC_PUSH_U32(a_u32Value) \
5518 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
5519 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
5520#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
5521 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
5522 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
5523#define IEM_MC_PUSH_U64(a_u64Value) \
5524 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
5525 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
5526
5527#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
5528 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
5529 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
5530#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
5531 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
5532 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
5533#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
5534 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
5535 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
5536
5537#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
5538 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
5539 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
5540#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
5541 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
5542 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
5543
5544
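/** Emits the Use16BitSp path of a push: decrements SP by cbMem with 16-bit wrap-around, leaving the
 * zero-extended effective stack pointer in idxRegEffSp while only bits 15:0 of idxRegRsp are changed. */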
5545DECL_FORCE_INLINE_THROW(uint32_t)
5546iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
5547{
5548 /* Use16BitSp: */
5549#ifdef RT_ARCH_AMD64
5550 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
5551 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5552#else
5553 /* sub regeff, regrsp, #cbMem */
5554 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
5555 /* and regeff, regeff, #0xffff */
5556 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5557 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
5558 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0, keeping the other RSP bits as is. */
5559 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
5560#endif
5561 return off;
5562}
5563
5564
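/** Emits the Use32BitSp path of a push: decrements the 32-bit ESP by cbMem and copies the result
 * (zero-extended) to idxRegEffSp. */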
5565DECL_FORCE_INLINE(uint32_t)
5566iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
5567{
5568 /* Use32BitSp: */
5569 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
5570 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5571 return off;
5572}
5573
5574
5575/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
5576DECL_INLINE_THROW(uint32_t)
5577iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
5578 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
5579{
5580 /*
5581 * Assert sanity.
5582 */
5583 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5584 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5585#ifdef VBOX_STRICT
5586 if (RT_BYTE2(cBitsVarAndFlat) != 0)
5587 {
5588 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5589 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5590 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5591 Assert( pfnFunction
5592 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
5593 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
5594 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
5595 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
5596 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
5597 : UINT64_C(0xc000b000a0009000) ));
5598 }
5599 else
5600 Assert( pfnFunction
5601 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
5602 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
5603 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
5604 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
5605 : UINT64_C(0xc000b000a0009000) ));
5606#endif
5607
5608#ifdef VBOX_STRICT
5609 /*
5610 * Check that the fExec flags we've got make sense.
5611 */
5612 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5613#endif
5614
5615 /*
5616 * To keep things simple we have to commit any pending writes first as we
5617 * may end up making calls.
5618 */
5619 /** @todo we could postpone this till we make the call and reload the
5620 * registers after returning from the call. Not sure if that's sensible or
5621 * not, though. */
5622 off = iemNativeRegFlushPendingWrites(pReNative, off);
5623
5624 /*
5625 * First we calculate the new RSP and the effective stack pointer value.
5626 * For 64-bit mode and flat 32-bit these two are the same.
5627 * (Code structure is very similar to that of PUSH)
5628 */
5629 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
5630 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
5631 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
5632 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
5633 ? cbMem : sizeof(uint16_t);
5634 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
5635 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
5636 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
5637 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
5638 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
5639 if (cBitsFlat != 0)
5640 {
5641 Assert(idxRegEffSp == idxRegRsp);
5642 Assert(cBitsFlat == 32 || cBitsFlat == 64);
5643 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
5644 if (cBitsFlat == 64)
5645 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
5646 else
5647 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
5648 }
5649 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
5650 {
5651 Assert(idxRegEffSp != idxRegRsp);
5652 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
5653 kIemNativeGstRegUse_ReadOnly);
5654#ifdef RT_ARCH_AMD64
5655 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5656#else
5657 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5658#endif
5659 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
5660 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
5661 offFixupJumpToUseOtherBitSp = off;
5662 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
5663 {
5664 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
5665 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5666 }
5667 else
5668 {
5669 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
5670 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5671 }
5672 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5673 }
5674 /* SpUpdateEnd: */
5675 uint32_t const offLabelSpUpdateEnd = off;
5676
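 /* From here on the emitted code follows the usual TLB pattern: an inline TlbLookup path that stores the
    value directly on a hit, a TlbMiss path that calls pfnFunction, and a TlbDone join point after which the
    updated RSP is written back to cpum.GstCtx.rsp. */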
5677 /*
5678 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
5679 * we're skipping lookup).
5680 */
5681 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
5682 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
5683 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5684 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
5685 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5686 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5687 : UINT32_MAX;
5688 uint8_t const idxRegValue = !TlbState.fSkip
5689 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5690 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
5691 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
5692 : UINT8_MAX;
5693 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5694
5695
5696 if (!TlbState.fSkip)
5697 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
5698 else
5699 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
5700
5701 /*
5702 * Use16BitSp / Use32BitSp (whichever SP width the inline path above did not handle):
5703 */
5704 if (cBitsFlat == 0)
5705 {
5706#ifdef RT_ARCH_AMD64
5707 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5708#else
5709 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
5710#endif
5711 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
5712 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
5713 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5714 else
5715 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
5716 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
5717 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5718 }
5719
5720 /*
5721 * TlbMiss:
5722 *
5723 * Call helper to do the pushing.
5724 */
5725 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
5726
5727#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5728 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5729#else
5730 RT_NOREF(idxInstr);
5731#endif
5732
5733 /* Save variables in volatile registers. */
5734 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5735 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
5736 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
5737 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
5738 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5739
5740 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
5741 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
5742 {
5743 /* Swap them using ARG0 as temp register: */
5744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
5745 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
5746 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
5747 }
5748 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
5749 {
5750 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
5751 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
5752 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5753
5754 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
5755 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
5756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
5757 }
5758 else
5759 {
5760 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
5761 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
5762
5763 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
5764 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
5765 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
5766 }
5767
5768 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5769 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5770
5771 /* Done setting up parameters, make the call. */
5772 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5773
5774 /* Restore variables and guest shadow registers to volatile registers. */
5775 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5776 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5777
5778#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5779 if (!TlbState.fSkip)
5780 {
5781 /* end of TlbMiss - Jump to the done label. */
5782 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5783 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5784
5785 /*
5786 * TlbLookup:
5787 */
5788 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
5789 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
5790
5791 /*
5792 * Emit code to do the actual storing / fetching.
5793 */
5794 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5795# ifdef VBOX_WITH_STATISTICS
5796 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5797 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
5798# endif
5799 if (idxRegValue != UINT8_MAX)
5800 {
5801 switch (cbMemAccess)
5802 {
5803 case 2:
5804 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
5805 break;
5806 case 4:
5807 if (!fIsIntelSeg)
5808 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
5809 else
5810 {
5811 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
5812 PUSH FS in real mode, so we have to try to emulate that here.
5813 We borrow the now unused idxReg1 from the TLB lookup code here. */
5814 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
5815 kIemNativeGstReg_EFlags);
5816 if (idxRegEfl != UINT8_MAX)
5817 {
5818#ifdef RT_ARCH_AMD64
5819 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
5820 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
5821 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
5822#else
5823 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
5824 off, TlbState.idxReg1, idxRegEfl,
5825 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
5826#endif
5827 iemNativeRegFreeTmp(pReNative, idxRegEfl);
5828 }
5829 else
5830 {
5831 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
5832 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5833 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
5834 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
5835 }
5836 /* ASSUMES the upper half of idxRegValue is ZERO. */
5837 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
5838 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
5839 }
5840 break;
5841 case 8:
5842 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
5843 break;
5844 default:
5845 AssertFailed();
5846 }
5847 }
5848 else
5849 {
5850 switch (cbMemAccess)
5851 {
5852 case 2:
5853 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5854 idxRegMemResult, TlbState.idxReg1);
5855 break;
5856 case 4:
5857 Assert(!fIsSegReg);
5858 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5859 idxRegMemResult, TlbState.idxReg1);
5860 break;
5861 case 8:
5862 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
5863 break;
5864 default:
5865 AssertFailed();
5866 }
5867 }
5868
5869 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5870 TlbState.freeRegsAndReleaseVars(pReNative);
5871
5872 /*
5873 * TlbDone:
5874 *
5875 * Commit the new RSP value.
5876 */
5877 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5878 }
5879#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
5880
5881 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
5882 iemNativeRegFreeTmp(pReNative, idxRegRsp);
5883 if (idxRegEffSp != idxRegRsp)
5884 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
5885
5886 /* The value variable is implicitly flushed. */
5887 if (idxRegValue != UINT8_MAX)
5888 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5889 iemNativeVarFreeLocal(pReNative, idxVarValue);
5890
5891 return off;
5892}
5893
5894
5895
5896/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
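/* Byte 0 = width of the popped value in bits, byte 1 = width of the flat stack pointer in bits (0 = segmented);
   unpacked again via RT_BYTE1/RT_BYTE2 in iemNativeEmitStackPopGReg below. */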
5897#define IEM_MC_POP_GREG_U16(a_iGReg) \
5898 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
5899 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
5900#define IEM_MC_POP_GREG_U32(a_iGReg) \
5901 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
5902 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
5903#define IEM_MC_POP_GREG_U64(a_iGReg) \
5904 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
5905 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
5906
5907#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
5908 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
5909 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
5910#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
5911 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
5912 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
5913
5914#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
5915 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
5916 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
5917#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
5918 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
5919 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
5920
5921
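/** Emits the Use16BitSp path of a pop: copies the old SP (zero-extended) into idxRegEffSp and increments
 * SP by cbMem with 16-bit wrap-around; idxRegTmp is only needed by the arm64 code path. */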
5922DECL_FORCE_INLINE_THROW(uint32_t)
5923iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
5924 uint8_t idxRegTmp)
5925{
5926 /* Use16BitSp: */
5927#ifdef RT_ARCH_AMD64
5928 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5929 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
5930 RT_NOREF(idxRegTmp);
5931#else
5932 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
5933 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
5934 /* add tmp, regrsp, #cbMem */
5935 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
5936 /* and tmp, tmp, #0xffff */
5937 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
5938 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
5939 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
5940 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
5941#endif
5942 return off;
5943}
5944
5945
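/** Emits the Use32BitSp path of a pop: copies the old ESP (zero-extended) into idxRegEffSp and increments
 * the 32-bit ESP by cbMem. */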
5946DECL_FORCE_INLINE(uint32_t)
5947iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
5948{
5949 /* Use32BitSp: */
5950 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
5951 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
5952 return off;
5953}
5954
5955
5956/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
5957DECL_INLINE_THROW(uint32_t)
5958iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
5959 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
5960{
5961 /*
5962 * Assert sanity.
5963 */
5964 Assert(idxGReg < 16);
5965#ifdef VBOX_STRICT
5966 if (RT_BYTE2(cBitsVarAndFlat) != 0)
5967 {
5968 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5969 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5970 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5971 Assert( pfnFunction
5972 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
5973 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
5974 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
5975 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
5976 : UINT64_C(0xc000b000a0009000) ));
5977 }
5978 else
5979 Assert( pfnFunction
5980 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
5981 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
5982 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
5983 : UINT64_C(0xc000b000a0009000) ));
5984#endif
5985
5986#ifdef VBOX_STRICT
5987 /*
5988 * Check that the fExec flags we've got make sense.
5989 */
5990 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5991#endif
5992
5993 /*
5994 * To keep things simple we have to commit any pending writes first as we
5995 * may end up making calls.
5996 */
5997 off = iemNativeRegFlushPendingWrites(pReNative, off);
5998
5999 /*
6000 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
6001 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6002 * directly as the effective stack pointer.
6003 * (Code structure is very similar to that of PUSH)
6004 */
6005 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6006 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6007 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6008 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6009 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6010 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6011 * will be the resulting register value. */
6012 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6013
6014 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6015 if (cBitsFlat != 0)
6016 {
6017 Assert(idxRegEffSp == idxRegRsp);
6018 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6019 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6020 }
6021 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6022 {
6023 Assert(idxRegEffSp != idxRegRsp);
6024 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6025 kIemNativeGstRegUse_ReadOnly);
6026#ifdef RT_ARCH_AMD64
6027 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6028#else
6029 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6030#endif
6031 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6032 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6033 offFixupJumpToUseOtherBitSp = off;
6034 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6035 {
6036/** @todo can skip idxRegRsp updating when popping ESP. */
6037 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6038 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6039 }
6040 else
6041 {
6042 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6043 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6044 }
6045 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6046 }
6047 /* SpUpdateEnd: */
6048 uint32_t const offLabelSpUpdateEnd = off;
6049
6050 /*
6051 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
6052 * we're skipping lookup).
6053 */
6054 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6055 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6056 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6057 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6058 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6059 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6060 : UINT32_MAX;
6061
6062 if (!TlbState.fSkip)
6063 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6064 else
6065 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6066
6067 /*
6068 * Use16BitSp / Use32BitSp (whichever SP width the inline path above did not handle):
6069 */
6070 if (cBitsFlat == 0)
6071 {
6072#ifdef RT_ARCH_AMD64
6073 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6074#else
6075 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6076#endif
6077 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6078 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6079 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6080 else
6081 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6082 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6083 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6084 }
6085
6086 /*
6087 * TlbMiss:
6088 *
6089 * Call helper to do the popping.
6090 */
6091 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6092
6093#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6094 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6095#else
6096 RT_NOREF(idxInstr);
6097#endif
6098
6099 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6100 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6101 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6102 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6103
6104
6105 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6106 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6107 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6108
6109 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6110 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6111
6112 /* Done setting up parameters, make the call. */
6113 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6114
6115 /* Move the return register content to idxRegMemResult. */
6116 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6117 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6118
6119 /* Restore variables and guest shadow registers to volatile registers. */
6120 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6121 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6122
6123#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6124 if (!TlbState.fSkip)
6125 {
6126 /* end of TlbMiss - Jump to the done label. */
6127 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6128 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6129
6130 /*
6131 * TlbLookup:
6132 */
6133 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6134 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6135
6136 /*
6137 * Emit code to load the value (the address is in idxRegMemResult; the loaded value replaces it).
6138 */
6139 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6140# ifdef VBOX_WITH_STATISTICS
6141 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6142 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6143# endif
6144 switch (cbMem)
6145 {
6146 case 2:
6147 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6148 break;
6149 case 4:
6150 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6151 break;
6152 case 8:
6153 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6154 break;
6155 default:
6156 AssertFailed();
6157 }
6158
6159 TlbState.freeRegsAndReleaseVars(pReNative);
6160
6161 /*
6162 * TlbDone:
6163 *
6164 * Set the new RSP value (FLAT accesses needs to calculate it first) and
6165 * commit the popped register value.
6166 */
6167 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6168 }
6169#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6170
6171 if (idxGReg != X86_GREG_xSP)
6172 {
6173 /* Set the register. */
6174 if (cbMem >= sizeof(uint32_t))
6175 {
6176#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6177 AssertMsg( pReNative->idxCurCall == 0
6178 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6179 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6180#endif
6181 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6182 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6183 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6184 }
6185 else
6186 {
6187 Assert(cbMem == sizeof(uint16_t));
6188 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6189 kIemNativeGstRegUse_ForUpdate);
6190 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6191 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6192 iemNativeRegFreeTmp(pReNative, idxRegDst);
6193 }
6194
6195 /* Complete RSP calculation for FLAT mode. */
6196 if (idxRegEffSp == idxRegRsp)
6197 {
6198 if (cBitsFlat == 64)
6199 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6200 else
6201 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6202 }
6203 }
6204 else
6205 {
6206 /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
6207 if (cbMem == sizeof(uint64_t))
6208 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6209 else if (cbMem == sizeof(uint32_t))
6210 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6211 else
6212 {
6213 if (idxRegEffSp == idxRegRsp)
6214 {
6215 if (cBitsFlat == 64)
6216 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
6217 else
6218 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
6219 }
6220 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6221 }
6222 }
6223 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6224
6225 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6226 if (idxRegEffSp != idxRegRsp)
6227 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6228 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6229
6230 return off;
6231}
6232
6233
6234
6235/*********************************************************************************************************************************
6236* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6237*********************************************************************************************************************************/
6238
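/* Each IEM_MC_MEM_MAP_XXX variant expands to iemNativeEmitMemMapCommon with the mapped type's size, the
   IEM_ACCESS_DATA_* access mode, the natural alignment mask and the matching mapping helper for the
   TlbMiss path. */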
6239#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6240 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6241 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6242 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6243
6244#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6245 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6246 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6247 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6248
6249#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6250 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6251 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6252 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6253
6254#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6255 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6256 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6257 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
6258
6259
6260#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6261 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6262 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6263 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
6264
6265#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6266 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6267 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6268 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
6269
6270#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6271 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6272 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6273 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6274
6275#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6276 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6277 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6278 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
6279
6280#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6281 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
6282 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6283 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6284
6285
6286#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6287 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6288 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6289 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
6290
6291#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6292 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6293 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6294 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
6295
6296#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6297 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6298 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6299 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6300
6301#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6302 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6303 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6304 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
6305
6306#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6307 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
6308 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6309 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6310
6311
6312#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6313 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6314 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6315 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
6316
6317#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6318 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6319 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6320 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
6321#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6322 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6323 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6324 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6325
6326#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6327 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6328 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6329 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
6330
6331#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6332 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
6333 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6334 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
6335
6336
6337#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6338 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6339 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6340 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
6341
6342#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6343 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
6344 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6345 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
6346
6347
6348#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6349 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6350 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6351 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
6352
6353#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6354 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6355 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6356 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
6357
6358#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6359 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6360 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6361 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
6362
6363#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6364 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
6365 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6366 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
6367
6368
6369
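/*
 * The IEM_MC_MEM_FLAT_MAP_* variants are the flat-address-space counterparts of the above:
 * they pass UINT8_MAX as the segment register index, which iemNativeEmitMemMapCommon() (and
 * the strict-build asserts therein) treats as "no segmentation", i.e. 64-bit mode or a flat
 * 32-bit mode, and they use the corresponding iemNativeHlpMemFlatMapData* helpers.
 */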
6370#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6371 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6372 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6373 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
6374
6375#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6376 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6377 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6378 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
6379
6380#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6381 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6382 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6383 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
6384
6385#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
6386 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6387 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6388 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
6389
6390
6391#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6392 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6393 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6394 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
6395
6396#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6397 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6398 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6399 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
6400
6401#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6402 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6403 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6404 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6405
6406#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
6407 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6408 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6409 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
6410
6411#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
6412 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
6413 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6414 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
6415
6416
6417#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6418 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6419 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6420 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
6421
6422#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6423 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6424 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6425 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
6426
6427#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6428 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6429 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6430 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
6431
6432#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
6433 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6434 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6435 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
6436
6437#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
6438 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
6439 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6440 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
6441
6442
6443#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6444 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6445 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6446 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
6447
6448#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6449 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6450 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6451 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
6452
6453#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6454 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6455 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6456 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
6457
6458#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
6459 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6460 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6461 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
6462
6463#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
6464 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
6465 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6466 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
6467
6468
6469#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
6470 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
6471 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
6472 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
6473
6474#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
6475 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
6476 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
6477 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
6478
6479
6480#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6481 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6482 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6483 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
6484
6485#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6486 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6487 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6488 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
6489
6490#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6492 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6493 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
6494
6495#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
6496 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
6497 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
6498 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
6499
6500
6501DECL_INLINE_THROW(uint32_t)
6502iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
6503 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
6504 uintptr_t pfnFunction, uint8_t idxInstr)
6505{
6506 /*
6507 * Assert sanity.
6508 */
6509 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
6510 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
6511 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
6512 && pVarMem->cbVar == sizeof(void *),
6513 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6514
6515 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
6516 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
6517 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
6518 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
6519 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6520
6521 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6523 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6524 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6525 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6526
6527 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6528
6529 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6530
6531#ifdef VBOX_STRICT
6532# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
6533 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
6534 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
6535 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
6536 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
6537# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
6538 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
6539 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
6540 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
6541
6542 if (iSegReg == UINT8_MAX)
6543 {
6544 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6545 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6546 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6547 switch (cbMem)
6548 {
6549 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
6550 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
6551 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
6552 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
6553 case 10:
6554 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
6555 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
6556 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
6557 break;
6558 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
6559# if 0
6560 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
6561 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
6562# endif
6563 default: AssertFailed(); break;
6564 }
6565 }
6566 else
6567 {
6568 Assert(iSegReg < 6);
6569 switch (cbMem)
6570 {
6571 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
6572 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
6573 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
6574 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
6575 case 10:
6576 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
6577 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
6578 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
6579 break;
6580 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
6581# if 0
6582 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
6583 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
6584# endif
6585 default: AssertFailed(); break;
6586 }
6587 }
6588# undef IEM_MAP_HLP_FN
6589# undef IEM_MAP_HLP_FN_NO_AT
6590#endif
6591
6592#ifdef VBOX_STRICT
6593 /*
6594 * Check that the fExec flags we've got make sense.
6595 */
6596 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6597#endif
6598
6599 /*
6600 * To keep things simple we have to commit any pending writes first as we
6601 * may end up making calls.
6602 */
6603 off = iemNativeRegFlushPendingWrites(pReNative, off);
6604
6605#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6606 /*
6607 * Move/spill/flush stuff out of call-volatile registers.
6608 * This is the easy way out. We could contain this to the tlb-miss branch
6609 * by saving and restoring active stuff here.
6610 */
6611 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
6612 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6613#endif
6614
6615 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
6616       while the tlb-miss code path will temporarily put it on the stack.
6617       Set the type to stack here so we don't need to do it twice below. */
6618 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
6619 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
6620 /** @todo use a tmp register from TlbState, since they'll be free after tlb
6621 * lookup is done. */
6622
6623 /*
6624 * Define labels and allocate the result register (trying for the return
6625 * register if we can).
6626 */
6627 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6628 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6629 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
6630 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
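    /* Preferring IEMNATIVE_CALL_RET_GREG for the result means the TLB-miss path can usually
       skip the extra register-to-register move after the helper call returns (see the
       idxRegMemResult != IEMNATIVE_CALL_RET_GREG check further down). */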
6631 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
6632 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6633 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6634 : UINT32_MAX;
6635//off=iemNativeEmitBrk(pReNative, off, 0);
6636 /*
6637 * Jump to the TLB lookup code.
6638 */
6639 if (!TlbState.fSkip)
6640 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6641
6642 /*
6643 * TlbMiss:
6644 *
6645 * Call helper to do the fetching.
6646 * We flush all guest register shadow copies here.
6647 */
6648 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6649
6650#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6651 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6652#else
6653 RT_NOREF(idxInstr);
6654#endif
6655
6656#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6657 /* Save variables in volatile registers. */
6658 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
6659 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6660#endif
6661
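    /* Helper call ABI used below: arg0 = pVCpu, arg1 = &bUnmapInfo (stack slot address),
       arg2 = GCPtrMem, and arg3 = iSegReg for the segmented (non-flat) variants only.
       The mapping address comes back in the return register. */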
6662 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
6663 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
6664#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6665 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6666#else
6667 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6668#endif
6669
6670 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
6671 if (iSegReg != UINT8_MAX)
6672 {
6673 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6674 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
6675 }
6676
6677 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
6678 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
6679 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
6680
6681 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6682 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6683
6684 /* Done setting up parameters, make the call. */
6685 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6686
6687 /*
6688 * Put the output in the right registers.
6689 */
6690 Assert(idxRegMemResult == pVarMem->idxReg);
6691 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6692 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6693
6694#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6695 /* Restore variables and guest shadow registers to volatile registers. */
6696 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6697 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6698#endif
6699
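    /* The helper wrote bUnmapInfo thru the arg1 pointer into its stack slot, so reload it
       into the register acquired for the variable earlier. */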
6700 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
6701 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
6702
6703#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6704 if (!TlbState.fSkip)
6705 {
6706        /* End of TlbMiss - jump to the done label. */
6707 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6708 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6709
6710 /*
6711 * TlbLookup:
6712 */
6713 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
6714 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6715# ifdef VBOX_WITH_STATISTICS
6716 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
6717 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
6718# endif
6719
6720 /* [idxVarUnmapInfo] = 0; */
6721 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
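        /* A zero bUnmapInfo is what lets iemNativeEmitMemCommitAndUnmap() below skip the
           unmap helper call for mappings that were satisfied directly via the TLB. */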
6722
6723 /*
6724 * TlbDone:
6725 */
6726 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6727
6728 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6729
6730# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6731 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6732 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6733# endif
6734 }
6735#else
6736 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
6737#endif
6738
6739 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
6740 iemNativeVarRegisterRelease(pReNative, idxVarMem);
6741
6742 return off;
6743}
6744
6745
6746#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
6747 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
6748 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
6749
6750#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
6751 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
6752 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
6753
6754#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
6755 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
6756 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
6757
6758#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
6759 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
6760 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
6761
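/*
 * For reference, the MC blocks use the map and commit-and-unmap statements in pairs; a
 * typical sequence in an instruction body looks roughly like this (illustrative only,
 * not verbatim from any particular instruction):
 *
 *      IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
 *      ... operate on *pu16Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 *
 * The bUnmapInfo value produced by the map statement is what the commit code below inspects.
 */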
6762DECL_INLINE_THROW(uint32_t)
6763iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
6764 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
6765{
6766 /*
6767 * Assert sanity.
6768 */
6769 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
6770#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
6771 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
6772#endif
6773 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
6774 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
6775 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
6776#ifdef VBOX_STRICT
6777 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
6778 {
6779 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
6780 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
6781 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
6782 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
6783 case IEM_ACCESS_TYPE_WRITE:
6784 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
6785 case IEM_ACCESS_TYPE_READ:
6786 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
6787 default: AssertFailed();
6788 }
6789#else
6790 RT_NOREF(fAccess);
6791#endif
6792
6793 /*
6794 * To keep things simple we have to commit any pending writes first as we
6795 * may end up making calls (there shouldn't be any at this point, so this
6796 * is just for consistency).
6797 */
6798 /** @todo we could postpone this till we make the call and reload the
6799 * registers after returning from the call. Not sure if that's sensible or
6800 * not, though. */
6801 off = iemNativeRegFlushPendingWrites(pReNative, off);
6802
6803 /*
6804 * Move/spill/flush stuff out of call-volatile registers.
6805 *
6806 * We exclude any register holding the bUnmapInfo variable, as we'll be
6807 * checking it after returning from the call and will free it afterwards.
6808 */
6809 /** @todo save+restore active registers and maybe guest shadows in miss
6810 * scenario. */
6811 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
6812 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
6813
6814 /*
6815 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
6816 * to call the unmap helper function.
6817 *
6818     * The likelihood of it being zero is higher than for the TLB hit when doing
6819     * the mapping, as a TLB miss for a well aligned and unproblematic memory
6820 * access should also end up with a mapping that won't need special unmapping.
6821 */
6822 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
6823 * should speed up things for the pure interpreter as well when TLBs
6824 * are enabled. */
6825#ifdef RT_ARCH_AMD64
6826 if (pVarUnmapInfo->idxReg == UINT8_MAX)
6827 {
6828 /* test byte [rbp - xxx], 0ffh */
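        /* 0xf6 /0 ib is 'test r/m8, imm8'; iemNativeEmitGprByBpDisp() supplies the ModRM
           byte (reg=0) and the RBP-relative displacement of the variable's stack slot. */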
6829 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6830 pbCodeBuf[off++] = 0xf6;
6831 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
6832 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
6833 pbCodeBuf[off++] = 0xff;
6834 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6835 }
6836 else
6837#endif
6838 {
6839 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
6840 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
6841 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
6842 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
6843 }
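    /* If bUnmapInfo is zero nothing needs committing or unmapping (typically a direct
       TLB-hit mapping), so branch around the helper call; the jz target is fixed up
       once the call has been emitted. */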
6844 uint32_t const offJmpFixup = off;
6845 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
6846
6847 /*
6848 * Call the unmap helper function.
6849 */
6850#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
6851 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6852#else
6853 RT_NOREF(idxInstr);
6854#endif
6855
6856 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
6857 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
6858 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6859
6860 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6861 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6862
6863 /* Done setting up parameters, make the call. */
6864 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6865
6866    /* The bUnmapInfo variable is implicitly freed by these MCs. */
6867 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
6868
6869 /*
6870 * Done, just fixup the jump for the non-call case.
6871 */
6872 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
6873
6874 return off;
6875}
6876
6877
6878
6879/*********************************************************************************************************************************
6880* State and Exceptions *
6881*********************************************************************************************************************************/
6882
6883#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6884#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
6885
6886#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6887#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6888#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
6889
6890#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6891#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
6892#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
6893
6894
6895DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
6896{
6897 /** @todo this needs a lot more work later. */
6898 RT_NOREF(pReNative, fForChange);
6899 return off;
6900}
6901
6902
6903
6904/*********************************************************************************************************************************
6905* Emitters for FPU related operations. *
6906*********************************************************************************************************************************/
6907
6908#define IEM_MC_FETCH_FCW(a_u16Fcw) \
6909 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
6910
6911/** Emits code for IEM_MC_FETCH_FCW. */
6912DECL_INLINE_THROW(uint32_t)
6913iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
6914{
6915 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6916 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
6917
6918 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6919
6920 /* Allocate a temporary FCW register. */
6921 /** @todo eliminate extra register */
6922 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
6923 kIemNativeGstRegUse_ReadOnly);
6924
6925 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
6926
6927 /* Free but don't flush the FCW register. */
6928 iemNativeRegFreeTmp(pReNative, idxFcwReg);
6929 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6930
6931 return off;
6932}
6933
6934
6935#define IEM_MC_FETCH_FSW(a_u16Fsw) \
6936 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
6937
6938/** Emits code for IEM_MC_FETCH_FSW. */
6939DECL_INLINE_THROW(uint32_t)
6940iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
6941{
6942 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6943 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
6944
6945 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
6946 /* Allocate a temporary FSW register. */
6947 /** @todo eliminate extra register */
6948 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
6949 kIemNativeGstRegUse_ReadOnly);
6950
6951 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
6952
6953 /* Free but don't flush the FSW register. */
6954 iemNativeRegFreeTmp(pReNative, idxFswReg);
6955 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6956
6957 return off;
6958}
6959
6960
6961
6962#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6963
6964
6965/*********************************************************************************************************************************
6966* Emitters for SSE/AVX specific operations. *
6967*********************************************************************************************************************************/
6968
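/*
 * The emitters in this section all follow the same basic pattern: acquire a host SIMD
 * register shadowing the guest XMM/YMM register (read-only, for-update or for-full-write
 * as appropriate), acquire a host register for the variable, emit the actual load/store,
 * mark the affected 128-bit half/halves dirty so the shadow is written back to CPUMCTX
 * when it is eventually flushed, and finally release both registers without flushing.
 */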
6969#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
6970 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
6971
6972/** Emits code for IEM_MC_COPY_XREG_U128. */
6973DECL_INLINE_THROW(uint32_t)
6974iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
6975{
6976 /* Allocate destination and source register. */
6977 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
6978 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
6979 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
6980 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
6981
6982 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
6983 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
6984 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
6985
6986 /* Free but don't flush the source and destination register. */
6987 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
6988 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
6989
6990 return off;
6991}
6992
6993
6994#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
6995 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
6996
6997/** Emits code for IEM_MC_FETCH_XREG_U64. */
6998DECL_INLINE_THROW(uint32_t)
6999iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7000{
7001 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7002 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7003
7004 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7005 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7006
7007 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7008 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7009
7010 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7011
7012 /* Free but don't flush the source register. */
7013 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7014 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7015
7016 return off;
7017}
7018
7019
7020#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
7021    off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
7022
7023/** Emits code for IEM_MC_FETCH_XREG_U32. */
7024DECL_INLINE_THROW(uint32_t)
7025iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7026{
7027 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7028 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7029
7030 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7031 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7032
7033 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7034 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7035
7036 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7037
7038 /* Free but don't flush the source register. */
7039 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7040 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7041
7042 return off;
7043}
7044
7045
7046#define IEM_MC_FETCH_XREG_U16(a_u16Value, a_iXReg, a_iWord) \
7047    off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u16Value, a_iXReg, a_iWord)
7048
7049/** Emits code for IEM_MC_FETCH_XREG_U16. */
7050DECL_INLINE_THROW(uint32_t)
7051iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7052{
7053 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7054 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7055
7056 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7057 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7058
7059 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7060 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7061
7062 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7063
7064 /* Free but don't flush the source register. */
7065 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7066 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7067
7068 return off;
7069}
7070
7071
7072#define IEM_MC_FETCH_XREG_U8(a_u8Value, a_iXReg, a_iByte) \
7073    off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u8Value, a_iXReg, a_iByte)
7074
7075/** Emits code for IEM_MC_FETCH_XREG_U8. */
7076DECL_INLINE_THROW(uint32_t)
7077iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7078{
7079 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7080 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7081
7082 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7083 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7084
7085 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7086 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7087
7088 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7089
7090 /* Free but don't flush the source register. */
7091 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7092 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7093
7094 return off;
7095}
7096
7097
7098#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7099 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7100
7101/** Emits code for IEM_MC_STORE_XREG_U128. */
7102DECL_INLINE_THROW(uint32_t)
7103iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7104{
7105 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7106 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7107
7108 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7109 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7110
7111 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7112
7113 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7114 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7115
7116 /* Free but don't flush the source register. */
7117 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7118 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7119
7120 return off;
7121}
7122
7123
7124#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7125 off = iemNativeEmitSimdStoreXregU64(pReNative, off, a_iXReg, a_u64Value, a_iQWord)
7126
7127/** Emits code for IEM_MC_STORE_XREG_U64. */
7128DECL_INLINE_THROW(uint32_t)
7129iemNativeEmitSimdStoreXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iQWord)
7130{
7131 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7132 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7133
7134 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7135 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7136
7137 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7138
7139 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQWord);
7140 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7141
7142 /* Free but don't flush the source register. */
7143 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7144 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7145
7146 return off;
7147}
7148
7149
7150#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7151 off = iemNativeEmitSimdStoreXregU32(pReNative, off, a_iXReg, a_u32Value, a_iDWord)
7152
7153/** Emits code for IEM_MC_STORE_XREG_U32. */
7154DECL_INLINE_THROW(uint32_t)
7155iemNativeEmitSimdStoreXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iDWord)
7156{
7157 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7158 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7159
7160 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7161 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7162
7163 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7164
7165 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iDWord);
7166 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7167
7168 /* Free but don't flush the source register. */
7169 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7170 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7171
7172 return off;
7173}
7174
7175
7176#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7177 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7178
7179/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7180DECL_INLINE_THROW(uint32_t)
7181iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7182{
7183 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7184 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7185
7186 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7187 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7188
7189 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7190
7191 /* Zero the vector register first, then store the 64-bit value to the lower 64-bit. */
7192 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7193 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7194 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7195
7196 /* Free but don't flush the source register. */
7197 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7198 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7199
7200 return off;
7201}
7202
7203
7204#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7205 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7206
7207/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7208DECL_INLINE_THROW(uint32_t)
7209iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7210{
7211 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7212 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7213
7214 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7215 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7216
7217 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7218
7219 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
7220 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7221 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7222 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7223
7224 /* Free but don't flush the source register. */
7225 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7226 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7227
7228 return off;
7229}
7230
7231
7232#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
7233 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
7234
7235/** Emits code for IEM_MC_FETCH_YREG_U64. */
7236DECL_INLINE_THROW(uint32_t)
7237iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
7238{
7239 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7240 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7241
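    /* Qwords 0..1 live in the low 128 bits (the XMM part) and qwords 2..3 in the upper half,
       so only the half actually containing the requested qword needs to be loaded. */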
7242 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7243 iQWord >= 2
7244 ? kIemNativeGstSimdRegLdStSz_High128
7245 : kIemNativeGstSimdRegLdStSz_Low128,
7246 kIemNativeGstRegUse_ReadOnly);
7247
7248 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7249 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7250
7251 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7252
7253 /* Free but don't flush the source register. */
7254 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7255 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7256
7257 return off;
7258}
7259
7260
7261#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
7262 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
7263
7264/** Emits code for IEM_MC_FETCH_YREG_U32. */
7265DECL_INLINE_THROW(uint32_t)
7266iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
7267{
7268 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7269 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7270
7271 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7272 iDWord >= 4
7273 ? kIemNativeGstSimdRegLdStSz_High128
7274 : kIemNativeGstSimdRegLdStSz_Low128,
7275 kIemNativeGstRegUse_ReadOnly);
7276
7277 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7278 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7279
7280 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7281
7282 /* Free but don't flush the source register. */
7283 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7284 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7285
7286 return off;
7287}
7288
7289
7290#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
7291 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
7292
7293/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
7294DECL_INLINE_THROW(uint32_t)
7295iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
7296{
7297 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7298 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
7299
7300 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
7301 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7302
7303 /* Free but don't flush the register. */
7304 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
7305
7306 return off;
7307}
7308
7309
7310#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
7311 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
7312
7313/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
7314DECL_INLINE_THROW(uint32_t)
7315iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7316{
7317 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7318 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7319
7320 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7321 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7322
7323 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
7324
7325 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7326 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7327 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7328 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7329
7330 /* Free but don't flush the source register. */
7331 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7332 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7333
7334 return off;
7335}
7336
7337
7338#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
7339 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
7340
7341/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
7342DECL_INLINE_THROW(uint32_t)
7343iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7344{
7345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7346 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
7347
7348 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7349 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7350
7351 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7352
7353 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7354 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7355 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7356 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7357
7358 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7359 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7360
7361 return off;
7362}
7363
7364
7365#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
7366 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
7367
7368/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
7369DECL_INLINE_THROW(uint32_t)
7370iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7371{
7372 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7373 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
7374
7375 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7376 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7377
7378 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7379
7380 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7381 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7382 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7383 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7384
7385 /* Free but don't flush the source register. */
7386 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7387 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7388
7389 return off;
7390}
7391
7392
7393#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
7394 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
7395
7396/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
7397DECL_INLINE_THROW(uint32_t)
7398iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7399{
7400 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7401 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
7402
7403 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7404 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7405
7406 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7407
7408 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7409 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7410 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7411 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7412
7413 /* Free but don't flush the source register. */
7414 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7415 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7416
7417 return off;
7418}
7419
7420
7421#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
7422 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
7423
7424/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
7425DECL_INLINE_THROW(uint32_t)
7426iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7427{
7428 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7429 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7430
7431 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7432 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7433
7434 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7435
7436 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
7437 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7438 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7439 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
7440
7441    /* Free but don't flush the destination register, and release the source variable. */
7442 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7443 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7444
7445 return off;
7446}
7447
7448
7449#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
7450 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
7451
7452/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
7453DECL_INLINE_THROW(uint32_t)
7454iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7455{
7456 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7457 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
7458
7459 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7460 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7461
7462 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7463
7464 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7465 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7466 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7467
7468 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7469 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7470
7471 return off;
7472}
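
/*
 * Note: Unlike the XREG variants, the YREG broadcasts pass f256Bit=true so the
 * broadcast itself covers all 256 bits and no separate high-half clear is
 * emitted; both 128-bit halves are still marked dirty for the write-back.
 */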
7473
7474
7475#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
7476 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
7477
7478/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
7479DECL_INLINE_THROW(uint32_t)
7480iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7481{
7482 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7483 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
7484
7485 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7486 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7487
7488 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7489
7490 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7491 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7492 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7493
7494 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7495 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7496
7497 return off;
7498}
7499
7500
7501#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
7502 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
7503
7504/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
7505DECL_INLINE_THROW(uint32_t)
7506iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7507{
7508 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7509 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
7510
7511 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7512 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7513
7514 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7515
7516 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7517 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7518 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7519
7520    /* Free but don't flush the destination register, and release the source variable. */
7521 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7522 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7523
7524 return off;
7525}
7526
7527
7528#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
7529 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
7530
7531/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
7532DECL_INLINE_THROW(uint32_t)
7533iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7534{
7535 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7536 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7537
7538 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7539 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7540
7541 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7542
7543 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
7544 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7545 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7546
7547    /* Free but don't flush the destination register, and release the source variable. */
7548 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7549 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7550
7551 return off;
7552}
7553
7554
7555#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
7556 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
7557
7558/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
7559DECL_INLINE_THROW(uint32_t)
7560iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7561{
7562 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7563 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
7564
7565 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7566 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7567
7568 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7569
7570 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
7571 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
7572 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7573 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7574
7575    /* Free but don't flush the destination register, and release the source variable. */
7576 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7577 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7578
7579 return off;
7580}
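
/*
 * Note: The zero-extending YREG stores clear the whole 256-bit register first
 * and then write the single element into position 0, which matches the VLMAX
 * zero-extension behaviour VEX-encoded scalar moves (e.g. VMOVD) require.
 */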
7581
7582
7583#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
7584 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
7585
7586/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
7587DECL_INLINE_THROW(uint32_t)
7588iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
7589{
7590 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7591 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7592
7593 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
7594 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7595
7596 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7597
7598 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
7599 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
7600 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYReg);
7601 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
7602
7603    /* Free but don't flush the destination register, and release the source variable. */
7604 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7605 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7606
7607 return off;
7608}
7609
7610
7611#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
7612 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
7613
7614/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
7615DECL_INLINE_THROW(uint32_t)
7616iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
7617{
7618 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7619 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7620
7621 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7622 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7623 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
7624 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7625 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7626
7627 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
7628 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
7629 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7630 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
7631 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
7632
7633 /* Free but don't flush the source and destination registers. */
7634 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
7635 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7636 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7637
7638 return off;
7639}
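
/*
 * Note: The merge above leaves the destination as dst[63:0] = the local u64,
 * dst[127:64] = iYRegSrcHx[127:64] and dst[255:128] = 0; the mapping of this
 * to a concrete instruction (a VMOVLPS style merge, presumably) lives in the
 * decoder, not in this file.
 */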
7640
7641
7642#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
7643 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
7644
7645/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
7646DECL_INLINE_THROW(uint32_t)
7647iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
7648{
7649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7650 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
7651
7652 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7653 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7654 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
7655 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7656 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
7657
7658 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
7659 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
7660 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
7661 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
7662 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
7663
7664 /* Free but don't flush the source and destination registers. */
7665 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
7666 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7667 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
7668
7669 return off;
7670}
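
/*
 * Note: This variant is the counterpart of the one above: the local u64 is
 * written into qword 1 while qword 0 is taken from iYRegSrcHx, so dst[63:0]
 * comes from the source register and dst[127:64] from the local, again with
 * the upper 128 bits zeroed.
 */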
7671
7672
7673#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
7674 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
7675
7676
7677/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
7678DECL_INLINE_THROW(uint32_t)
7679iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
7680{
7681 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7682 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7683
7684 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
7685 if (bImm8Mask & RT_BIT(0))
7686 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
7687 if (bImm8Mask & RT_BIT(1))
7688 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
7689 if (bImm8Mask & RT_BIT(2))
7690 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
7691 if (bImm8Mask & RT_BIT(3))
7692 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
7693 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
7694
7695 /* Free but don't flush the destination register. */
7696 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7697
7698 return off;
7699}
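
/*
 * Sketch for the @todo above, illustrative only: a full mask could be folded
 * into a single 128-bit clear instead of four element stores.  The helper
 * named below is an assumption and may not exist under this exact name;
 * verify against IEMN8veRecompilerEmit.h before relying on it.
 *
 *     if (bImm8Mask == 0xf)
 *         off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst); // hypothetical helper
 *     else
 *     {
 *         // ... per-dword clears as in the emitter above ...
 *     }
 */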
7700
7701
7702#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
7703 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
7704
7705
7706/** Emits code for IEM_MC_FETCH_YREG_U256. */
7707DECL_INLINE_THROW(uint32_t)
7708iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
7709{
7710 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7711 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
7712
7713 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
7714 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
7715 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7716
7717 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
7718
7719 /* Free but don't flush the source register. */
7720 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7721 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7722
7723 return off;
7724}
7725
7726
7727#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
7728 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
7729
7730
7731/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
7732DECL_INLINE_THROW(uint32_t)
7733iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
7734{
7735 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7736 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
7737
7738 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
7739 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
7740    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7741
7742 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
7743 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
7744 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
7745
7746    /* Free but don't flush the destination register, and release the source variable. */
7747 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7748 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7749
7750 return off;
7751}
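
/*
 * Note: For the full 256-bit store no zeroing step is needed; the register to
 * register copy overwrites the whole destination, and both halves are marked
 * dirty so the complete YMM value is written back on the next flush.
 */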
7752
7753
7754
7755/*********************************************************************************************************************************
7756* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
7757*********************************************************************************************************************************/
7758
7759/**
7760 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX.
7761 */
7762DECL_INLINE_THROW(uint32_t)
7763iemNativeEmitCallSseAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
7764{
7765 /*
7766 * Need to do the FPU preparation.
7767 */
7768 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
7769
7770 /*
7771 * Do all the call setup and cleanup.
7772 */
7773 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS);
7774
7775 /*
7776 * Load the XState::x87 pointer.
7777 */
7778 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_X87, 0 /*idxRegInClass*/);
7779
7780 /*
7781 * Make the call.
7782 */
7783 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
7784
7785 return off;
7786}
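
/*
 * Note: The IEM_SSE_AIMPL_HIDDEN_ARGS slot accounted for above is the
 * reference to the guest x87/XState area (kIemNativeGstRegRef_X87) loaded
 * into IEMNATIVE_CALL_ARG0_GREG, so the assembly helpers receive that state
 * pointer as an implicit first parameter ahead of the visible arguments.
 */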
7787
7788
7789#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
7790 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
7791
7792/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
7793DECL_INLINE_THROW(uint32_t)
7794iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
7795{
7796 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7797 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7798 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 2);
7799}
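
/*
 * Illustrative only: a two argument SSE call in an MC block would be along
 * the lines of the sketch below.  The argument indices, the REF macros and
 * the helper name are assumptions made for the example and are not taken
 * from this file; the offset by IEM_SSE_AIMPL_HIDDEN_ARGS mirrors the
 * asserts in the emitter above.
 *
 *     IEM_MC_ARG(PRTUINT128U,  puDst, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
 *     IEM_MC_ARG(PCRTUINT128U, puSrc, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
 *     IEM_MC_REF_XREG_U128(puDst, IEM_GET_MODRM_REG(pVCpu, bRm));
 *     IEM_MC_REF_XREG_U128_CONST(puSrc, IEM_GET_MODRM_RM(pVCpu, bRm));
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_addps_u128, puDst, puSrc);
 */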
7800
7801
7802#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
7803 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
7804
7805/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
7806DECL_INLINE_THROW(uint32_t)
7807iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
7808{
7809 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7810 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7811 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
7812 return iemNativeEmitCallSseAImplCommon(pReNative, off, pfnAImpl, 3);
7813}
7814#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
7815
7816
7817/*********************************************************************************************************************************
7818* Include instruction emitters. *
7819*********************************************************************************************************************************/
7820#include "target-x86/IEMAllN8veEmit-x86.h"
7821