VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@101484

Last change on this file since 101484 was 101484, checked in by vboxsync, 17 months ago

VMM/IEM: Basic register allocator sketches that incorporate simple skipping of guest register value loads. Sketched out variable and argument management. Started telling GDB about our jitted code to help with backtraces. ++ bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 143.1 KB
 
1/* $Id: IEMAllN8veRecompiler.cpp 101484 2023-10-18 01:32:17Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : ...
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): ...
18 * - Level 12 (Log12): Register allocator
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include "IEMInternal.h"
54#include <VBox/vmm/vmcc.h>
55#include <VBox/log.h>
56#include <VBox/err.h>
57#include <VBox/param.h>
58#include <iprt/assert.h>
59#include <iprt/heap.h>
60#include <iprt/mem.h>
61#include <iprt/string.h>
62#if defined(RT_ARCH_AMD64)
63# include <iprt/x86.h>
64#elif defined(RT_ARCH_ARM64)
65# include <iprt/armv8.h>
66#endif
67
68#ifdef RT_OS_WINDOWS
69# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
70extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
71extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
72#else
73# include <iprt/formats/dwarf.h>
74# if defined(RT_OS_DARWIN)
75# include <libkern/OSCacheControl.h>
76# define IEMNATIVE_USE_LIBUNWIND
77extern "C" void __register_frame(const void *pvFde);
78extern "C" void __deregister_frame(const void *pvFde);
79# else
80# ifdef DEBUG_bird /** @todo not thread safe yet */
81# define IEMNATIVE_USE_GDB_JIT
82# endif
83# ifdef IEMNATIVE_USE_GDB_JIT
84# include <iprt/critsect.h>
85# include <iprt/once.h>
86# include <iprt/formats/elf64.h>
87# endif
88extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
89extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
90# endif
91#endif
92
93#include "IEMInline.h"
94#include "IEMThreadedFunctions.h"
95#include "IEMN8veRecompiler.h"
96#include "IEMNativeFunctions.h"
97
98
99/*
100 * Narrow down configs here to avoid wasting time on unused configs.
101 * Note! Same checks in IEMAllThrdRecompiler.cpp.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117/*********************************************************************************************************************************
118* Executable Memory Allocator *
119*********************************************************************************************************************************/
120/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
121 * Use an alternative chunk sub-allocator that does not store internal data
122 * in the chunk.
123 *
124 * Using the RTHeapSimple allocator is not practical on newer darwin systems where
125 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
126 * memory. We would have to change the protection of the whole chunk for
127 * every call to RTHeapSimple, which would be rather expensive.
128 *
129 * This alternative implementation lets us restrict page protection modifications
130 * to the pages backing the executable memory we just allocated.
131 */
132#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
133/** The chunk sub-allocation unit size in bytes. */
134#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
135/** The chunk sub-allocation unit size as a shift factor. */
136#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
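#if 0 /* Illustrative sketch only (not part of the build): how a request size maps to
         sub-allocation units with the values above; the helper name is made up. */
static uint32_t iemExecMemExampleRoundToUnits(uint32_t cbReq)
{
    /* E.g. a 300 byte request needs (300 + 127) >> 7 = 3 units = 384 bytes. */
    uint32_t const cUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    return cUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif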
137
138#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
139# ifdef IEMNATIVE_USE_GDB_JIT
140# define IEMNATIVE_USE_GDB_JIT_ET_DYN
141
142/** GDB JIT: Code entry. */
143typedef struct GDBJITCODEENTRY
144{
145 struct GDBJITCODEENTRY *pNext;
146 struct GDBJITCODEENTRY *pPrev;
147 uint8_t *pbSymFile;
148 uint64_t cbSymFile;
149} GDBJITCODEENTRY;
150
151/** GDB JIT: Actions. */
152typedef enum GDBJITACTIONS : uint32_t
153{
154 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
155} GDBJITACTIONS;
156
157/** GDB JIT: Descriptor. */
158typedef struct GDBJITDESCRIPTOR
159{
160 uint32_t uVersion;
161 GDBJITACTIONS enmAction;
162 GDBJITCODEENTRY *pRelevant;
163 GDBJITCODEENTRY *pHead;
164 /** Our addition: */
165 GDBJITCODEENTRY *pTail;
166} GDBJITDESCRIPTOR;
167
168/** GDB JIT: Our simple symbol file data. */
169typedef struct GDBJITSYMFILE
170{
171 Elf64_Ehdr EHdr;
172# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
173 Elf64_Shdr aShdrs[5];
174# else
175 Elf64_Shdr aShdrs[6];
176 Elf64_Phdr aPhdrs[3];
177# endif
178 /** The dwarf ehframe data for the chunk. */
179 uint8_t abEhFrame[512];
180 char szzStrTab[128];
181 Elf64_Sym aSymbols[1];
182# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
183 Elf64_Dyn aDyn[6];
184# endif
185} GDBJITSYMFILE;
186
187extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
188extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
189
190/** Init once for g_IemNativeGdbJitLock. */
191static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
192/** Init once for the critical section. */
193static RTCRITSECT g_IemNativeGdbJitLock;
194
195/** GDB reads the info here. */
196GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
197
198/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
199DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
200{
201 ASMNopPause();
202}
203
204/** @callback_method_impl{FNRTONCE} */
205static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
206{
207 RT_NOREF(pvUser);
208 return RTCritSectInit(&g_IemNativeGdbJitLock);
209}
210
211
212# endif /* IEMNATIVE_USE_GDB_JIT */
213
214/**
215 * Per-chunk unwind info for non-windows hosts.
216 */
217typedef struct IEMEXECMEMCHUNKEHFRAME
218{
219# ifdef IEMNATIVE_USE_LIBUNWIND
220 /** The offset of the FDA into abEhFrame. */
221 uintptr_t offFda;
222# else
223 /** 'struct object' storage area. */
224 uint8_t abObject[1024];
225# endif
226# ifdef IEMNATIVE_USE_GDB_JIT
227# if 0
228 /** The GDB JIT 'symbol file' data. */
229 GDBJITSYMFILE GdbJitSymFile;
230# endif
231 /** The GDB JIT list entry. */
232 GDBJITCODEENTRY GdbJitEntry;
233# endif
234 /** The dwarf ehframe data for the chunk. */
235 uint8_t abEhFrame[512];
236} IEMEXECMEMCHUNKEHFRAME;
237/** Pointer to per-chunk unwind info for non-windows hosts. */
238typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
239#endif
240
241
242/**
243 * A chunk of executable memory.
244 */
245typedef struct IEMEXECMEMCHUNK
246{
247#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
248 /** Number of free items in this chunk. */
249 uint32_t cFreeUnits;
250 /** Hint where to start searching for free space in the allocation bitmap. */
251 uint32_t idxFreeHint;
252#else
253 /** The heap handle. */
254 RTHEAPSIMPLE hHeap;
255#endif
256 /** Pointer to the chunk. */
257 void *pvChunk;
258#ifdef IN_RING3
259 /**
260 * Pointer to the unwind information.
261 *
262 * This is used during C++ throw and longjmp (windows and probably most other
263 * platforms). Some debuggers (windbg) make use of it as well.
264 *
265 * Windows: This is allocated from hHeap on windows because (at least for
266 * AMD64) the UNWIND_INFO structure address in the
267 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
268 *
269 * Others: Allocated from the regular heap to avoid unnecessary executable data
270 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
271 void *pvUnwindInfo;
272#elif defined(IN_RING0)
273 /** Allocation handle. */
274 RTR0MEMOBJ hMemObj;
275#endif
276} IEMEXECMEMCHUNK;
277/** Pointer to a memory chunk. */
278typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
279
280
281/**
282 * Executable memory allocator for the native recompiler.
283 */
284typedef struct IEMEXECMEMALLOCATOR
285{
286 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
287 uint32_t uMagic;
288
289 /** The chunk size. */
290 uint32_t cbChunk;
291 /** The maximum number of chunks. */
292 uint32_t cMaxChunks;
293 /** The current number of chunks. */
294 uint32_t cChunks;
295 /** Hint where to start looking for available memory. */
296 uint32_t idxChunkHint;
297 /** Statistics: Current number of allocations. */
298 uint32_t cAllocations;
299
300 /** The total amount of memory available. */
301 uint64_t cbTotal;
302 /** Total amount of free memory. */
303 uint64_t cbFree;
304 /** Total amount of memory allocated. */
305 uint64_t cbAllocated;
306
307#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
308 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
309 *
310 * Since the chunk size is a power of two and the minimum chunk size is a lot
311 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
312 * require a whole number of uint64_t elements in the allocation bitmap. So,
313 * for the sake of simplicity/laziness, they are allocated as one continuous
314 * block. */
315 uint64_t *pbmAlloc;
316 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
317 uint32_t cUnitsPerChunk;
318 /** Number of bitmap elements per chunk (for quickly locating the bitmap
319 * portion corresponding to a chunk). */
320 uint32_t cBitmapElementsPerChunk;
321#else
322 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
323 * @{ */
324 /** The size of the heap internal block header. This is used to adjust the
325 * requested memory size to make sure there is exactly enough room for a header at
326 * the end of the blocks we allocate before the next 64 byte alignment line. */
327 uint32_t cbHeapBlockHdr;
328 /** The size of the initial heap allocation required to make sure the first
329 * allocation is correctly aligned. */
330 uint32_t cbHeapAlignTweak;
331 /** The alignment tweak allocation address. */
332 void *pvAlignTweak;
333 /** @} */
334#endif
335
336#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
337 /** Pointer to the array of unwind info running parallel to aChunks (same
338 * allocation as this structure, located after the bitmaps).
339 * (For Windows, the structures must reside within 32-bit RVA distance of the
340 * actual chunk, so they are allocated off the chunk.) */
341 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
342#endif
343
344 /** The allocation chunks. */
345 RT_FLEXIBLE_ARRAY_EXTENSION
346 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
347} IEMEXECMEMALLOCATOR;
348/** Pointer to an executable memory allocator. */
349typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
350
351/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
352#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
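#if 0 /* Illustrative arithmetic only (not part of the build): bookkeeping sizes for an
         assumed 64 MiB chunk with the 128 byte unit size above. */
AssertCompile((_64M >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) == 524288);     /* units per chunk */
AssertCompile((_64M >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6)) == 8192); /* uint64_t bitmap elements, i.e. 64 KiB of bitmap */
#endif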
353
354
355static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator);
356
357
358/**
359 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
360 * the heap statistics.
361 */
362static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
363 uint32_t cbReq, uint32_t idxChunk)
364{
365 pExecMemAllocator->cAllocations += 1;
366 pExecMemAllocator->cbAllocated += cbReq;
367#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
368 pExecMemAllocator->cbFree -= cbReq;
369#else
370 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
371#endif
372 pExecMemAllocator->idxChunkHint = idxChunk;
373
374#ifdef RT_OS_DARWIN
375 /*
376 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
377 * on darwin. So, we mark the pages returned as read+write after alloc and
378 * expect the caller to call iemExecMemAllocatorReadyForUse when done
379 * writing to the allocation.
380 *
381 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
382 * for details.
383 */
384 /** @todo detect if this is necessary... it wasn't required on 10.15 or
385 * whatever older version it was. */
386 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
387 AssertRC(rc);
388#endif
389
390 return pvRet;
391}
392
393
394#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
395static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
396 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
397{
398 /*
399 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
400 */
401 Assert(!(cToScan & 63));
402 Assert(!(idxFirst & 63));
403 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
404 pbmAlloc += idxFirst / 64;
405
406 /*
407 * Scan the bitmap for cReqUnits consecutive clear bits
408 */
409 /** @todo This can probably be done more efficiently for non-x86 systems. */
410 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
411 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
412 {
413 uint32_t idxAddBit = 1;
414 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
415 idxAddBit++;
416 if (idxAddBit >= cReqUnits)
417 {
418 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
419
420 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
421 pChunk->cFreeUnits -= cReqUnits;
422 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
423
424 void * const pvRet = (uint8_t *)pChunk->pvChunk
425 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
426
427 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
428 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
429 }
430
431 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
432 }
433 return NULL;
434}
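# if 0 /* Illustrative sketch only (not part of the build): the bitmap scan on a toy
          bitmap; the function name is made up. */
static void iemExecMemExampleScan(void)
{
    uint64_t bmToy[1] = { UINT64_C(0x31) };                 /* units 0, 4 and 5 already allocated */
    int iBit = ASMBitFirstClear(bmToy, 64);                 /* -> 1, the first free unit */
    Assert(iBit == 1);
    Assert(!ASMBitTest(bmToy, 2) && !ASMBitTest(bmToy, 3)); /* units 2 and 3 are free too, so a 3 unit request fits at unit 1 */
    ASMBitSetRange(bmToy, 1, 1 + 3);                        /* claim units 1 thru 3 (end is exclusive) */
}
# endif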
435#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
436
437
438static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
439{
440#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
441 /*
442 * Figure out how much to allocate.
443 */
444 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
445 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
446 {
447 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
448 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
449 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
450 {
451 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
452 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
453 if (pvRet)
454 return pvRet;
455 }
456 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
457 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
458 cReqUnits, idxChunk);
459 }
460#else
461 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
462 if (pvRet)
463 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
464#endif
465 return NULL;
466
467}
468
469
470/**
471 * Allocates @a cbReq bytes of executable memory.
472 *
473 * @returns Pointer to the memory, NULL if out of memory or other problem
474 * encountered.
475 * @param pVCpu The cross context virtual CPU structure of the calling
476 * thread.
477 * @param cbReq How many bytes are required.
478 */
479static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
480{
481 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
482 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
483 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
484
485 /*
486 * Adjust the request size so it'll fit the allocator alignment/whatnot.
487 *
488 * For the RTHeapSimple allocator this means following the logic described
489 * in iemExecMemAllocatorGrow and attempting to allocate it from one of the
490 * existing chunks if we think we've got sufficient free memory around.
491 *
492 * For the alternative allocator we just align it up to a whole number of units.
493 */
494#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
495 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
496#else
497 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
498#endif
499 if (cbReq <= pExecMemAllocator->cbFree)
500 {
501 uint32_t const cChunks = pExecMemAllocator->cChunks;
502 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
503 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
504 {
505 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
506 if (pvRet)
507 return pvRet;
508 }
509 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
510 {
511 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
512 if (pvRet)
513 return pvRet;
514 }
515 }
516
517 /*
518 * Can we grow it with another chunk?
519 */
520 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
521 {
522 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
523 AssertLogRelRCReturn(rc, NULL);
524
525 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
526 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
527 if (pvRet)
528 return pvRet;
529 AssertFailed();
530 }
531
532 /* What now? Prune native translation blocks from the cache? */
533 AssertFailed();
534 return NULL;
535}
536
537
538/** This is a hook that we may need later for changing memory protection back
539 * to readonly+exec */
540static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
541{
542#ifdef RT_OS_DARWIN
543 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
544 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
545 AssertRC(rc); RT_NOREF(pVCpu);
546
547 /*
548 * Flush the instruction cache:
549 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
550 */
551 /* sys_dcache_flush(pv, cb); - not necessary */
552 sys_icache_invalidate(pv, cb);
553#else
554 RT_NOREF(pVCpu, pv, cb);
555#endif
556}
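#if 0 /* Illustrative sketch only (not part of the build): the intended usage pattern for
         the allocator helpers above; the function and variable names are made up. */
static void iemExecMemExampleEmit(PVMCPUCC pVCpu, const uint8_t *pbNativeCode, size_t cbNativeCode)
{
    void * const pv = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbNativeCode);
    if (pv)
    {
        memcpy(pv, pbNativeCode, cbNativeCode);                  /* the memory is still RW at this point (darwin) */
        iemExecMemAllocatorReadyForUse(pVCpu, pv, cbNativeCode); /* flip to RX and flush the instruction cache */
        /* ... execute the code; when the TB is retired: */
        iemExecMemAllocatorFree(pVCpu, pv, cbNativeCode);
    }
}
#endif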
557
558
559/**
560 * Frees executable memory.
561 */
562void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
563{
564 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
565 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
566 Assert(pv);
567#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
568 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
569#else
570 Assert(!((uintptr_t)pv & 63));
571#endif
572
573 /* Align the size as we did when allocating the block. */
574#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
575 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
576#else
577 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
578#endif
579
580 /* Free it / assert sanity. */
581#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
582 uint32_t const cChunks = pExecMemAllocator->cChunks;
583 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
584 bool fFound = false;
585 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
586 {
587 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
588 fFound = offChunk < cbChunk;
589 if (fFound)
590 {
591#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
592 uint32_t const idxFirst = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
593 uint32_t const cReqUnits = cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
594
595 /* Check that it's valid and free it. */
596 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
597 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
598 for (uint32_t i = 1; i < cReqUnits; i++)
599 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
600 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
601
602 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
603 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
604
605 /* Update the stats. */
606 pExecMemAllocator->cbAllocated -= cb;
607 pExecMemAllocator->cbFree += cb;
608 pExecMemAllocator->cAllocations -= 1;
609 return;
610#else
611 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
612 break;
613#endif
614 }
615 }
616# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
617 AssertFailed();
618# else
619 Assert(fFound);
620# endif
621#endif
622
623#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
624 /* Update stats while cb is freshly calculated. */
625 pExecMemAllocator->cbAllocated -= cb;
626 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
627 pExecMemAllocator->cAllocations -= 1;
628
629 /* Free it. */
630 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
631#endif
632}
633
634
635
636#ifdef IN_RING3
637# ifdef RT_OS_WINDOWS
638
639/**
640 * Initializes the unwind info structures for windows hosts.
641 */
642static int
643iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
644{
645 /*
646 * The AMD64 unwind opcodes.
647 *
648 * This is a program that starts with RSP after a RET instruction that
649 * ends up in recompiled code, and the operations we describe here will
650 * restore all non-volatile registers and bring RSP back to where our
651 * RET address is. This means it's reverse order from what happens in
652 * the prologue.
653 *
654 * Note! Using a frame register approach here both because we have one
655 * and mainly because the UWOP_ALLOC_LARGE argument values
656 * would be a pain to write initializers for. On the positive
657 * side, we're impervious to changes in the stack variable
658 * area and can deal with dynamic stack allocations if necessary.
659 */
660 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
661 {
662 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
663 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
664 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
665 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
666 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
667 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
668 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
669 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
670 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
671 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
672 };
673 union
674 {
675 IMAGE_UNWIND_INFO Info;
676 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
677 } s_UnwindInfo =
678 {
679 {
680 /* .Version = */ 1,
681 /* .Flags = */ 0,
682 /* .SizeOfProlog = */ 16, /* whatever */
683 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
684 /* .FrameRegister = */ X86_GREG_xBP,
685 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
686 }
687 };
688 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
689 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
690
691 /*
692 * Calc how much space we need and allocate it off the exec heap.
693 */
694 unsigned const cFunctionEntries = 1;
695 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
696 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
697# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
698 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
699 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
700 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
701# else
702 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
703 - pExecMemAllocator->cbHeapBlockHdr;
704 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
705 32 /*cbAlignment*/);
706# endif
707 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
708 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
709
710 /*
711 * Initialize the structures.
712 */
713 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
714
715 paFunctions[0].BeginAddress = 0;
716 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
717 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
718
719 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
720 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
721
722 /*
723 * Register it.
724 */
725 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
726 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
727
728 return VINF_SUCCESS;
729}
730
731
732# else /* !RT_OS_WINDOWS */
733
734/**
735 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
736 */
737DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
738{
739 if (iValue >= 64)
740 {
741 Assert(iValue < 0x2000);
742 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
743 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
744 }
745 else if (iValue >= 0)
746 *Ptr.pb++ = (uint8_t)iValue;
747 else if (iValue > -64)
748 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
749 else
750 {
751 Assert(iValue > -0x2000);
752 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
753 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
754 }
755 return Ptr;
756}
757
758
759/**
760 * Emits an ULEB128 encoded value (up to 64-bit wide).
761 */
762DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
763{
764 while (uValue >= 0x80)
765 {
766 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
767 uValue >>= 7;
768 }
769 *Ptr.pb++ = (uint8_t)uValue;
770 return Ptr;
771}
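# if 0 /* Illustrative sketch only (not part of the build): a few worked LEB128 encodings
          using the emitters above; the function name is made up. */
static void iemDwarfExampleLeb128(void)
{
    uint8_t    abBuf[8];
    RTPTRUNION Ptr = { abBuf };
    Ptr = iemDwarfPutUleb128(Ptr, 300); /* 300 = 0b1'0010'1100 -> 0xAC 0x02 (low 7 bits first, msb = continue) */
    Ptr = iemDwarfPutLeb128(Ptr, -8);   /* -> single byte 0x78 (bit 6 is the sign bit) */
    Ptr = iemDwarfPutLeb128(Ptr, 200);  /* -> 0xC8 0x01 (needs a second byte to keep the sign bit clear) */
}
# endif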
772
773
774/**
775 * Emits a CFA rule as register @a uReg + offset @a off.
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
778{
779 *Ptr.pb++ = DW_CFA_def_cfa;
780 Ptr = iemDwarfPutUleb128(Ptr, uReg);
781 Ptr = iemDwarfPutUleb128(Ptr, off);
782 return Ptr;
783}
784
785
786/**
787 * Emits a register (@a uReg) save location:
788 * CFA + @a off * data_alignment_factor
789 */
790DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
791{
792 if (uReg < 0x40)
793 *Ptr.pb++ = DW_CFA_offset | uReg;
794 else
795 {
796 *Ptr.pb++ = DW_CFA_offset_extended;
797 Ptr = iemDwarfPutUleb128(Ptr, uReg);
798 }
799 Ptr = iemDwarfPutUleb128(Ptr, off);
800 return Ptr;
801}
802
803
804# if 0 /* unused */
805/**
806 * Emits a register (@a uReg) save location, using signed offset:
807 * CFA + @a offSigned * data_alignment_factor
808 */
809DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
810{
811 *Ptr.pb++ = DW_CFA_offset_extended_sf;
812 Ptr = iemDwarfPutUleb128(Ptr, uReg);
813 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
814 return Ptr;
815}
816# endif
817
818
819/**
820 * Initializes the unwind info section for non-windows hosts.
821 */
822static int
823iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
824{
825 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
826 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
827
828 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
829
830 /*
831 * Generate the CIE first.
832 */
833# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
834 uint8_t const iDwarfVer = 3;
835# else
836 uint8_t const iDwarfVer = 4;
837# endif
838 RTPTRUNION const PtrCie = Ptr;
839 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
840 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
841 *Ptr.pb++ = iDwarfVer; /* DWARF version */
842 *Ptr.pb++ = 0; /* Augmentation. */
843 if (iDwarfVer >= 4)
844 {
845 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
846 *Ptr.pb++ = 0; /* Segment selector size. */
847 }
848# ifdef RT_ARCH_AMD64
849 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
850# else
851 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
852# endif
853 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
854# ifdef RT_ARCH_AMD64
855 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
856# elif defined(RT_ARCH_ARM64)
857 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
858# else
859# error "port me"
860# endif
861 /* Initial instructions: */
862# ifdef RT_ARCH_AMD64
863 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
864 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
865 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
866 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
867 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
868 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
869 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
870 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
871# elif defined(RT_ARCH_ARM64)
872# if 1
873 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
874# else
875 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
876# endif
877 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
878 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
879 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
880 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
881 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
882 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
883 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
884 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
885 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
886 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
887 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
888 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
889 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
890 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
891# else
892# error "port me"
893# endif
894 while ((Ptr.u - PtrCie.u) & 3)
895 *Ptr.pb++ = DW_CFA_nop;
896 /* Finalize the CIE size. */
897 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
898
899 /*
900 * Generate an FDE for the whole chunk area.
901 */
902# ifdef IEMNATIVE_USE_LIBUNWIND
903 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
904# endif
905 RTPTRUNION const PtrFde = Ptr;
906 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
907 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
908 Ptr.pu32++;
909 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
910 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
911# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
912 *Ptr.pb++ = DW_CFA_nop;
913# endif
914 while ((Ptr.u - PtrFde.u) & 3)
915 *Ptr.pb++ = DW_CFA_nop;
916 /* Finalize the FDE size. */
917 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
918
919 /* Terminator entry. */
920 *Ptr.pu32++ = 0;
921 *Ptr.pu32++ = 0; /* just to be sure... */
922 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
923
924 /*
925 * Register it.
926 */
927# ifdef IEMNATIVE_USE_LIBUNWIND
928 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
929# else
930 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
931 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
932# endif
933
934# ifdef IEMNATIVE_USE_GDB_JIT
935 /*
936 * Now for telling GDB about this (experimental).
937 *
938 * This seems to work best with ET_DYN.
939 */
940 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
941# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
942 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
943 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
944# else
945 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
946 - pExecMemAllocator->cbHeapBlockHdr;
947 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
948# endif
949 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
950 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
951
952 RT_ZERO(*pSymFile);
953 /* The ELF header: */
954 pSymFile->EHdr.e_ident[0] = ELFMAG0;
955 pSymFile->EHdr.e_ident[1] = ELFMAG1;
956 pSymFile->EHdr.e_ident[2] = ELFMAG2;
957 pSymFile->EHdr.e_ident[3] = ELFMAG3;
958 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
959 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
960 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
961 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
962# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
963 pSymFile->EHdr.e_type = ET_DYN;
964# else
965 pSymFile->EHdr.e_type = ET_REL;
966# endif
967# ifdef RT_ARCH_AMD64
968 pSymFile->EHdr.e_machine = EM_AMD64;
969# elif defined(RT_ARCH_ARM64)
970 pSymFile->EHdr.e_machine = EM_AARCH64;
971# else
972# error "port me"
973# endif
974 pSymFile->EHdr.e_version = 1; /*?*/
975 pSymFile->EHdr.e_entry = 0;
976# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
977 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
978# else
979 pSymFile->EHdr.e_phoff = 0;
980# endif
981 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
982 pSymFile->EHdr.e_flags = 0;
983 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
984# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
985 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
986 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
987# else
988 pSymFile->EHdr.e_phentsize = 0;
989 pSymFile->EHdr.e_phnum = 0;
990# endif
991 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
992 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
993 pSymFile->EHdr.e_shstrndx = 0; /* set later */
994
995 uint32_t offStrTab = 0;
996#define APPEND_STR(a_szStr) do { \
997 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
998 offStrTab += sizeof(a_szStr); \
999 } while (0)
1000 /* Section header #0: NULL */
1001 unsigned i = 0;
1002 APPEND_STR("");
1003 RT_ZERO(pSymFile->aShdrs[i]);
1004 i++;
1005
1006 /* Section header: .eh_frame */
1007 pSymFile->aShdrs[i].sh_name = offStrTab;
1008 APPEND_STR(".eh_frame");
1009 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1010 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1011# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1012 pSymFile->aShdrs[i].sh_offset
1013 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1014# else
1015 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1016 pSymFile->aShdrs[i].sh_offset = 0;
1017# endif
1018
1019 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1020 pSymFile->aShdrs[i].sh_link = 0;
1021 pSymFile->aShdrs[i].sh_info = 0;
1022 pSymFile->aShdrs[i].sh_addralign = 1;
1023 pSymFile->aShdrs[i].sh_entsize = 0;
1024 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1025 i++;
1026
1027 /* Section header: .shstrtab */
1028 unsigned const iShStrTab = i;
1029 pSymFile->EHdr.e_shstrndx = iShStrTab;
1030 pSymFile->aShdrs[i].sh_name = offStrTab;
1031 APPEND_STR(".shstrtab");
1032 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1033 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1034# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1035 pSymFile->aShdrs[i].sh_offset
1036 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1037# else
1038 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1039 pSymFile->aShdrs[i].sh_offset = 0;
1040# endif
1041 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1042 pSymFile->aShdrs[i].sh_link = 0;
1043 pSymFile->aShdrs[i].sh_info = 0;
1044 pSymFile->aShdrs[i].sh_addralign = 1;
1045 pSymFile->aShdrs[i].sh_entsize = 0;
1046 i++;
1047
1048 /* Section header: .symbols */
1049 pSymFile->aShdrs[i].sh_name = offStrTab;
1050# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1051 APPEND_STR(".dynsym");
1052 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1053# else
1054 APPEND_STR(".symtab");
1055 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1056# endif
1057 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1058 pSymFile->aShdrs[i].sh_offset
1059 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1060 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1061 pSymFile->aShdrs[i].sh_link = iShStrTab;
1062 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1063 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1064 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1065 i++;
1066
1067# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1068 /* Section header: .dynamic */
1069 pSymFile->aShdrs[i].sh_name = offStrTab;
1070 APPEND_STR(".dynamic");
1071 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1072 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1073 pSymFile->aShdrs[i].sh_offset
1074 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1075 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1076 pSymFile->aShdrs[i].sh_link = iShStrTab;
1077 pSymFile->aShdrs[i].sh_info = 0;
1078 pSymFile->aShdrs[i].sh_addralign = 1;
1079 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1080 i++;
1081# endif
1082
1083 /* Section header: .text */
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".text");
1086 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1103
1104# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1105 /*
1106 * The program headers:
1107 */
1108 /* Headers and whatnot up to .dynamic: */
1109 i = 0;
1110 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1111 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1112 pSymFile->aPhdrs[i].p_offset
1113 = pSymFile->aPhdrs[i].p_vaddr
1114 = pSymFile->aPhdrs[i].p_paddr = 0;
1115 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1116 = pSymFile->aPhdrs[i].p_memsz = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1117 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1118 i++;
1119 /* .dynamic */
1120 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1121 pSymFile->aPhdrs[i].p_flags = PF_R;
1122 pSymFile->aPhdrs[i].p_offset
1123 = pSymFile->aPhdrs[i].p_vaddr
1124 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1125 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1126 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1127 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1128 i++;
1129 /* The rest of the chunk. */
1130 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1131 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1132 pSymFile->aPhdrs[i].p_offset
1133 = pSymFile->aPhdrs[i].p_vaddr
1134 = pSymFile->aPhdrs[i].p_paddr = sizeof(GDBJITSYMFILE);
1135 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1136 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1137 pSymFile->aPhdrs[i].p_align = 1;
1138 i++;
1139
1140 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1141
1142 /* The dynamic section: */
1143 i = 0;
1144 pSymFile->aDyn[i].d_tag = DT_SONAME;
1145 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1146 APPEND_STR("iem-native.so");
1147 i++;
1148 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1149 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1150 i++;
1151 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1152 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1153 i++;
1154 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1155 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1156 i++;
1157 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1158 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aSymbols[0]);
1159 i++;
1160 pSymFile->aDyn[i].d_tag = DT_NULL;
1161 i++;
1162 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1163# endif
1164
1165 /* Symbol table: */
1166 i = 0;
1167 pSymFile->aSymbols[i].st_name = offStrTab;
1168 APPEND_STR("iem_exec_chunk");
1169 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1170 pSymFile->aSymbols[i].st_value = (uintptr_t)pvChunk;
1171 pSymFile->aSymbols[i].st_size = pExecMemAllocator->cbChunk;
1172 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FUNC);
1173 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1174 i++;
1175 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1176 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1177
1178 /* The GDB JIT entry: */
1179 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1180# if 1
1181 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1182# else
1183 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1184# endif
1185
1186 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1187 RTCritSectEnter(&g_IemNativeGdbJitLock);
1188 pEhFrame->GdbJitEntry.pNext = NULL;
1189 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1190 if (__jit_debug_descriptor.pTail)
1191 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1192 else
1193 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1194 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1195 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1196
1197 /* Notify GDB: */
1198 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1199 __jit_debug_register_code();
1200 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1201 RTCritSectLeave(&g_IemNativeGdbJitLock);
1202
1203 RT_BREAKPOINT();
1204# endif
1205
1206 return VINF_SUCCESS;
1207}
1208
1209# endif /* !RT_OS_WINDOWS */
1210#endif /* IN_RING3 */
1211
1212
1213/**
1214 * Adds another chunk to the executable memory allocator.
1215 *
1216 * This is used by the init code for the initial allocation and later by the
1217 * regular allocator function when it's out of memory.
1218 */
1219static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator)
1220{
1221 /* Check that we've room for growth. */
1222 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1223 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1224
1225 /* Allocate a chunk. */
1226#ifdef RT_OS_DARWIN
1227 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1228#else
1229 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1230#endif
1231 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1232
1233#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1234 int rc = VINF_SUCCESS;
1235#else
1236 /* Initialize the heap for the chunk. */
1237 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1238 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1239 AssertRC(rc);
1240 if (RT_SUCCESS(rc))
1241 {
1242 /*
1243 * We want the memory to be aligned on 64 byte, so the first time thru
1244 * here we do some exploratory allocations to see how we can achieve this.
1245 * On subsequent runs we only make an initial adjustment allocation, if
1246 * necessary.
1247 *
1248 * Since we own the heap implementation, we know that the internal block
1249 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1250 * so all we need to wrt allocation size adjustments is to add 32 bytes
1251 * to the size, align up by 64 bytes, and subtract 32 bytes.
1252 *
1253 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1254 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1255 * allocation to force subsequent allocations to return 64 byte aligned
1256 * user areas.
1257 */
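        /* Illustrative example (assumed numbers): a 100 byte request becomes
           RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so header (32) + user area (160)
           spans 192 bytes and the next user area starts 64 byte aligned again. */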
1258 if (!pExecMemAllocator->cbHeapBlockHdr)
1259 {
1260 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1261 pExecMemAllocator->cbHeapAlignTweak = 64;
1262 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1263 32 /*cbAlignment*/);
1264 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1265
1266 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1267 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1268 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1269 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1270 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1271
1272 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1273 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1274 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1275 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1276 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1277
1278 RTHeapSimpleFree(hHeap, pvTest2);
1279 RTHeapSimpleFree(hHeap, pvTest1);
1280 }
1281 else
1282 {
1283 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1284 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1285 }
1286 if (RT_SUCCESS(rc))
1287#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1288 {
1289 /*
1290 * Add the chunk.
1291 *
1292 * This must be done before the unwind init so windows can allocate
1293 * memory from the chunk when using the alternative sub-allocator.
1294 */
1295 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1296#ifdef IN_RING3
1297 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1298#endif
1299#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1300 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1301#else
1302 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1303 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1304 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1305 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1306#endif
1307
1308 pExecMemAllocator->cChunks = idxChunk + 1;
1309 pExecMemAllocator->idxChunkHint = idxChunk;
1310
1311#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1312 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1313 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1314#else
1315 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1316 pExecMemAllocator->cbTotal += cbFree;
1317 pExecMemAllocator->cbFree += cbFree;
1318#endif
1319
1320#ifdef IN_RING3
1321 /*
1322 * Initialize the unwind information (this cannot really fail atm).
1323 * (This sets pvUnwindInfo.)
1324 */
1325 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, pvChunk, idxChunk);
1326 if (RT_SUCCESS(rc))
1327#endif
1328 {
1329 return VINF_SUCCESS;
1330 }
1331
1332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1333 /* Just in case the impossible happens, undo the above: */
1334 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1335 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1336 pExecMemAllocator->cChunks = idxChunk;
1337 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1338 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1339 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1340 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1341#endif
1342 }
1343#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1344 }
1345#endif
1346 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1347 return rc;
1348}
1349
1350
1351/**
1352 * Initializes the executable memory allocator for native recompilation on the
1353 * calling EMT.
1354 *
1355 * @returns VBox status code.
1356 * @param pVCpu The cross context virtual CPU structure of the calling
1357 * thread.
1358 * @param cbMax The max size of the allocator.
1359 * @param cbInitial The initial allocator size.
1360 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1361 * dependent).
1362 */
1363int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1364{
1365 /*
1366 * Validate input.
1367 */
1368 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1369 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1370 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1371 || cbChunk == 0
1372 || ( RT_IS_POWER_OF_TWO(cbChunk)
1373 && cbChunk >= _1M
1374 && cbChunk <= _256M
1375 && cbChunk <= cbMax),
1376 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1377 VERR_OUT_OF_RANGE);
1378
1379 /*
1380 * Adjust/figure out the chunk size.
1381 */
1382 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1383 {
1384 if (cbMax >= _256M)
1385 cbChunk = _64M;
1386 else
1387 {
1388 if (cbMax < _16M)
1389 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1390 else
1391 cbChunk = (uint32_t)cbMax / 4;
1392 if (!RT_IS_POWER_OF_TWO(cbChunk))
1393 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1394 }
1395 }
1396
1397 if (cbChunk > cbMax)
1398 cbMax = cbChunk;
1399 else
1400 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1401 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1402 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
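    /* Illustrative examples (assumed inputs) of the sizing above:
         cbMax = 512M -> cbChunk = 64M,        cMaxChunks = 8
         cbMax =  64M -> cbChunk = 64M/4 = 16M, cMaxChunks = 4
         cbMax =   8M -> cbChunk = 4M,          cMaxChunks = 2 */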
1403
1404 /*
1405 * Allocate and initialize the allocator instance.
1406 */
1407 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1408#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1409 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1410 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1411 cbNeeded += cbBitmap * cMaxChunks;
1412 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1413 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1414#endif
1415#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1416 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1417 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1418#endif
1419 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1420 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1421 VERR_NO_MEMORY);
1422 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1423 pExecMemAllocator->cbChunk = cbChunk;
1424 pExecMemAllocator->cMaxChunks = cMaxChunks;
1425 pExecMemAllocator->cChunks = 0;
1426 pExecMemAllocator->idxChunkHint = 0;
1427 pExecMemAllocator->cAllocations = 0;
1428 pExecMemAllocator->cbTotal = 0;
1429 pExecMemAllocator->cbFree = 0;
1430 pExecMemAllocator->cbAllocated = 0;
1431#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1432 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1433 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1434 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1435 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1436#endif
1437#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1438 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1439#endif
1440 for (uint32_t i = 0; i < cMaxChunks; i++)
1441 {
1442#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1443 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1444 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1445#else
1446 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1447#endif
1448 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1449#ifdef IN_RING0
1450 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1451#else
1452 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1453#endif
1454 }
1455 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1456
1457 /*
1458 * Do the initial allocations.
1459 */
1460 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1461 {
1462 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
1463 AssertLogRelRCReturn(rc, rc);
1464 }
1465
1466 pExecMemAllocator->idxChunkHint = 0;
1467
1468 return VINF_SUCCESS;
1469}
1470
1471
1472/*********************************************************************************************************************************
1473* Native Recompilation *
1474*********************************************************************************************************************************/
1475
1476
1477/**
1478 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1479 */
1480IEM_DECL_IMPL_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1481{
1482 pVCpu->iem.s.cInstructions += idxInstr;
1483 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1484}
1485
1486
1487/**
1488 * Reinitializes the native recompiler state.
1489 *
1490 * Called before starting a new recompile job.
1491 */
1492static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
1493{
1494 pReNative->cLabels = 0;
1495 pReNative->cFixups = 0;
1496 pReNative->pTbOrg = pTb;
1497
1498 pReNative->bmHstRegs = IEMNATIVE_REG_FIXED_MASK
1499#if IEMNATIVE_HST_GREG_COUNT < 32
1500 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
1501#endif
1502 ;
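    /*
     * Illustrative arithmetic for the seeding above (the register count is an
     * assumption for the example): with IEMNATIVE_HST_GREG_COUNT == 16, the
     * ~(RT_BIT(16) - 1U) term is 0xffff0000, so bits 16..31 of bmHstRegs start
     * out set and the allocators further down can never hand out a host
     * register that does not exist; with 32 host GREGs the term is compiled
     * out and only IEMNATIVE_REG_FIXED_MASK remains pre-allocated.
     */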
1503 pReNative->bmHstRegsWithGstShadow = 0;
1504 pReNative->bmGstRegShadows = 0;
1505 pReNative->bmVars = 0;
1506 pReNative->u64ArgVars = UINT64_MAX;
1507
1508 /* Full host register reinit: */
1509 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->aHstRegs); i++)
1510 {
1511 pReNative->aHstRegs[i].fGstRegShadows = 0;
1512 pReNative->aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
1513 pReNative->aHstRegs[i].idxVar = UINT8_MAX;
1514 }
1515
1516 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
1517 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
1518#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1519 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
1520#endif
1521#ifdef IEMNATIVE_REG_FIXED_TMP0
1522 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
1523#endif
1524 );
1525 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
1526 {
1527 fRegs &= ~RT_BIT_32(idxReg);
1528        pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
1529 }
1530
1531 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
1532#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
1533 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
1534#endif
1535#ifdef IEMNATIVE_REG_FIXED_TMP0
1536 pReNative->aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
1537#endif
1538 return pReNative;
1539}
1540
1541
1542/**
1543 * Allocates and initializes the native recompiler state.
1544 *
1545 * This is called the first time an EMT wants to recompile something.
1546 *
1547 * @returns Pointer to the new recompiler state.
1548 * @param pVCpu The cross context virtual CPU structure of the calling
1549 * thread.
1550 * @param pTb The TB that's about to be recompiled.
1551 * @thread EMT(pVCpu)
1552 */
1553static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
1554{
1555 VMCPU_ASSERT_EMT(pVCpu);
1556
1557 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
1558 AssertReturn(pReNative, NULL);
1559
1560 /*
1561     * Try to allocate all the buffers and stuff we need.
1562 */
1563 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
1564 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
1565 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
1566 if (RT_LIKELY( pReNative->pInstrBuf
1567 && pReNative->paLabels
1568 && pReNative->paFixups))
1569 {
1570 /*
1571 * Set the buffer & array sizes on success.
1572 */
1573 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
1574 pReNative->cLabelsAlloc = _8K;
1575 pReNative->cFixupsAlloc = _16K;
1576
1577 /*
1578 * Done, just need to save it and reinit it.
1579 */
1580 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
1581 return iemNativeReInit(pReNative, pTb);
1582 }
1583
1584 /*
1585 * Failed. Cleanup and return.
1586 */
1587 AssertFailed();
1588 RTMemFree(pReNative->pInstrBuf);
1589 RTMemFree(pReNative->paLabels);
1590 RTMemFree(pReNative->paFixups);
1591 RTMemFree(pReNative);
1592 return NULL;
1593}
1594
1595
1596/**
1597 * Defines a label.
1598 *
1599 * @returns Label ID.
1600 * @param pReNative The native recompile state.
1601 * @param enmType The label type.
1602 * @param offWhere The instruction offset of the label. UINT32_MAX if the
1603 * label is not yet defined (default).
1604 * @param   uData       Data associated with the label. Only applicable to
1605 *                      certain types of labels. Default is zero.
1606 */
1607DECLHIDDEN(uint32_t) iemNativeMakeLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1608 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
1609{
1610 /*
1611 * Do we have the label already?
1612 */
1613 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1614 uint32_t const cLabels = pReNative->cLabels;
1615 for (uint32_t i = 0; i < cLabels; i++)
1616 if ( paLabels[i].enmType == enmType
1617 && paLabels[i].uData == uData)
1618 {
1619 if (paLabels[i].off == offWhere || offWhere == UINT32_MAX)
1620 return i;
1621 if (paLabels[i].off == UINT32_MAX)
1622 {
1623 paLabels[i].off = offWhere;
1624 return i;
1625 }
1626 }
1627
1628 /*
1629 * Make sure we've got room for another label.
1630 */
1631 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
1632 { /* likely */ }
1633 else
1634 {
1635 uint32_t cNew = pReNative->cLabelsAlloc;
1636 AssertReturn(cNew, UINT32_MAX);
1637 AssertReturn(cLabels == cNew, UINT32_MAX);
1638 cNew *= 2;
1639        AssertReturn(cNew <= _64K, UINT32_MAX); /* IEMNATIVEFIXUP::idxLabel type restricts this */
1640 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
1641 AssertReturn(paLabels, UINT32_MAX);
1642 pReNative->paLabels = paLabels;
1643 pReNative->cLabelsAlloc = cNew;
1644 }
1645
1646 /*
1647 * Define a new label.
1648 */
1649 paLabels[cLabels].off = offWhere;
1650 paLabels[cLabels].enmType = enmType;
1651 paLabels[cLabels].uData = uData;
1652 pReNative->cLabels = cLabels + 1;
1653 return cLabels;
1654}
1655
1656
1657/**
1658 * Looks up a label.
1659 *
1660 * @returns Label ID if found, UINT32_MAX if not.
1661 */
1662static uint32_t iemNativeFindLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
1663 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
1664{
1665 PIEMNATIVELABEL paLabels = pReNative->paLabels;
1666 uint32_t const cLabels = pReNative->cLabels;
1667 for (uint32_t i = 0; i < cLabels; i++)
1668 if ( paLabels[i].enmType == enmType
1669 && paLabels[i].uData == uData
1670 && ( paLabels[i].off == offWhere
1671 || offWhere == UINT32_MAX
1672 || paLabels[i].off == UINT32_MAX))
1673 return i;
1674 return UINT32_MAX;
1675}
1676
1677
1678
1679/**
1680 * Adds a fixup.
1681 *
1682 * @returns Success indicator.
1683 * @param pReNative The native recompile state.
1684 * @param offWhere The instruction offset of the fixup location.
1685 * @param idxLabel The target label ID for the fixup.
1686 * @param enmType The fixup type.
1687 * @param offAddend Fixup addend if applicable to the type. Default is 0.
1688 */
1689DECLHIDDEN(bool) iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
1690 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/) RT_NOEXCEPT
1691{
1692 Assert(idxLabel <= UINT16_MAX);
1693 Assert((unsigned)enmType <= UINT8_MAX);
1694
1695 /*
1696 * Make sure we've room.
1697 */
1698 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
1699 uint32_t const cFixups = pReNative->cFixups;
1700 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
1701 { /* likely */ }
1702 else
1703 {
1704 uint32_t cNew = pReNative->cFixupsAlloc;
1705 AssertReturn(cNew, false);
1706 AssertReturn(cFixups == cNew, false);
1707 cNew *= 2;
1708 AssertReturn(cNew <= _128K, false);
1709 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
1710 AssertReturn(paFixups, false);
1711 pReNative->paFixups = paFixups;
1712 pReNative->cFixupsAlloc = cNew;
1713 }
1714
1715 /*
1716 * Add the fixup.
1717 */
1718 paFixups[cFixups].off = offWhere;
1719 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
1720 paFixups[cFixups].enmType = enmType;
1721 paFixups[cFixups].offAddend = offAddend;
1722 pReNative->cFixups = cFixups + 1;
1723 return true;
1724}
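/*
 * Illustrative sketch (not part of the original file) of how a label and a
 * fixup are typically paired when emitting a forward branch; it mirrors the
 * AMD64 path of iemNativeEmitCheckCallRetAndPassUp() further down:
 *
 *      uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
 *      AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
 *      pbCodeBuf[off++] = 0x0f;    // jnz rel32 - opcode bytes first...
 *      pbCodeBuf[off++] = 0x85;
 *      AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4), UINT32_MAX);
 *      pbCodeBuf[off++] = 0x00;    // ...then four placeholder bytes which the
 *      pbCodeBuf[off++] = 0x00;    // final fixup pass patches once the label
 *      pbCodeBuf[off++] = 0x00;    // has been given a real code offset.
 *      pbCodeBuf[off++] = 0x00;
 */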
1725
1726/**
1727 * Slow code path for iemNativeInstrBufEnsure.
1728 */
1729DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1730 uint32_t cInstrReq) RT_NOEXCEPT
1731{
1732 /* Double the buffer size till we meet the request. */
1733 uint32_t cNew = pReNative->cInstrBufAlloc;
1734 AssertReturn(cNew > 0, NULL);
1735 do
1736 cNew *= 2;
1737 while (cNew < off + cInstrReq);
1738
1739 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
1740 AssertReturn(cbNew <= _2M, NULL);
1741
1742 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
1743 AssertReturn(pvNew, NULL);
1744
1745 pReNative->cInstrBufAlloc = cNew;
1746 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
1747}
1748
1749
1750/**
1751 * Register parameter indexes (indexed by argument number).
1752 */
1753DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
1754{
1755 IEMNATIVE_CALL_ARG0_GREG,
1756 IEMNATIVE_CALL_ARG1_GREG,
1757 IEMNATIVE_CALL_ARG2_GREG,
1758 IEMNATIVE_CALL_ARG3_GREG,
1759#if defined(IEMNATIVE_CALL_ARG4_GREG)
1760 IEMNATIVE_CALL_ARG4_GREG,
1761# if defined(IEMNATIVE_CALL_ARG5_GREG)
1762 IEMNATIVE_CALL_ARG5_GREG,
1763# if defined(IEMNATIVE_CALL_ARG6_GREG)
1764 IEMNATIVE_CALL_ARG6_GREG,
1765# if defined(IEMNATIVE_CALL_ARG7_GREG)
1766 IEMNATIVE_CALL_ARG7_GREG,
1767# endif
1768# endif
1769# endif
1770#endif
1771};
1772
1773/**
1774 * Call register masks indexed by argument count.
1775 */
1776DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
1777{
1778 0,
1779 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
1780 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
1781 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
1782 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1783 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
1784#if defined(IEMNATIVE_CALL_ARG4_GREG)
1785 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1786 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
1787# if defined(IEMNATIVE_CALL_ARG5_GREG)
1788 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1789 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
1790# if defined(IEMNATIVE_CALL_ARG6_GREG)
1791 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1792 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
1793 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
1794# if defined(IEMNATIVE_CALL_ARG7_GREG)
1795 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
1796 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
1797 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
1798# endif
1799# endif
1800# endif
1801#endif
1802};
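/*
 * Illustrative sketch (not from the original file) of how the two tables above
 * work together, mirroring iemNativeRegAllocArgs() further down:
 * g_afIemNativeCallRegs[cArgs] gives the combined mask of the first cArgs
 * argument registers (handy for bulk "are they all free?" checks), while
 * g_aidxIemNativeCallRegs[i] maps argument number i to its host register:
 *
 *      if (!((pReNative->bmHstRegs | pReNative->bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]))
 *          for (uint32_t i = 0; i < cArgs; i++)
 *          {
 *              uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
 *              // ... mark idxReg as a kIemNativeWhat_Arg allocation ...
 *          }
 */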
1803
1804
1805DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
1806 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
1807{
1808 pReNative->bmHstRegs |= RT_BIT_32(idxReg);
1809
1810 pReNative->aHstRegs[idxReg].enmWhat = enmWhat;
1811 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
1812 pReNative->aHstRegs[idxReg].idxVar = idxVar;
1813 return (uint8_t)idxReg;
1814}
1815
1816
1817/**
1818 * Locate a register, possibly freeing one up.
1819 *
1820 * This ASSUMES the caller has done the minimal/optimal allocation checks and
1821 * failed.
1822 */
1823static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fAllowVolatile) RT_NOEXCEPT
1824{
1825 uint32_t fRegMask = fAllowVolatile
1826 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
1827 : IEMNATIVE_HST_GREG_MASK & ~(IEMNATIVE_REG_FIXED_MASK | IEMNATIVE_CALL_VOLATILE_GREG_MASK);
1828
1829 /*
1830 * Try a freed register that's shadowing a guest register
1831 */
1832 uint32_t fRegs = ~pReNative->bmHstRegs & fRegMask;
1833 if (fRegs)
1834 {
1835 /** @todo pick better here: */
1836 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
1837
1838 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
1839 Assert( (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
1840 == pReNative->aHstRegs[idxReg].fGstRegShadows);
1841 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
1842
1843 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
1844 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
1845 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
1846 return idxReg;
1847 }
1848
1849 /*
1850 * Try free up a variable that's in a register.
1851 *
1852     * We do two rounds here: first we evacuate variables that don't need to be
1853     * saved on the stack, then in the second round we move things to the stack.
1854 */
1855 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
1856 {
1857 uint32_t fVars = pReNative->bmVars;
1858 while (fVars)
1859 {
1860 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
1861 uint8_t const idxReg = pReNative->aVars[idxVar].idxReg;
1862 if ( idxReg < RT_ELEMENTS(pReNative->aHstRegs)
1863 && (RT_BIT_32(idxReg) & fRegMask)
1864 && ( iLoop == 0
1865 ? pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack
1866 : pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
1867 {
1868 Assert(pReNative->bmHstRegs & RT_BIT_32(idxReg));
1869 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxReg].fGstRegShadows)
1870 == pReNative->aHstRegs[idxReg].fGstRegShadows);
1871 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
1872 == RT_BOOL(pReNative->aHstRegs[idxReg].fGstRegShadows));
1873
1874 if (pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
1875 {
1876 AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT8_MAX);
1877 uint32_t off = *poff;
1878 *poff = off = iemNativeEmitStoreGprByBp(pReNative, off,
1879 pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t)
1880 - IEMNATIVE_FP_OFF_STACK_VARS,
1881 idxReg);
1882 AssertReturn(off != UINT32_MAX, UINT8_MAX);
1883 }
1884
1885 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
1886 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
1887 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
1888 pReNative->bmHstRegs &= ~RT_BIT_32(idxReg);
1889 return idxReg;
1890 }
1891 fVars &= ~RT_BIT_32(idxVar);
1892 }
1893 }
1894
1895 AssertFailedReturn(UINT8_MAX);
1896}
1897
1898
1899/**
1900 * Moves a variable to a different register or spills it onto the stack.
1901 *
1902 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
1903 * kinds can easily be recreated if needed later.
1904 *
1905 * @returns The new code buffer position, UINT32_MAX on failure.
1906 * @param pReNative The native recompile state.
1907 * @param off The current code buffer position.
1908 * @param idxVar The variable index.
1909 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
1910 * call-volatile registers.
1911 */
1912static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
1913 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
1914{
1915 Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
1916 Assert(pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
1917
1918 uint8_t const idxRegOld = pReNative->aVars[idxVar].idxReg;
1919 Assert(idxRegOld < RT_ELEMENTS(pReNative->aHstRegs));
1920 Assert(pReNative->bmHstRegs & RT_BIT_32(idxRegOld));
1921 Assert(pReNative->aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
1922 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegOld].fGstRegShadows)
1923 == pReNative->aHstRegs[idxRegOld].fGstRegShadows);
1924 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
1925 == RT_BOOL(pReNative->aHstRegs[idxRegOld].fGstRegShadows));
1926
1927
1928 /** @todo Add statistics on this.*/
1929 /** @todo Implement basic variable liveness analysis (python) so variables
1930     * can be freed immediately once no longer used.  Until then we are liable to
1931     * be trashing registers and stack space on dead variables. */
1932
1933 /*
1934 * First try move it to a different register, as that's cheaper.
1935 */
1936 fForbiddenRegs |= RT_BIT_32(idxRegOld);
1937 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
1938 uint32_t fRegs = ~pReNative->bmHstRegs & ~fForbiddenRegs;
1939 if (fRegs)
1940 {
1941 /* Avoid using shadow registers, if possible. */
1942 if (fRegs & ~pReNative->bmHstRegsWithGstShadow)
1943 fRegs &= ~pReNative->bmHstRegsWithGstShadow;
1944 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
1945
1946 uint64_t fGstRegShadows = pReNative->aHstRegs[idxRegOld].fGstRegShadows;
1947 pReNative->aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
1948 pReNative->aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
1949 pReNative->aHstRegs[idxRegNew].idxVar = idxVar;
1950 if (fGstRegShadows)
1951 {
1952 pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
1953 while (fGstRegShadows)
1954 {
1955 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows);
1956 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
1957
1958 Assert(pReNative->aidxGstRegShadows[idxGstReg] == idxRegOld);
1959 pReNative->aidxGstRegShadows[idxGstReg] = idxRegNew;
1960 }
1961 }
1962
1963 pReNative->aVars[idxVar].idxReg = (uint8_t)idxRegNew;
1964 pReNative->bmHstRegs |= RT_BIT_32(idxRegNew);
1965 }
1966 /*
1967 * Otherwise we must spill the register onto the stack.
1968 */
1969 else
1970 {
1971 AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT32_MAX);
1972 off = iemNativeEmitStoreGprByBp(pReNative, off,
1973 pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t) - IEMNATIVE_FP_OFF_STACK_VARS,
1974 idxRegOld);
1975 AssertReturn(off != UINT32_MAX, UINT32_MAX);
1976
1977 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
1978 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxRegOld].fGstRegShadows;
1979 }
1980
1981 pReNative->bmHstRegs &= ~RT_BIT_32(idxRegOld);
1982 pReNative->aHstRegs[idxRegOld].fGstRegShadows = 0;
1983 return off;
1984}
1985
1986
1987/**
1988 * Allocates a temporary host general purpose register.
1989 *
1990 * This may emit code to save register content onto the stack in order to free
1991 * up a register.
1992 *
1993 * @returns The host register number, UINT8_MAX on failure.
1994 * @param pReNative The native recompile state.
1995 * @param poff Pointer to the variable with the code buffer position.
1996 *                          This will be updated if we need to move a variable from
1997 * register to stack in order to satisfy the request.
1998 * @param  fPreferVolatile  Whether to prefer volatile over non-volatile
1999 * registers (@c true, default) or the other way around
2000 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
2001 */
2002DECLHIDDEN(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2003 bool fPreferVolatile /*= true*/) RT_NOEXCEPT
2004{
2005 /*
2006     * Try to find a completely unused register, preferably a call-volatile one.
2007 */
2008 uint8_t idxReg;
2009 uint32_t fRegs = ~pReNative->bmHstRegs
2010 & ~pReNative->bmHstRegsWithGstShadow
2011 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
2012 if (fRegs)
2013 {
2015 if (fPreferVolatile)
2016 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
2017 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2018 else
2019 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
2020 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
2021 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2022 Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2023 }
2024 else
2025 {
2026 idxReg = iemNativeRegAllocFindFree(pReNative, poff, true /*fAllowVolatile*/);
2027 AssertReturn(idxReg != UINT8_MAX, UINT8_MAX);
2028 }
2029 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
2030}
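/*
 * Typical usage sketch (not from the original file; uSomeImm is a placeholder),
 * pairing the allocation with iemNativeRegFreeTmp() further down:
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off, true /*fPreferVolatile*/);
 *      AssertReturn(idxTmpReg != UINT8_MAX, UINT32_MAX);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, uSomeImm);
 *      AssertReturn(off != UINT32_MAX, UINT32_MAX);
 *      // ... further emits using idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);  // does not flush guest shadow copies
 */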
2031
2032
2033/**
2034 * Info about shadowed guest register values.
2035 * @see IEMNATIVEGSTREG
2036 */
2037static struct
2038{
2039 /** Offset in VMCPU. */
2040 uint32_t off;
2041 /** The field size. */
2042 uint8_t cb;
2043 /** Name (for logging). */
2044 const char *pszName;
2045} const g_aGstShadowInfo[] =
2046{
2047#define CPUMCTX_OFF_AND_SIZE(a_Reg) RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2048 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2049 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2050 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2051 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2052 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2053 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2054 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2055 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2056 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2057 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2058 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2059 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2060 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2061 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2062 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2063 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2064 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2065 /* [kIemNativeGstReg_Rflags] = */ { CPUMCTX_OFF_AND_SIZE(rflags), "rflags", },
2066 /* [18] = */ { UINT32_C(0xfffffff7), 0, NULL, },
2067 /* [19] = */ { UINT32_C(0xfffffff5), 0, NULL, },
2068 /* [20] = */ { UINT32_C(0xfffffff3), 0, NULL, },
2069 /* [21] = */ { UINT32_C(0xfffffff1), 0, NULL, },
2070 /* [22] = */ { UINT32_C(0xffffffef), 0, NULL, },
2071 /* [23] = */ { UINT32_C(0xffffffed), 0, NULL, },
2072 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2073 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2074 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2075 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2076 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2077 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2078 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2079 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2080 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2081 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2082 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2083 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2084 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2085 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2086 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2087 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2088 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2089 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2090#undef CPUMCTX_OFF_AND_SIZE
2091};
2092AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2093
2094
2095/** Host CPU general purpose register names. */
2096const char * const g_apszIemNativeHstRegNames[] =
2097{
2098#ifdef RT_ARCH_AMD64
2099 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2100#elif defined(RT_ARCH_ARM64)
2101 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2102 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2103#else
2104# error "port me"
2105#endif
2106};
2107
2108/**
2109 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
2110 * extending to 64-bit width.
2111 *
2112 * @returns New code buffer offset on success, UINT32_MAX on failure.
2113 * @param   pReNative   The native recompile state.
2114 * @param off The current code buffer position.
2115 * @param idxHstReg The host register to load the guest register value into.
2116 * @param enmGstReg The guest register to load.
2117 *
2118 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
2119 * that is something the caller needs to do if applicable.
2120 */
2121DECLHIDDEN(uint32_t) iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2122 uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
2123{
2124 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
2125 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
2126
2127 switch (g_aGstShadowInfo[enmGstReg].cb)
2128 {
2129 case sizeof(uint64_t):
2130 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2131 case sizeof(uint32_t):
2132 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2133 case sizeof(uint16_t):
2134 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2135#if 0 /* not present in the table. */
2136 case sizeof(uint8_t):
2137 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
2138#endif
2139 default:
2140 AssertFailedReturn(UINT32_MAX);
2141 }
2142}
2143
2144
2145#ifdef VBOX_STRICT
2146/**
2147 * Emits code that checks that the content of register @a idxReg is the same
2148 * as what's in the guest register @a enmGstReg, triggering a breakpoint
2149 * instruction if that's not the case.
2150 *
2151 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
2152 * Trashes EFLAGS on AMD64.
2153 */
2154static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2155 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
2156{
2157# ifdef RT_ARCH_AMD64
2158 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2159 AssertReturn(pbCodeBuf, UINT32_MAX);
2160
2161 /* cmp reg, [mem] */
2162 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
2163 {
2164 if (idxReg >= 8)
2165 pbCodeBuf[off++] = X86_OP_REX_R;
2166 pbCodeBuf[off++] = 0x38;
2167 }
2168 else
2169 {
2170 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
2171 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
2172 else
2173 {
2174 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
2175 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2176 else
2177 AssertReturn(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t), UINT32_MAX);
2178 if (idxReg >= 8)
2179 pbCodeBuf[off++] = X86_OP_REX_R;
2180 }
2181 pbCodeBuf[off++] = 0x39;
2182 }
2183 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
2184
2185 /* je/jz +1 */
2186 pbCodeBuf[off++] = 0x74;
2187 pbCodeBuf[off++] = 0x01;
2188
2189 /* int3 */
2190 pbCodeBuf[off++] = 0xcc;
2191
2192 /* For values smaller than the register size, we must check that the rest
2193 of the register is all zeros. */
2194 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
2195 {
2196 /* test reg64, imm32 */
2197 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2198 pbCodeBuf[off++] = 0xf7;
2199 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2200 pbCodeBuf[off++] = 0;
2201 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
2202 pbCodeBuf[off++] = 0xff;
2203 pbCodeBuf[off++] = 0xff;
2204
2205 /* je/jz +1 */
2206 pbCodeBuf[off++] = 0x74;
2207 pbCodeBuf[off++] = 0x01;
2208
2209 /* int3 */
2210 pbCodeBuf[off++] = 0xcc;
2211 }
2212 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
2213 {
2214 /* rol reg64, 32 */
2215 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2216 pbCodeBuf[off++] = 0xc1;
2217 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2218 pbCodeBuf[off++] = 32;
2219
2220 /* test reg32, ffffffffh */
2221 if (idxReg >= 8)
2222 pbCodeBuf[off++] = X86_OP_REX_B;
2223 pbCodeBuf[off++] = 0xf7;
2224 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2225 pbCodeBuf[off++] = 0xff;
2226 pbCodeBuf[off++] = 0xff;
2227 pbCodeBuf[off++] = 0xff;
2228 pbCodeBuf[off++] = 0xff;
2229
2230 /* je/jz +1 */
2231 pbCodeBuf[off++] = 0x74;
2232 pbCodeBuf[off++] = 0x01;
2233
2234 /* int3 */
2235 pbCodeBuf[off++] = 0xcc;
2236
2237 /* rol reg64, 32 */
2238 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
2239 pbCodeBuf[off++] = 0xc1;
2240 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
2241 pbCodeBuf[off++] = 32;
2242 }
2243
2244# elif defined(RT_ARCH_ARM64)
2245 /* mov TMP0, [gstreg] */
2246 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
2247
2248 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2249 AssertReturn(pu32CodeBuf, UINT32_MAX);
2250 /* sub tmp0, tmp0, idxReg */
2251 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
2252 /* cbz tmp0, +1 */
2253 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, IEMNATIVE_REG_FIXED_TMP0);
2254 /* brk #0x1000+enmGstReg */
2255 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
2256
2257# else
2258# error "Port me!"
2259# endif
2260 return off;
2261}
2262#endif /* VBOX_STRICT */
2263
2264
2265/**
2266 * Marks host register @a idxHstReg as containing a shadow copy of guest
2267 * register @a enmGstReg.
2268 *
2269 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
2270 * host register before calling.
2271 */
2272DECL_FORCE_INLINE(void)
2273iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
2274{
2275 Assert(!(pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg)));
2276
2277 pReNative->aidxGstRegShadows[enmGstReg] = idxHstReg;
2278 pReNative->aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
2279 pReNative->bmGstRegShadows |= RT_BIT_64(enmGstReg);
2280 pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
2281}
2282
2283
2284/**
2285 * Clear any guest register shadow claims from @a idxHstReg.
2286 *
2287 * The register does not need to be shadowing any guest registers.
2288 */
2289DECL_FORCE_INLINE(void)
2290iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg)
2291{
2292 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2293 == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
2294 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2295 == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2296
2297 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2298 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2299 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2300}
2301
2302
2303/**
2304 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
2305 * to @a idxRegTo.
2306 */
2307DECL_FORCE_INLINE(void)
2308iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo, IEMNATIVEGSTREG enmGstReg)
2309{
2310 Assert(pReNative->aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
2311 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegFrom].fGstRegShadows)
2312 == pReNative->aHstRegs[idxRegFrom].fGstRegShadows);
2313 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
2314 == RT_BOOL(pReNative->aHstRegs[idxRegFrom].fGstRegShadows));
2315
2316 pReNative->aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
2317 pReNative->aHstRegs[idxRegTo].fGstRegShadows = RT_BIT_64(enmGstReg);
2318 pReNative->aidxGstRegShadows[enmGstReg] = idxRegTo;
2319}
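/*
 * Bookkeeping invariant maintained by the three helpers above (illustrative
 * example; host register 3 and kIemNativeGstReg_Pc are arbitrary picks, and the
 * guest register is assumed not to be shadowed anywhere beforehand):
 *
 *      iemNativeRegMarkAsGstRegShadow(pReNative, 3, kIemNativeGstReg_Pc);
 *      // Afterwards all four views agree:
 *      Assert(pReNative->aidxGstRegShadows[kIemNativeGstReg_Pc] == 3);
 *      Assert(pReNative->aHstRegs[3].fGstRegShadows == RT_BIT_64(kIemNativeGstReg_Pc));
 *      Assert(pReNative->bmGstRegShadows & RT_BIT_64(kIemNativeGstReg_Pc));
 *      Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(3));
 */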
2320
2321
2322
2323/**
2324 * Intended use statement for iemNativeRegAllocTmpForGuestReg().
2325 */
2326typedef enum IEMNATIVEGSTREGUSE
2327{
2328 /** The usage is read-only, the register holding the guest register
2329 * shadow copy will not be modified by the caller. */
2330 kIemNativeGstRegUse_ReadOnly = 0,
2331 /** The caller will update the guest register (think: PC += cbInstr).
2332 * The guest shadow copy will follow the returned register. */
2333 kIemNativeGstRegUse_ForUpdate,
2334 /** The caller will use the guest register value as input in a calculation
2335 * and the host register will be modified.
2336 * This means that the returned host register will not be marked as a shadow
2337 * copy of the guest register. */
2338 kIemNativeGstRegUse_Calculation
2339} IEMNATIVEGSTREGUSE;
2340
2341/**
2342 * Allocates a temporary host general purpose register for updating a guest
2343 * register value.
2344 *
2345 * Since we may already have a register holding the guest register value,
2346 * code will be emitted to do the loading if that's not the case. Code may also
2347 * be emitted if we have to free up a register to satify the request.
2348 * be emitted if we have to free up a register to satisfy the request.
2349 * @returns The host register number, UINT8_MAX on failure.
2350 * @param pReNative The native recompile state.
2351 * @param poff Pointer to the variable with the code buffer
2352 *                          position. This will be updated if we need to move a
2353 * variable from register to stack in order to satisfy
2354 * the request.
2355 * @param   enmGstReg       The guest register that is to be updated.
2356 * @param enmIntendedUse How the caller will be using the host register.
2357 */
2358DECLHIDDEN(uint8_t) iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
2359 IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse) RT_NOEXCEPT
2360{
2361 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
2362#ifdef LOG_ENABLED
2363 static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
2364#endif
2365
2366 /*
2367 * First check if the guest register value is already in a host register.
2368 */
2369 if (pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg))
2370 {
2371 uint8_t idxReg = pReNative->aidxGstRegShadows[enmGstReg];
2372 Assert(idxReg < RT_ELEMENTS(pReNative->aHstRegs));
2373 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
2374 Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2375
2376 if (!(pReNative->bmHstRegs & RT_BIT_32(idxReg)))
2377 {
2378 /*
2379             * If the register will trash the guest shadow copy, try to find a
2380 * completely unused register we can use instead. If that fails,
2381 * we need to disassociate the host reg from the guest reg.
2382 */
2383 /** @todo would be nice to know if preserving the register is in any way helpful. */
2384 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
2385 && ( ~pReNative->bmHstRegs
2386 & ~pReNative->bmHstRegsWithGstShadow
2387 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
2388 {
2389 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
2390 Assert(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs));
2391
2392 uint32_t off = *poff;
2393 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2394 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2395
2396 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
2397 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2398 g_apszIemNativeHstRegNames[idxRegNew]));
2399 idxReg = idxRegNew;
2400 }
2401 else
2402 {
2403 pReNative->bmHstRegs |= RT_BIT_32(idxReg);
2404 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
2405 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2406 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2407 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
2408 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2409 else
2410 {
2411 iemNativeRegClearGstRegShadowing(pReNative, idxReg);
2412 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
2413 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
2414 }
2415 }
2416 }
2417 else
2418 {
2419 AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
2420 ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
2421
2422 /*
2423 * Allocate a new register, copy the value and, if updating, the
2424 * guest shadow copy assignment to the new register.
2425 */
2426 /** @todo share register for readonly access. */
2427 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2428 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
2429
2430 uint32_t off = *poff;
2431 *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
2432 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2433
2434 if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
2435 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
2436 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2437 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
2438 else
2439 {
2440 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg);
2441 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
2442 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
2443 g_apszIemNativeHstRegNames[idxRegNew]));
2444 }
2445 idxReg = idxRegNew;
2446 }
2447
2448#ifdef VBOX_STRICT
2449 /* Strict builds: Check that the value is correct. */
2450 uint32_t off = *poff;
2451 *poff = off = iemNativeEmitGuestRegValueCheck(pReNative, off, idxReg, enmGstReg);
2452 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2453#endif
2454
2455 return idxReg;
2456 }
2457
2458 /*
2459     * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
2460 */
2461 uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
2462 AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
2463
2464 uint32_t off = *poff;
2465 *poff = off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegNew, enmGstReg);
2466 AssertReturn(off != UINT32_MAX, UINT8_MAX);
2467
2468 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
2469 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg);
2470 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
2471 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
2472
2473 return idxRegNew;
2474}
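/*
 * Illustrative sketch of the three usage intents (not from the original file;
 * iemNativeEmitAddGprImm8 is a hypothetical emitter and cbInstr a placeholder,
 * both used only for the example):
 *
 *      // ReadOnly: the shadow association is kept; the register must not be modified.
 *      uint8_t const idxPcRegRo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                                 kIemNativeGstRegUse_ReadOnly);
 *
 *      // ForUpdate: the shadow association follows the returned register (think: PC += cbInstr).
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *      off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);   // hypothetical helper
 *
 *      // Calculation: the value is only an input; the returned register is not marked as a
 *      // shadow copy and may be clobbered freely.
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Rflags,
 *                                                                kIemNativeGstRegUse_Calculation);
 */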
2475
2476
2477DECLHIDDEN(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar) RT_NOEXCEPT;
2478
2479
2480/**
2481 * Allocates argument registers for a function call.
2482 *
2483 * @returns New code buffer offset on success, UINT32_MAX on failure.
2484 * @param pReNative The native recompile state.
2485 * @param off The current code buffer offset.
2486 * @param cArgs The number of arguments the function call takes.
2487 */
2488DECLHIDDEN(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs) RT_NOEXCEPT
2489{
2490    AssertReturn(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT, UINT32_MAX);
2491 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2492 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2493
2494 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2495 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2496 else if (cArgs == 0)
2497        return off;
2498
2499 /*
2500     * Do we get lucky and all the registers are free and not shadowing anything?
2501 */
2502 if (((pReNative->bmHstRegs | pReNative->bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
2503 for (uint32_t i = 0; i < cArgs; i++)
2504 {
2505 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2506 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2507 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2508 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2509 }
2510 /*
2511 * Okay, not lucky so we have to free up the registers.
2512 */
2513 else
2514 for (uint32_t i = 0; i < cArgs; i++)
2515 {
2516 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
2517 if (pReNative->bmHstRegs & RT_BIT_32(idxReg))
2518 {
2519 switch (pReNative->aHstRegs[idxReg].enmWhat)
2520 {
2521 case kIemNativeWhat_Var:
2522 {
2523 uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
2524                        AssertReturn(idxVar < RT_ELEMENTS(pReNative->aVars), UINT32_MAX);
2525 Assert(pReNative->aVars[idxVar].idxReg == idxReg);
2526 Assert(pReNative->bmVars & RT_BIT_32(idxVar));
2527
2528 if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2529 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2530 else
2531 {
2532 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2533                            AssertReturn(off != UINT32_MAX, UINT32_MAX);
2534 Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
2535 }
2536 break;
2537 }
2538
2539 case kIemNativeWhat_Tmp:
2540 case kIemNativeWhat_Arg:
2541 case kIemNativeWhat_rc:
2542                        AssertFailedReturn(UINT32_MAX);
2543                    default:
2544                        AssertFailedReturn(UINT32_MAX);
2545 }
2546
2547 }
2548 if (pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
2549 {
2550 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2551 Assert( (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
2552 == pReNative->aHstRegs[idxReg].fGstRegShadows);
2553 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2554 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2555 }
2556 else
2557 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
2558 pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
2559 pReNative->aHstRegs[idxReg].idxVar = UINT8_MAX;
2560 }
2561 pReNative->bmHstRegs |= g_afIemNativeCallRegs[cArgs];
2562    return off;
2563}
2564
2565
2566DECLHIDDEN(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
2567
2568
2569#if 0
2570/**
2571 * Frees a register assignment of any type.
2572 *
2573 * @param pReNative The native recompile state.
2574 * @param idxHstReg The register to free.
2575 *
2576 * @note Does not update variables.
2577 */
2578DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2579{
2580 Assert(idxHstReg < RT_ELEMENTS(pReNative->aHstRegs));
2581 Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
2582 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
2583 Assert( pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
2584 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
2585 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
2586 || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
2587 Assert( pReNative->aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
2588 || pReNative->aVars[pReNative->aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
2589 || (pReNative->bmVars & RT_BIT_32(pReNative->aHstRegs[idxHstReg].idxVar)));
2590 Assert( (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
2591 == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
2592 Assert( RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
2593 == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2594
2595 pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
2596 /* no flushing, right:
2597 pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
2598 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
2599 pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
2600 */
2601}
2602#endif
2603
2604
2605/**
2606 * Frees a temporary register.
2607 *
2608 * Any shadow copies of guest registers assigned to the host register will not
2609 * be flushed by this operation.
2610 */
2611DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
2612{
2613 Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
2614 Assert(pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
2615 pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
2616 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
2617 g_apszIemNativeHstRegNames[idxHstReg], pReNative->aHstRegs[idxHstReg].fGstRegShadows));
2618}
2619
2620
2621/**
2622 * Called right before emitting a call instruction to move anything important
2623 * out of call-volatile registers, free and flush the call-volatile registers,
2624 * optionally freeing argument variables.
2625 *
2626 * @returns New code buffer offset, UINT32_MAX on failure.
2627 * @param pReNative The native recompile state.
2628 * @param off The code buffer offset.
2629 * @param cArgs The number of arguments the function call takes.
2630 * It is presumed that the host register part of these have
2631 *                          It is presumed that the host register part of these has
2632 *                          already been allocated as such and won't need moving,
2633 *                          just freeing.
2634 */
2635DECLHIDDEN(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2636 uint8_t cArgs, bool fFreeArgVars) RT_NOEXCEPT
2637{
2638 /*
2639 * Free argument variables first (simplified).
2640 */
2641 AssertReturn(cArgs <= RT_ELEMENTS(pReNative->aidxArgVars), UINT32_MAX);
2642 if (fFreeArgVars && cArgs > 0)
2643 {
2644 for (uint32_t i = 0; i < cArgs; i++)
2645 {
2646 uint8_t idxVar = pReNative->aidxArgVars[i];
2647 if (idxVar < RT_ELEMENTS(pReNative->aVars))
2648 {
2649 pReNative->aidxArgVars[i] = UINT8_MAX;
2650 pReNative->bmVars &= ~RT_BIT_32(idxVar);
2651 Assert( pReNative->aVars[idxVar].idxReg
2652 == (i < RT_ELEMENTS(g_aidxIemNativeCallRegs) ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
2653 }
2654 }
2655 Assert(pReNative->u64ArgVars == UINT64_MAX);
2656 }
2657
2658 /*
2659 * Move anything important out of volatile registers.
2660 */
2661 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
2662 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
2663 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
2664#ifdef IEMNATIVE_REG_FIXED_TMP0
2665 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2666#endif
2667 & ~g_afIemNativeCallRegs[cArgs];
2668
2669 fRegsToMove &= pReNative->bmHstRegs;
2670 if (!fRegsToMove)
2671 { /* likely */ }
2672 else
2673 while (fRegsToMove != 0)
2674 {
2675 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
2676 fRegsToMove &= ~RT_BIT_32(idxReg);
2677
2678 switch (pReNative->aHstRegs[idxReg].enmWhat)
2679 {
2680 case kIemNativeWhat_Var:
2681 {
2682 uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
2683 Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
2684 Assert(pReNative->bmVars & RT_BIT_32(idxVar));
2685 Assert(pReNative->aVars[idxVar].idxReg == idxReg);
2686 if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2687 pReNative->aVars[idxVar].idxReg = UINT8_MAX;
2688 else
2689 {
2690 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
2691 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2692 }
2693 continue;
2694 }
2695
2696 case kIemNativeWhat_Arg:
2697 AssertMsgFailed(("What?!?: %u\n", idxReg));
2698 continue;
2699
2700 case kIemNativeWhat_rc:
2701 case kIemNativeWhat_Tmp:
2702 AssertMsgFailed(("Missing free: %u\n", idxReg));
2703 continue;
2704
2705 case kIemNativeWhat_FixedTmp:
2706 case kIemNativeWhat_pVCpuFixed:
2707 case kIemNativeWhat_pCtxFixed:
2708 case kIemNativeWhat_FixedReserved:
2709 case kIemNativeWhat_Invalid:
2710 case kIemNativeWhat_End:
2711 AssertFailedReturn(UINT32_MAX);
2712 }
2713 AssertFailedReturn(UINT32_MAX);
2714 }
2715
2716 /*
2717 * Do the actual freeing.
2718 */
2719 pReNative->bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2720
2721 /* If there are guest register shadows in any call-volatile register, we
2722       have to clear the corresponding guest register masks for each register. */
2723 uint32_t fHstRegsWithGstShadow = pReNative->bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
2724 if (fHstRegsWithGstShadow)
2725 {
2726 pReNative->bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
2727 do
2728 {
2729 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
2730            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2731
2732 Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
2733 pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
2734 pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
2735 } while (fHstRegsWithGstShadow != 0);
2736 }
2737
2738 return off;
2739}
2740
2741
2742/**
2743 * Emits code for checking the return code of a call and rcPassUp, returning
2744 * from the code if either is non-zero.
2745 */
2746DECLHIDDEN(uint32_t) iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2747 uint8_t idxInstr) RT_NOEXCEPT
2748{
2749#ifdef RT_ARCH_AMD64
2750 /*
2751 * AMD64: eax = call status code.
2752 */
2753
2754 /* edx = rcPassUp */
2755 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
2756 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2757
2758 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
2759 AssertReturn(pbCodeBuf, UINT32_MAX);
2760
2761 /* edx = eax | rcPassUp */
2762 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
2763 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
2764
2765 /* Jump to non-zero status return path, loading cl with the instruction number. */
2766 pbCodeBuf[off++] = 0xb0 + X86_GREG_xCX; /* mov cl, imm8 (pCallEntry->idxInstr) */
2767 pbCodeBuf[off++] = idxInstr;
2768
2769 pbCodeBuf[off++] = 0x0f; /* jnz rel32 */
2770 pbCodeBuf[off++] = 0x85;
2771 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
2772 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
2773 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4), UINT32_MAX);
2774 pbCodeBuf[off++] = 0x00;
2775 pbCodeBuf[off++] = 0x00;
2776 pbCodeBuf[off++] = 0x00;
2777 pbCodeBuf[off++] = 0x00;
2778
2779 /* done. */
2780
2781#elif defined(RT_ARCH_ARM64)
2782 /*
2783 * ARM64: w0 = call status code.
2784 */
2785 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr); /** @todo 32-bit imm load? Fixed counter register? */
2786 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
2787
2788 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2789 AssertReturn(pu32CodeBuf, UINT32_MAX);
2790
2791 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
2792
2793 uint32_t const idxLabel = iemNativeMakeLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
2794 AssertReturn(idxLabel != UINT32_MAX, UINT32_MAX);
2795 AssertReturn(iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5), UINT32_MAX);
2796 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, ARMV8_A64_REG_X4, false /*f64Bit*/);
2797
2798#else
2799# error "port me"
2800#endif
2801 return off;
2802}
2803
2804
2805/**
2806 * Emits a call to a CImpl function or something similar.
2807 */
2808static int32_t iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2809 uintptr_t pfnCImpl, uint8_t cbInstr, uint8_t cAddParams,
2810 uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
2811{
2812#ifdef VBOX_STRICT
2813 off = iemNativeEmitMarker(pReNative, off);
2814 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2815#endif
2816
2817 /*
2818 * Load the parameters.
2819 */
2820#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
2821    /* Special handling for the hidden VBOXSTRICTRC return pointer. */
2822 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2823 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
2824 if (cAddParams > 0)
2825 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
2826 if (cAddParams > 1)
2827 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
2828 if (cAddParams > 2)
2829 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
2830 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2831
2832#else
2833 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
2834 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2835 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
2836 if (cAddParams > 0)
2837 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
2838 if (cAddParams > 1)
2839 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
2840 if (cAddParams > 2)
2841# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
2842 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
2843# else
2844 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
2845# endif
2846#endif
2847 AssertReturn(off != UINT32_MAX, off);
2848
2849 /*
2850 * Make the call.
2851 */
2852#ifdef RT_ARCH_AMD64
2853 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, pfnCImpl);
2854
2855 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2856 AssertReturn(pbCodeBuf, UINT32_MAX);
2857 pbCodeBuf[off++] = 0xff; /* call rax */
2858 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
2859
2860# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
2861 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2862# endif
2863
2864#elif defined(RT_ARCH_ARM64)
2865 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, pfnCImpl);
2866
2867 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2868 AssertReturn(pu32CodeBuf, UINT32_MAX);
2869 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
2870
2871#else
2872# error "Port me!"
2873#endif
2874
2875 /*
2876 * Check the status code.
2877 */
2878 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2879}
2880
2881
2882/**
2883 * Emits a call to a threaded worker function.
2884 */
2885static int32_t iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
2886{
2887#ifdef VBOX_STRICT
2888 off = iemNativeEmitMarker(pReNative, off);
2889 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2890#endif
2891/** @todo Must flush all shadow guest registers as well. */
2892 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
2893 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
2894
2895#ifdef RT_ARCH_AMD64
2896 /* Load the parameters and emit the call. */
2897# ifdef RT_OS_WINDOWS
2898# ifndef VBOXSTRICTRC_STRICT_ENABLED
2899 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
2900 if (cParams > 0)
2901 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
2902 if (cParams > 1)
2903 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
2904 if (cParams > 2)
2905 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
2906# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
2907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
2908 if (cParams > 0)
2909 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
2910 if (cParams > 1)
2911 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
2912 if (cParams > 2)
2913 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
2914 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
2915 off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2916# endif /* VBOXSTRICTRC_STRICT_ENABLED */
2917# else
2918 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
2919 if (cParams > 0)
2920 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
2921 if (cParams > 1)
2922 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
2923 if (cParams > 2)
2924 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
2925# endif
2926 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
2927
2928 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2929 AssertReturn(pbCodeBuf, UINT32_MAX);
2930 pbCodeBuf[off++] = 0xff; /* call rax */
2931 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
2932
2933# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
2934 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2935# endif
2936
2937#elif RT_ARCH_ARM64
2938 /*
2939 * ARM64:
2940 */
2941 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2942 if (cParams > 0)
2943 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
2944 if (cParams > 1)
2945 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
2946 if (cParams > 2)
2947 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
2948 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0,
2949 (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
2950
2951 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2952 AssertReturn(pu32CodeBuf, UINT32_MAX);
2953
2954 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
2955
2956#else
2957# error "port me"
2958#endif
2959
2960 /*
2961 * Check the status code.
2962 */
2963 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
2964 AssertReturn(off != UINT32_MAX, off);
2965
2966 return off;
2967}
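
/*
 * A small aside on the two-byte indirect call emitted above: it is the FF /2 opcode with a
 * register-direct ModRM byte.  With mod=3 (X86_MOD_REG), reg=2 (the /2 opcode extension) and
 * rm=rax, X86_MODRM_MAKE yields 0xD0, so the byte sequence is FF D0 ("call rax").  A
 * hypothetical sanity check illustrating the arithmetic:
 */
#if 0
static void iemNativeCheckCallRaxEncodingExample(void)
{
    uint8_t const bModRm = (uint8_t)((3 << 6) | (2 << 3) | 0); /* mod=11b (reg direct), reg=010b (/2), rm=000b (rax) */
    Assert(bModRm == 0xd0);                                    /* FF D0 == call rax */
}
#endif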
2968
2969
2970/**
2971 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
2972 */
2973static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
2974{
2975 /*
2976 * Generate the rc + rcPassUp fiddling code if needed.
2977 */
2978 uint32_t idxLabel = iemNativeFindLabel(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
2979 if (idxLabel != UINT32_MAX)
2980 {
2981 Assert(pReNative->paLabels[idxLabel].off == UINT32_MAX);
2982 pReNative->paLabels[idxLabel].off = off;
2983
2984 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
2985#ifdef RT_ARCH_AMD64
2986 /*
2987 * AMD64:
2988 */
2989 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
2990 AssertReturn(pbCodeBuf, UINT32_MAX);
2991
2992 /* Call helper and jump to return point. */
2993# ifdef RT_OS_WINDOWS
2994 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
2995 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2996 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
2997 AssertReturn(off != UINT32_MAX, UINT32_MAX);
2998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
2999 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3000# else
3001 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
3002 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3003 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
3004 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3005 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
3006 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3007# endif
3008 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3009 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3010
3011 pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3012 AssertReturn(pbCodeBuf, UINT32_MAX);
3013 pbCodeBuf[off++] = 0xff; /* call rax */
3014 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
3015
3016 /* Jump to common return point. */
3017 uint32_t offRel = pReNative->paLabels[idxReturnLabel].off - (off + 2);
3018 if (-(int32_t)offRel <= 127)
3019 {
3020 pbCodeBuf[off++] = 0xeb; /* jmp rel8 */
3021 pbCodeBuf[off++] = (uint8_t)offRel;
3022 off++;
3023 }
3024 else
3025 {
3026 offRel -= 3;
3027 pbCodeBuf[off++] = 0xe9; /* jmp rel32 */
3028 pbCodeBuf[off++] = RT_BYTE1(offRel);
3029 pbCodeBuf[off++] = RT_BYTE2(offRel);
3030 pbCodeBuf[off++] = RT_BYTE3(offRel);
3031 pbCodeBuf[off++] = RT_BYTE4(offRel);
3032 }
3033 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3034
3035#elif RT_ARCH_ARM64
3036 /*
3037 * ARM64:
3038 */
3039 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
3040 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3041 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3042 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3043 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
3044 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
3045 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3046
3047 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3048 AssertReturn(pu32CodeBuf, UINT32_MAX);
3049 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
3050
3051 /* Jump back to the common return point. */
3052 int32_t const offRel = pReNative->paLabels[idxReturnLabel].off - off;
3053 pu32CodeBuf[off++] = Armv8A64MkInstrB(offRel);
3054#else
3055# error "port me"
3056#endif
3057 }
3058 return off;
3059}
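
/*
 * The jump back to the common return label above chooses between the 2-byte EB rel8 and the
 * 5-byte E9 rel32 encodings.  In both cases the displacement is relative to the end of the
 * jump instruction, which is where the (off + 2) bias and the extra 3-byte adjustment for
 * the long form come from.  A hypothetical helper showing the same arithmetic in isolation:
 */
#if 0
static uint8_t iemNativeJmpBackFormExample(uint32_t offJmp, uint32_t offTarget)
{
    int32_t const offRel8 = (int32_t)offTarget - (int32_t)(offJmp + 2); /* relative to the byte after EB xx */
    if (offRel8 >= -128 && offRel8 <= 127)
        return 2;                                                       /* short form fits */
    return 5;                                                           /* E9 rel32: displacement = offTarget - (offJmp + 5) */
}
#endif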
3060
3061
3062/**
3063 * Emits a standard epilog.
3064 */
3065static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3066{
3067 /*
3068 * Successful return, so clear the return register (eax, w0).
3069 */
3070 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
3071 AssertReturn(off != UINT32_MAX, UINT32_MAX);
3072
3073 /*
3074 * Define label for common return point.
3075 */
3076 uint32_t const idxReturn = iemNativeMakeLabel(pReNative, kIemNativeLabelType_Return, off);
3077 AssertReturn(idxReturn != UINT32_MAX, UINT32_MAX);
3078
3079 /*
3080 * Restore registers and return.
3081 */
3082#ifdef RT_ARCH_AMD64
3083 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
3084 AssertReturn(pbCodeBuf, UINT32_MAX);
3085
3086 /* Reposition rsp at the r15 restore point. */
3087 pbCodeBuf[off++] = X86_OP_REX_W;
3088 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
3089 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
3090 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
3091
3092 /* Pop non-volatile registers and return */
3093 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
3094 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
3095 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
3096 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
3097 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
3098 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
3099 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
3100 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
3101# ifdef RT_OS_WINDOWS
3102 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
3103 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
3104# endif
3105 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
3106 pbCodeBuf[off++] = 0xc9; /* leave */
3107 pbCodeBuf[off++] = 0xc3; /* ret */
3108 pbCodeBuf[off++] = 0xcc; /* int3 poison */
3109
3110#elif RT_ARCH_ARM64
3111 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3112 AssertReturn(pu32CodeBuf, UINT32_MAX);
3113
3114 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
3115 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
3116 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
3117 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3118 IEMNATIVE_FRAME_VAR_SIZE / 8);
3119 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
3120 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3121 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3122 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3123 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3124 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3125 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3126 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3127 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3128 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3129 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3130 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3131
3132 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
3133 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
3134 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
3135 IEMNATIVE_FRAME_SAVE_REG_SIZE);
3136
3137 /* retab / ret */
3138# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
3139 if (1)
3140 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
3141 else
3142# endif
3143 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
3144
3145#else
3146# error "port me"
3147#endif
3148
3149 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
3150}
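
/*
 * The epilog above restores x19 thru x28 plus BP and LR in ldp pairs, i.e. twelve 64-bit
 * slots, which is what the AssertCompile on IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12 pins
 * down.  A hypothetical arithmetic check of that layout (illustration only):
 */
#if 0
static void iemNativeCheckArm64SaveAreaExample(void)
{
    unsigned const cSavedRegs = 10 /* x19..x28 */ + 2 /* bp + lr */;
    Assert(cSavedRegs * sizeof(uint64_t) == 12 * 8); /* 96 bytes of register save area */
}
#endif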
3151
3152
3153/**
3154 * Emits a standard prolog.
3155 */
3156static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3157{
3158#ifdef RT_ARCH_AMD64
3159 /*
3160 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
3161 * reserving 64 bytes for stack variables plus 4 non-register argument
3162 * slots. Fixed register assignment: xBX = pVCpu;
3163 *
3164 * Since we always do the same register spilling, we can use the same
3165 * unwind description for all the code.
3166 */
3167 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
3168 AssertReturn(pbCodeBuf, UINT32_MAX);
3169 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
3170 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
3171 pbCodeBuf[off++] = 0x8b;
3172 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
3173 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
3174 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
3175# ifdef RT_OS_WINDOWS
3176 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
3177 pbCodeBuf[off++] = 0x8b;
3178 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
3179 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
3180 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
3181# else
3182 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
3183 pbCodeBuf[off++] = 0x8b;
3184 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
3185# endif
3186 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
3187 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
3188 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
3189 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
3190 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
3191 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
3192 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
3193 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
3194
3195 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
3196 X86_GREG_xSP,
3197 IEMNATIVE_FRAME_ALIGN_SIZE
3198 + IEMNATIVE_FRAME_VAR_SIZE
3199 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
3200 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
3201 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
3202 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
3203 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
3204
3205#elif RT_ARCH_ARM64
3206 /*
3207 * We set up a stack frame exactly like on x86, only we have to push the
3208 * return address ourselves here. We save all non-volatile registers.
3209 */
3210 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
3211 AssertReturn(pu32CodeBuf, UINT32_MAX);
3212
3213# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have
3214 * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
3215 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
3216 * in any way conditional, so we just emit this instruction now and hope for the best... */
3217 /* pacibsp */
3218 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
3219# endif
3220
3221 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
3222 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
3223 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kPreIndex,
3224 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
3225 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
3226 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
3227 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3228 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
3229 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3230 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
3231 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3232 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
3233 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3234 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
3235 /* Save the BP and LR (ret address) registers at the top of the frame. */
3236 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_kSigned,
3237 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
3238 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
3239 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
3240 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
3241 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
3242
3243 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
3244 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
3245
3246 /* mov r28, r0 */
3247 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
3248 /* mov r27, r1 */
3249 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
3250
3251#else
3252# error "port me"
3253#endif
3254 return off;
3255}
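
/*
 * The AssertCompile statements in the AMD64 prolog above require a 16-byte aligned variable
 * area and an even number of 8-byte stack/shadow argument slots, so those parts of the
 * 'sub rsp' adjustment are each a multiple of 16 (IEMNATIVE_FRAME_ALIGN_SIZE presumably
 * covers the remaining alignment).  A hypothetical check with made-up example values; the
 * real sizes are the IEMNATIVE_FRAME_* constants:
 */
#if 0
static void iemNativeCheckFrameAlignmentExample(void)
{
    unsigned const cbVar       = 64; /* example value only */
    unsigned const cStackArgs  = 4;  /* example value only, must be even */
    unsigned const cShadowArgs = 4;  /* example value only, must be even */
    unsigned const cbSub       = cbVar + cStackArgs * 8 + cShadowArgs * 8;
    Assert((cbSub & 15) == 0);       /* these pieces of the adjustment are 16-byte multiples */
}
#endif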
3256
3257
3258DECLINLINE(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
3260{
3261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
3262}
3263
3264
3265DECLINLINE(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3266 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
3267{
3268 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
3269}
3270
3271
3272DECLINLINE(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
3273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1, uint64_t uArg2)
3274{
3275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
3276}
3277
3278
3279#if 0
3280/** Same as iemRegFinishClearingRF. */
3281DECLINLINE(uint32_t) iemNativeEmitFinishClearingRF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3282{
3283 RT_NOREF(pReNative, off);
3284#if 0
3285 uint32_t const fFlags = pReNative->pTbOrg->fFlags;
3286 if (fFlags & IEMTB_F_INHIBIT_SHADOW)
3287 {
3288 }
3289 IEMTB_F_IEM_F_MASK
3290
3291 //
3292 if (RT_LIKELY(!( pVCpu->cpum.GstCtx.eflags.uBoth
3293 & (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)) ))
3294 return VINF_SUCCESS;
3295 return iemFinishInstructionWithFlagsSet(pVCpu);
3296#else
3297 return UINT32_MAX;
3298#endif
3299}
3300#endif
3301
3302
3303/** Same as iemRegAddToEip32AndFinishingNoFlags. */
3304DECLINLINE(uint32_t) iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
3305{
3306 /* Allocate a temporary PC register. */
3307 /** @todo this is not strictly required on AMD64; we could emit alternative
3308 * code here if we don't get a tmp register... */
3309 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
3310 AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
3311
3312 /* Perform the addition and store the result. */
3313 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
3314 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
3315
3316 /* Free but don't flush the PC register. */
3317 iemNativeRegFreeTmp(pReNative, idxPcReg);
3318
3319 return off;
3320}
3321
3322/*
3323 * MC definitions for the native recompiler.
3324 */
3325
3326#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl) \
3327 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
3328
3329#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0) \
3330 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
3331
3332#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1) \
3333 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
3334
3335#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_pfnCImpl, a0, a1, a2) \
3336 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
3337
3338
3339#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
3340 {
3341
3342#define IEM_MC_END() \
3343 } AssertFailedReturn(UINT32_MAX /* shouldn't be reached! */)
3344
3345#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
3346 return iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, a_cbInstr)
3347
3348#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
3349 return iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, a_cbInstr)
3350
3351#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
3352 return iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, a_cbInstr)
3353
3354
3355/*
3356 * Builtin functions.
3357 */
3358
3359/**
3360 * Built-in function that calls a C-implementation function taking zero arguments.
3361 */
3362static IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
3363{
3364 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
3365 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
3366 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
3367}
3368
3369
3370
3371/*
3372 * Include g_apfnIemNativeRecompileFunctions and associated functions.
3373 *
3374 * This should probably live in its own file later, but let's see what the
3375 * compile times turn out to be first.
3376 */
3377#include "IEMNativeFunctions.cpp.h"
3378
3379
3380/**
3381 * Recompiles the given threaded TB into a native one.
3382 *
3383 * In case of failure the translation block will be returned as-is.
3384 *
3385 * @returns pTb.
3386 * @param pVCpu The cross context virtual CPU structure of the calling
3387 * thread.
3388 * @param pTb The threaded translation block to recompile to native.
3389 */
3390PIEMTB iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb)
3391{
3392 /*
3393 * The first time thru, we allocate the recompiler state; the other times
3394 * we just need to reset it before using it again.
3395 */
3396 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
3397 if (RT_LIKELY(pReNative))
3398 iemNativeReInit(pReNative, pTb);
3399 else
3400 {
3401 pReNative = iemNativeInit(pVCpu, pTb);
3402 AssertReturn(pReNative, pTb);
3403 }
3404
3405 /*
3406 * Emit prolog code (fixed).
3407 */
3408 uint32_t off = iemNativeEmitProlog(pReNative, 0);
3409 AssertReturn(off != UINT32_MAX, pTb);
3410
3411 /*
3412 * Convert the calls to native code.
3413 */
3414 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3415 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3416 while (cCallsLeft-- > 0)
3417 {
3418 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
3419 if (pfnRecom) /** @todo stats on this. */
3420 {
3421 //STAM_COUNTER_INC()
3422 off = pfnRecom(pReNative, off, pCallEntry);
3423 }
3424 else
3425 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
3426 AssertReturn(off != UINT32_MAX, pTb);
3427
3428 pCallEntry++;
3429 }
3430
3431 /*
3432 * Emit the epilog code.
3433 */
3434 off = iemNativeEmitEpilog(pReNative, off);
3435 AssertReturn(off != UINT32_MAX, pTb);
3436
3437 /*
3438 * Make sure all labels have been defined.
3439 */
3440 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
3441#ifdef VBOX_STRICT
3442 uint32_t const cLabels = pReNative->cLabels;
3443 for (uint32_t i = 0; i < cLabels; i++)
3444 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
3445#endif
3446
3447 /*
3448 * Allocate executable memory, copy over the code we've generated.
3449 */
3450 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3451 if (pTbAllocator->pDelayedFreeHead)
3452 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
3453
3454 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
3455 AssertReturn(paFinalInstrBuf, pTb);
3456 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
3457
3458 /*
3459 * Apply fixups.
3460 */
3461 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
3462 uint32_t const cFixups = pReNative->cFixups;
3463 for (uint32_t i = 0; i < cFixups; i++)
3464 {
3465 Assert(paFixups[i].off < off);
3466 Assert(paFixups[i].idxLabel < cLabels);
3467 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
3468 switch (paFixups[i].enmType)
3469 {
3470#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3471 case kIemNativeFixupType_Rel32:
3472 Assert(paFixups[i].off + 4 <= off);
3473 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
3474 continue;
3475
3476#elif defined(RT_ARCH_ARM64)
3477 case kIemNativeFixupType_RelImm19At5:
3478 {
3479 Assert(paFixups[i].off < off);
3480 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
3481 Assert(offDisp >= -262144 && offDisp < 262144);
3482 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp << 5) & UINT32_C(0x00ffffe0)); /* imm19 at bit 5 */
3483 continue;
3484 }
3485#endif
3486 case kIemNativeFixupType_Invalid:
3487 case kIemNativeFixupType_End:
3488 break;
3489 }
3490 AssertFailed();
3491 }
3492
3493 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
3494#ifdef LOG_ENABLED
3495 if (LogIs3Enabled())
3496 {
3497
3498 }
3499#endif
3500
3501 /*
3502 * Convert the translation block.
3503 */
3504 //RT_BREAKPOINT();
3505 RTMemFree(pTb->Thrd.paCalls);
3506 pTb->Native.paInstructions = paFinalInstrBuf;
3507 pTb->Native.cInstructions = off;
3508 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
3509
3510 Assert(pTbAllocator->cThreadedTbs > 0);
3511 pTbAllocator->cThreadedTbs -= 1;
3512 pTbAllocator->cNativeTbs += 1;
3513 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
3514
3515 return pTb;
3516}
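
/*
 * A note on the kIemNativeFixupType_RelImm19At5 case in the fixup loop above: the label
 * displacement is an instruction count that gets patched into bits [23:5] of the target
 * instruction (the imm19 field of the ARM64 conditional-branch family), while the bits
 * covered by the 0xff00001f mask hold the opcode and register/condition fields.  A
 * hypothetical standalone illustration of the same bit surgery:
 */
#if 0
static uint32_t iemNativePatchImm19At5Example(uint32_t uInstr, int32_t cInstrsDisp)
{
    Assert(cInstrsDisp >= -262144 && cInstrsDisp < 262144); /* must fit in 19 signed bits */
    return (uInstr & UINT32_C(0xff00001f))
         | (((uint32_t)cInstrsDisp << 5) & UINT32_C(0x00ffffe0));
}
#endif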
3517