VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp@105413

Last change on this file since 105413 was 105413, checked in by vboxsync, 8 months ago

VMM/IEM: When pruning exec memory set the hint to the start of the pruned region for faster subsequent allocation, bugref:10653

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 67.1 KB
 
1/* $Id: IEMAllN8veExecMem.cpp 105413 2024-07-19 07:18:53Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
50#include <VBox/vmm/iem.h>
51#include <VBox/vmm/cpum.h>
52#include "IEMInternal.h"
53#include <VBox/vmm/vmcc.h>
54#include <VBox/log.h>
55#include <VBox/err.h>
56#include <VBox/param.h>
57#include <iprt/assert.h>
58#include <iprt/mem.h>
59#include <iprt/string.h>
60#if defined(RT_ARCH_AMD64)
61# include <iprt/x86.h>
62#elif defined(RT_ARCH_ARM64)
63# include <iprt/armv8.h>
64#endif
65
66#ifdef RT_OS_WINDOWS
67# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
68extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
69extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
70#else
71# include <iprt/formats/dwarf.h>
72# if defined(RT_OS_DARWIN)
73# include <libkern/OSCacheControl.h>
74# include <mach/mach.h>
75# include <mach/mach_vm.h>
76# define IEMNATIVE_USE_LIBUNWIND
77extern "C" void __register_frame(const void *pvFde);
78extern "C" void __deregister_frame(const void *pvFde);
79# else
80# ifdef DEBUG_bird /** @todo not thread safe yet */
81# define IEMNATIVE_USE_GDB_JIT
82# endif
83# ifdef IEMNATIVE_USE_GDB_JIT
84# include <iprt/critsect.h>
85# include <iprt/once.h>
86# include <iprt/formats/elf64.h>
87# endif
88extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
89extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
90# endif
91#endif
92
93#include "IEMN8veRecompiler.h"
94
95
96/*********************************************************************************************************************************
97* Executable Memory Allocator *
98*********************************************************************************************************************************/
99/** The chunk sub-allocation unit size in bytes. */
100#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 256
101/** The chunk sub-allocation unit size as a shift factor. */
102#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 8
103/** Enables adding a header to the sub-allocator allocations.
104 * This is useful for freeing up executable memory among other things. */
105#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
106/** Use alternative pruning. */
107#define IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
108
109
110#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
111# ifdef IEMNATIVE_USE_GDB_JIT
112# define IEMNATIVE_USE_GDB_JIT_ET_DYN
113
114/** GDB JIT: Code entry. */
115typedef struct GDBJITCODEENTRY
116{
117 struct GDBJITCODEENTRY *pNext;
118 struct GDBJITCODEENTRY *pPrev;
119 uint8_t *pbSymFile;
120 uint64_t cbSymFile;
121} GDBJITCODEENTRY;
122
123/** GDB JIT: Actions. */
124typedef enum GDBJITACTIONS : uint32_t
125{
126 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
127} GDBJITACTIONS;
128
129/** GDB JIT: Descriptor. */
130typedef struct GDBJITDESCRIPTOR
131{
132 uint32_t uVersion;
133 GDBJITACTIONS enmAction;
134 GDBJITCODEENTRY *pRelevant;
135 GDBJITCODEENTRY *pHead;
136 /** Our addition: */
137 GDBJITCODEENTRY *pTail;
138} GDBJITDESCRIPTOR;
139
140/** GDB JIT: Our simple symbol file data. */
141typedef struct GDBJITSYMFILE
142{
143 Elf64_Ehdr EHdr;
144# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
145 Elf64_Shdr aShdrs[5];
146# else
147 Elf64_Shdr aShdrs[7];
148 Elf64_Phdr aPhdrs[2];
149# endif
150 /** The dwarf ehframe data for the chunk. */
151 uint8_t abEhFrame[512];
152 char szzStrTab[128];
153 Elf64_Sym aSymbols[3];
154# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
155 Elf64_Sym aDynSyms[2];
156 Elf64_Dyn aDyn[6];
157# endif
158} GDBJITSYMFILE;
159
160extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
161extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
162
163/** Init once for g_IemNativeGdbJitLock. */
164static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
165/** Init once for the critical section. */
166static RTCRITSECT g_IemNativeGdbJitLock;
167
168/** GDB reads the info here. */
169GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
170
171/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
172DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
173{
174 ASMNopPause();
175}
176
177/** @callback_method_impl{FNRTONCE} */
178static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
179{
180 RT_NOREF(pvUser);
181 return RTCritSectInit(&g_IemNativeGdbJitLock);
182}
183
184
185# endif /* IEMNATIVE_USE_GDB_JIT */
186
187/**
188 * Per-chunk unwind info for non-windows hosts.
189 */
190typedef struct IEMEXECMEMCHUNKEHFRAME
191{
192# ifdef IEMNATIVE_USE_LIBUNWIND
193 /** The offset of the FDA into abEhFrame. */
194 uintptr_t offFda;
195# else
196 /** 'struct object' storage area. */
197 uint8_t abObject[1024];
198# endif
199# ifdef IEMNATIVE_USE_GDB_JIT
200# if 0
201 /** The GDB JIT 'symbol file' data. */
202 GDBJITSYMFILE GdbJitSymFile;
203# endif
204 /** The GDB JIT list entry. */
205 GDBJITCODEENTRY GdbJitEntry;
206# endif
207 /** The dwarf ehframe data for the chunk. */
208 uint8_t abEhFrame[512];
209} IEMEXECMEMCHUNKEHFRAME;
210/** Pointer to per-chunk info for non-windows hosts. */
211typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
212#endif
213
214
215/**
216 * A chunk of executable memory.
217 */
218typedef struct IEMEXECMEMCHUNK
219{
220 /** Number of free items in this chunk. */
221 uint32_t cFreeUnits;
222 /** Hint where to start searching for free space in the allocation bitmap. */
223 uint32_t idxFreeHint;
224 /** Pointer to the readable/writeable view of the memory chunk. */
225 void *pvChunkRw;
226 /** Pointer to the readable/executable view of the memory chunk. */
227 void *pvChunkRx;
228#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
229 /** Pointer to the context structure detailing the per chunk common code. */
230 PCIEMNATIVEPERCHUNKCTX pCtx;
231#endif
232#ifdef IN_RING3
233 /**
234 * Pointer to the unwind information.
235 *
236 * This is used during C++ throw and longjmp (windows and probably most other
237 * platforms). Some debuggers (windbg) make use of it as well.
238 *
239 * Windows: This is allocated from hHeap on windows because (at least for
240 * AMD64) the UNWIND_INFO structure address in the
241 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
242 *
243 * Others: Allocated from the regular heap to avoid unnecessary executable data
244 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
245 void *pvUnwindInfo;
246#elif defined(IN_RING0)
247 /** Allocation handle. */
248 RTR0MEMOBJ hMemObj;
249#endif
250} IEMEXECMEMCHUNK;
251/** Pointer to a memory chunk. */
252typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
253
254
255/**
256 * Executable memory allocator for the native recompiler.
257 */
258typedef struct IEMEXECMEMALLOCATOR
259{
260 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
261 uint32_t uMagic;
262
263 /** The chunk size. */
264 uint32_t cbChunk;
265 /** The maximum number of chunks. */
266 uint32_t cMaxChunks;
267 /** The current number of chunks. */
268 uint32_t cChunks;
269 /** Hint where to start looking for available memory. */
270 uint32_t idxChunkHint;
271 /** Statistics: Current number of allocations. */
272 uint32_t cAllocations;
273
274 /** The total amount of memory available. */
275 uint64_t cbTotal;
276 /** Total amount of free memory. */
277 uint64_t cbFree;
278 /** Total amount of memory allocated. */
279 uint64_t cbAllocated;
280
281 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
282 *
283 * Since the chunk size is a power of two and the minimum chunk size is a lot
284 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
285 * require a whole number of uint64_t elements in the allocation bitmap. So,
286 * for simplicity's sake, the bitmaps for all chunks are allocated as one
287 * continuous block. */
288 uint64_t *pbmAlloc;
289 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
290 uint32_t cUnitsPerChunk;
291 /** Number of bitmap elements per chunk (for quickly locating the bitmap
292 * portion corresponding to a chunk). */
293 uint32_t cBitmapElementsPerChunk;
294
295#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
296 /** The next chunk to prune in. */
297 uint32_t idxChunkPrune;
298 /** The offset within the chunk to start pruning at. */
299 uint32_t offChunkPrune;
300 /** Profiling the pruning code. */
301 STAMPROFILE StatPruneProf;
302 /** Number of bytes recovered by the pruning. */
303 STAMPROFILE StatPruneRecovered;
304#endif
305
306#ifdef VBOX_WITH_STATISTICS
307 STAMPROFILE StatAlloc;
308#endif
309
310
311#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
312 /** Pointer to the array of unwind info running parallel to aChunks (same
313 * allocation as this structure, located after the bitmaps).
314 * (For Windows, the structures must reside in 32-bit RVA distance to the
315 * actual chunk, so they are allocated off the chunk.) */
316 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
317#endif
318
319 /** The allocation chunks. */
320 RT_FLEXIBLE_ARRAY_EXTENSION
321 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
322} IEMEXECMEMALLOCATOR;
323/** Pointer to an executable memory allocator. */
324typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
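/* Worked example (illustrative figures derived from the defaults in this file):
   with a 64 MB chunk (the default picked by iemExecMemAllocatorInit for large
   cbMax) and the 256 byte sub-allocation unit, cUnitsPerChunk = _64M / 256 =
   262144 and cBitmapElementsPerChunk = 262144 / 64 = 4096, i.e. each chunk
   always maps to a whole number of uint64_t bitmap elements, as the pbmAlloc
   comment above states. */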
325
326/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
327#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
328
329
330#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
331/**
332 * Allocation header.
333 */
334typedef struct IEMEXECMEMALLOCHDR
335{
336 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
337 uint32_t uMagic;
338 /** The allocation chunk (for speeding up freeing). */
339 uint32_t idxChunk;
340 /** Pointer to the translation block the allocation belongs to.
341 * This is the whole point of the header. */
342 PIEMTB pTb;
343} IEMEXECMEMALLOCHDR;
344/** Pointer to an allocation header. */
345typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
346/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
347# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
348#endif
349
350
351static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
352
353
354#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
355/**
356 * Frees up executable memory when we're out of space.
357 *
358 * This is an alternative to iemTbAllocatorFreeupNativeSpace() that frees up
359 * space in a more linear fashion from the allocator's point of view. It may
360 * also defragment if implemented & enabled.
361 */
362static void iemExecMemAllocatorPrune(PVMCPU pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
363{
364# ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
365# error "IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING requires IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER"
366# endif
367 STAM_REL_PROFILE_START(&pExecMemAllocator->StatPruneProf, a);
368
369 /*
370 * Before we can start, we must process delayed frees.
371 */
372 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
373
374 AssertCompile(RT_IS_POWER_OF_TWO(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE));
375
376 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
377 AssertReturnVoid(RT_IS_POWER_OF_TWO(cbChunk));
378 AssertReturnVoid(cbChunk >= _1M && cbChunk <= _256M); /* see iemExecMemAllocatorInit */
379
380 uint32_t const cChunks = pExecMemAllocator->cChunks;
381 AssertReturnVoid(cChunks == pExecMemAllocator->cMaxChunks);
382 AssertReturnVoid(cChunks >= 1);
383
384 Assert(!pVCpu->iem.s.pCurTbR3);
385
386 /*
387 * Decide how much to prune. The chunk is a multiple of two, so we'll be
388 * scanning a multiple of two here as well.
389 */
390 uint32_t cbToPrune = cbChunk;
391
392 /* Never more than 25%. */
393 if (cChunks < 4)
394 cbToPrune /= cChunks == 1 ? 4 : 2;
395
396 /* Upper limit. In a debug build a 4MB limit averages out at ~0.6ms per call. */
397 if (cbToPrune > _4M)
398 cbToPrune = _4M;
399
400 /*
401 * Adjust the pruning chunk and offset accordingly.
402 */
403 uint32_t idxChunk = pExecMemAllocator->idxChunkPrune;
404 uint32_t offChunk = pExecMemAllocator->offChunkPrune;
405 offChunk &= ~(uint32_t)(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1U);
406 if (offChunk >= cbChunk)
407 {
408 offChunk = 0;
409 idxChunk += 1;
410 }
411 if (idxChunk >= cChunks)
412 {
413 offChunk = 0;
414 idxChunk = 0;
415 }
416
417 uint32_t const offPruneEnd = RT_MIN(offChunk + cbToPrune, cbChunk);
418
419 /*
420 * Do the pruning. The current approach is the severe kind.
421 */
422 uint64_t cbPruned = 0;
423 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
424 while (offChunk < offPruneEnd)
425 {
426 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
427
426 /* Is this the start of an allocation block for a TB? (We typically have
429 one allocation at the start of each chunk for the unwind info where
430 pTb is NULL.) */
431 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC
432 && pHdr->pTb != NULL
433 && pHdr->idxChunk == idxChunk)
434 {
435 PIEMTB const pTb = pHdr->pTb;
436 AssertPtr(pTb);
437
438 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
439 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
440 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
441
442 iemTbAllocatorFree(pVCpu, pTb);
443
444 cbPruned += cbBlock;
445 offChunk += cbBlock;
446 }
447 else
448 offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
449 }
450 STAM_REL_PROFILE_ADD_PERIOD(&pExecMemAllocator->StatPruneRecovered, cbPruned);
451
452 /*
453 * Save the current pruning point.
454 */
455 pExecMemAllocator->offChunkPrune = offChunk;
456 pExecMemAllocator->idxChunkPrune = idxChunk;
457
458 /* Set the hint to the start of the pruned region. */
459 pExecMemAllocator->idxChunkHint = idxChunk;
460 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = offChunk / IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE;
461
462 STAM_REL_PROFILE_STOP(&pExecMemAllocator->StatPruneProf, a);
463}
464#endif /* IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING */
465
466
467/**
468 * Try allocate a block of @a cReqUnits in the chunk @a idxChunk.
469 */
470static void *
471iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
472 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb,
473 void **ppvExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
474{
475 /*
476 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
477 */
478 Assert(!(cToScan & 63));
479 Assert(!(idxFirst & 63));
480 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
481 pbmAlloc += idxFirst / 64;
482
483 /*
484 * Scan the bitmap for a run of cReqUnits consecutive clear bits.
485 */
486 /** @todo This can probably be done more efficiently for non-x86 systems. */
487 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
488 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
489 {
490 uint32_t idxAddBit = 1;
491 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
492 idxAddBit++;
493 if (idxAddBit >= cReqUnits)
494 {
495 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
496
497 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
498 pChunk->cFreeUnits -= cReqUnits;
499 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
500
501 pExecMemAllocator->cAllocations += 1;
502 uint32_t const cbReq = cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
503 pExecMemAllocator->cbAllocated += cbReq;
504 pExecMemAllocator->cbFree -= cbReq;
505 pExecMemAllocator->idxChunkHint = idxChunk;
506
507 void * const pvMemRw = (uint8_t *)pChunk->pvChunkRw
508 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
509
510 if (ppChunkCtx)
511 *ppChunkCtx = pChunk->pCtx;
512
513 /*
514 * Initialize the header and return.
515 */
516# ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
517 PIEMEXECMEMALLOCHDR const pHdr = (PIEMEXECMEMALLOCHDR)pvMemRw;
518 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
519 pHdr->idxChunk = idxChunk;
520 pHdr->pTb = pTb;
521
522 if (ppvExec)
523 *ppvExec = (uint8_t *)pChunk->pvChunkRx
524 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT)
525 + sizeof(*pHdr);
526
527 return pHdr + 1;
528#else
529 if (ppvExec)
530 *ppvExec = (uint8_t *)pChunk->pvChunkRx
531 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
532
533 RT_NOREF(pTb);
534 return pvMemRw;
535#endif
536 }
537
538 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
539 }
540 return NULL;
541}
542
543
544static PIEMNATIVEINSTR
545iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb,
546 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx)
547{
548 /*
549 * Figure out how much to allocate.
550 */
551#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
552 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
553#else
554 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
555#endif
556 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
557 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
558 {
559 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
560 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
561 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
562 {
563 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
564 pExecMemAllocator->cUnitsPerChunk - idxHint,
565 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
566 if (pvRet)
567 return (PIEMNATIVEINSTR)pvRet;
568 }
569 return (PIEMNATIVEINSTR)iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
570 RT_MIN(pExecMemAllocator->cUnitsPerChunk,
571 RT_ALIGN_32(idxHint + cReqUnits, 64)),
572 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx);
573 }
574 return NULL;
575}
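/* Sizing example (illustrative, assuming a 64-bit host where
   sizeof(IEMEXECMEMALLOCHDR) is 16 bytes): a cbReq of 1000 bytes gives
   cReqUnits = (1000 + 16 + 255) >> 8 = 4, i.e. four 256 byte units (1024
   bytes) are taken from the chunk, header included. */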
576
577
578/**
579 * Allocates @a cbReq bytes of executable memory.
580 *
581 * @returns Pointer to the readable/writeable memory, NULL if out of memory or
582 * another problem was encountered.
583 * @param pVCpu The cross context virtual CPU structure of the
584 * calling thread.
585 * @param cbReq How many bytes are required.
586 * @param pTb The translation block that will be using the allocation.
587 * @param ppaExec Where to return the pointer to executable view of
588 * the allocated memory, optional.
589 * @param ppChunkCtx Where to return the per chunk attached context
590 * if available, optional.
591 */
592DECLHIDDEN(PIEMNATIVEINSTR) iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb,
593 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx) RT_NOEXCEPT
594{
595 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
596 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
597 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
598 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
599
600 for (unsigned iIteration = 0;; iIteration++)
601 {
602 if (cbReq <= pExecMemAllocator->cbFree)
603 {
604 uint32_t const cChunks = pExecMemAllocator->cChunks;
605 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
606 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
607 {
608 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
609 ppaExec, ppChunkCtx);
610 if (pRet)
611 {
612 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
613 return pRet;
614 }
615 }
616 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
617 {
618 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
619 ppaExec, ppChunkCtx);
620 if (pRet)
621 {
622 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
623 return pRet;
624 }
625 }
626 }
627
628 /*
629 * Can we grow it with another chunk?
630 */
631 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
632 {
633 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
634 AssertLogRelRCReturn(rc, NULL);
635
636 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
637 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb,
638 ppaExec, ppChunkCtx);
639 if (pRet)
640 {
641 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
642 return pRet;
643 }
644 AssertFailed();
645 }
646
647 /*
648 * Try prune native TBs once.
649 */
650 if (iIteration == 0)
651 {
652#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
653 iemExecMemAllocatorPrune(pVCpu, pExecMemAllocator);
654#else
655 /* No header included in the instruction count here. */
656 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
657 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
658#endif
659 }
660 else
661 {
662 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
663 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
664 return NULL;
665 }
666 }
667}
668
669
670/** This is a hook to ensure the instruction cache is properly flushed before the code in the memory
671 * given by @a pv and @a cb is executed. */
672DECLHIDDEN(void) iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) RT_NOEXCEPT
673{
674#ifdef RT_OS_DARWIN
675 /*
676 * Flush the instruction cache:
677 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
678 */
679 /* sys_dcache_flush(pv, cb); - not necessary */
680 sys_icache_invalidate(pv, cb);
681 RT_NOREF(pVCpu);
682
683#elif defined(RT_OS_LINUX) && defined(RT_ARCH_ARM64)
684 RT_NOREF(pVCpu);
685
686 /* There is __builtin___clear_cache() but it flushes both the instruction and data cache, so do it manually. */
687 static uint32_t s_u32CtrEl0 = 0;
688 if (!s_u32CtrEl0)
689 asm volatile ("mrs %0, ctr_el0":"=r" (s_u32CtrEl0));
690 uintptr_t cbICacheLine = (uintptr_t)4 << (s_u32CtrEl0 & 0xf);
691
692 uintptr_t pb = (uintptr_t)pv & ~(cbICacheLine - 1);
693 for (; pb < (uintptr_t)pv + cb; pb += cbICacheLine)
694 asm volatile ("ic ivau, %0" : : "r" (pb) : "memory");
695
696 asm volatile ("dsb ish\n\t isb\n\t" : : : "memory");
697
698#else
699 RT_NOREF(pVCpu, pv, cb);
700#endif
701}
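/* Example for the linux.arm64 path above (illustrative): CTR_EL0 bits [3:0]
   (IminLine) hold log2 of the smallest instruction cache line in 4-byte words,
   so IminLine = 4 yields cbICacheLine = 4 << 4 = 64 bytes and the loop issues
   one "ic ivau" per 64 byte line covering [pv, pv + cb). */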
702
703
704/**
705 * Frees executable memory.
706 */
707DECLHIDDEN(void) iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) RT_NOEXCEPT
708{
709 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
710 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
711 AssertPtr(pv);
712#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
713 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
714
715 /* Align the size as we did when allocating the block. */
716 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
717
718#else
719 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
720 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
721 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
722 uint32_t const idxChunk = pHdr->idxChunk;
723 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
724 pv = pHdr;
725
726 /* Adjust and align the size to cover the whole allocation area. */
727 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
728#endif
729
730 /* Free it / assert sanity. */
731 bool fFound = false;
732 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
733#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
734 uint32_t const cChunks = pExecMemAllocator->cChunks;
735 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
736#endif
737 {
738 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
739 fFound = offChunk < cbChunk;
740 if (fFound)
741 {
742 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
743 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
744
745 /* Check that it's valid and free it. */
746 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
747 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
748 for (uint32_t i = 1; i < cReqUnits; i++)
749 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
750 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
751
752 /* Invalidate the header using the writeable memory view. */
753 pHdr = (PIEMEXECMEMALLOCHDR)((uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRw + offChunk);
754#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
755 pHdr->uMagic = 0;
756 pHdr->idxChunk = 0;
757 pHdr->pTb = NULL;
758#endif
759 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
760 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
761
762 /* Update the stats. */
763 pExecMemAllocator->cbAllocated -= cb;
764 pExecMemAllocator->cbFree += cb;
765 pExecMemAllocator->cAllocations -= 1;
766 return;
767 }
768 }
769 AssertFailed();
770}
771
772
773/**
774 * Interface used by iemNativeRecompileAttachExecMemChunkCtx and unwind info
775 * generators.
776 */
777DECLHIDDEN(PIEMNATIVEINSTR)
778iemExecMemAllocatorAllocFromChunk(PVMCPU pVCpu, uint32_t idxChunk, uint32_t cbReq, PIEMNATIVEINSTR *ppaExec)
779{
780 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
781 AssertReturn(idxChunk < pExecMemAllocator->cChunks, NULL);
782 Assert(cbReq < _1M);
783 return iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, NULL /*pTb*/, ppaExec, NULL /*ppChunkCtx*/);
784}
785
786
787#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
788/**
789 * For getting the per-chunk context detailing common code for a TB.
790 *
791 * This is for use by the disassembler.
792 */
793DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemExecMemGetTbChunkCtx(PVMCPU pVCpu, PCIEMTB pTb)
794{
795 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
796 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
797 {
798 uintptr_t const uAddress = (uintptr_t)pTb->Native.paInstructions;
799 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
800 uint32_t idxChunk = pExecMemAllocator->cChunks;
801 while (idxChunk-- > 0)
802 if (uAddress - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx < cbChunk)
803 return pExecMemAllocator->aChunks[idxChunk].pCtx;
804 }
805 return NULL;
806}
807#endif
808
809
810#ifdef IN_RING3
811# ifdef RT_OS_WINDOWS
812
813/**
814 * Initializes the unwind info structures for windows hosts.
815 */
816static int
817iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
818 void *pvChunk, uint32_t idxChunk)
819{
820 RT_NOREF(pVCpu);
821
822 /*
823 * The AMD64 unwind opcodes.
824 *
825 * This is a program that starts with RSP after a RET instruction that
826 * ends up in recompiled code, and the operations we describe here will
827 * restore all non-volatile registers and bring RSP back to where our
828 * RET address is. This means it's reverse order from what happens in
829 * the prologue.
830 *
831 * Note! Using a frame register approach here both because we have one
832 * and mainly because the UWOP_ALLOC_LARGE argument values
833 * would be a pain to write initializers for. On the positive
834 * side, we're impervious to changes in the stack variable
835 * area and can deal with dynamic stack allocations if necessary.
836 */
837 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
838 {
839 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
840 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
841 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
842 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
843 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
844 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
845 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
846 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
847 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
848 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
849 };
850 union
851 {
852 IMAGE_UNWIND_INFO Info;
853 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
854 } s_UnwindInfo =
855 {
856 {
857 /* .Version = */ 1,
858 /* .Flags = */ 0,
859 /* .SizeOfProlog = */ 16, /* whatever */
860 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
861 /* .FrameRegister = */ X86_GREG_xBP,
862 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
863 }
864 };
865 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
866 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
867
868 /*
869 * Calc how much space we need and allocate it off the exec heap.
870 */
871 unsigned const cFunctionEntries = 1;
872 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
873 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
874 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
875 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL, NULL, NULL);
876 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
877 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
878
879 /*
880 * Initialize the structures.
881 */
882 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
883
884 paFunctions[0].BeginAddress = 0;
885 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
886 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
887
888 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
889 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
890
891 /*
892 * Register it.
893 */
894 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
895 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
896
897 return VINF_SUCCESS;
898}
899
900
901# else /* !RT_OS_WINDOWS */
902
903/**
904 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
905 */
906DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
907{
908 if (iValue >= 64)
909 {
910 Assert(iValue < 0x2000);
911 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
912 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
913 }
914 else if (iValue >= 0)
915 *Ptr.pb++ = (uint8_t)iValue;
916 else if (iValue > -64)
917 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
918 else
919 {
920 Assert(iValue > -0x2000);
921 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
922 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
923 }
924 return Ptr;
925}
926
927
928/**
929 * Emits an ULEB128 encoded value (up to 64-bit wide).
930 */
931DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
932{
933 while (uValue >= 0x80)
934 {
935 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
936 uValue >>= 7;
937 }
938 *Ptr.pb++ = (uint8_t)uValue;
939 return Ptr;
940}
941
942
943/**
944 * Emits a CFA rule as register @a uReg + offset @a off.
945 */
946DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
947{
948 *Ptr.pb++ = DW_CFA_def_cfa;
949 Ptr = iemDwarfPutUleb128(Ptr, uReg);
950 Ptr = iemDwarfPutUleb128(Ptr, off);
951 return Ptr;
952}
953
954
955/**
956 * Emits a register (@a uReg) save location:
957 * CFA + @a off * data_alignment_factor
958 */
959DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
960{
961 if (uReg < 0x40)
962 *Ptr.pb++ = DW_CFA_offset | uReg;
963 else
964 {
965 *Ptr.pb++ = DW_CFA_offset_extended;
966 Ptr = iemDwarfPutUleb128(Ptr, uReg);
967 }
968 Ptr = iemDwarfPutUleb128(Ptr, off);
969 return Ptr;
970}
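/* For illustration: iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) as used below
   emits DW_CFA_offset|reg followed by ULEB128(2), telling the unwinder that RBP
   was saved at CFA + 2 * data_alignment_factor (-8), i.e. at CFA - 16. */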
971
972
973# if 0 /* unused */
974/**
975 * Emits a register (@a uReg) save location, using signed offset:
976 * CFA + @a offSigned * data_alignment_factor
977 */
978DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
979{
980 *Ptr.pb++ = DW_CFA_offset_extended_sf;
981 Ptr = iemDwarfPutUleb128(Ptr, uReg);
982 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
983 return Ptr;
984}
985# endif
986
987
988/**
989 * Initializes the unwind info section for non-windows hosts.
990 */
991static int
992iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
993 void *pvChunk, uint32_t idxChunk)
994{
995 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
996 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
997
998 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
999
1000 /*
1001 * Generate the CIE first.
1002 */
1003# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
1004 uint8_t const iDwarfVer = 3;
1005# else
1006 uint8_t const iDwarfVer = 4;
1007# endif
1008 RTPTRUNION const PtrCie = Ptr;
1009 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
1010 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
1011 *Ptr.pb++ = iDwarfVer; /* DWARF version */
1012 *Ptr.pb++ = 0; /* Augmentation. */
1013 if (iDwarfVer >= 4)
1014 {
1015 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
1016 *Ptr.pb++ = 0; /* Segment selector size. */
1017 }
1018# ifdef RT_ARCH_AMD64
1019 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
1020# else
1021 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
1022# endif
1023 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
1024# ifdef RT_ARCH_AMD64
1025 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
1026# elif defined(RT_ARCH_ARM64)
1027 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
1028# else
1029# error "port me"
1030# endif
1031 /* Initial instructions: */
1032# ifdef RT_ARCH_AMD64
1033 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
1034 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
1035 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
1036 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
1037 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
1038 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
1039 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
1040 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
1041# elif defined(RT_ARCH_ARM64)
1042# if 1
1043 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
1044# else
1045 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
1046# endif
1047 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
1048 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
1049 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
1050 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
1051 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
1052 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
1053 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
1054 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
1055 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
1056 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
1057 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
1058 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
1059 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
1060 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
1061# else
1062# error "port me"
1063# endif
1064 while ((Ptr.u - PtrCie.u) & 3)
1065 *Ptr.pb++ = DW_CFA_nop;
1066 /* Finalize the CIE size. */
1067 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
1068
1069 /*
1070 * Generate an FDE for the whole chunk area.
1071 */
1072# ifdef IEMNATIVE_USE_LIBUNWIND
1073 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
1074# endif
1075 RTPTRUNION const PtrFde = Ptr;
1076 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
1077 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
1078 Ptr.pu32++;
1079 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
1080 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
1081# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
1082 *Ptr.pb++ = DW_CFA_nop;
1083# endif
1084 while ((Ptr.u - PtrFde.u) & 3)
1085 *Ptr.pb++ = DW_CFA_nop;
1086 /* Finalize the FDE size. */
1087 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
1088
1089 /* Terminator entry. */
1090 *Ptr.pu32++ = 0;
1091 *Ptr.pu32++ = 0; /* just to be sure... */
1092 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
1093
1094 /*
1095 * Register it.
1096 */
1097# ifdef IEMNATIVE_USE_LIBUNWIND
1098 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
1099# else
1100 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
1101 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
1102# endif
1103
1104# ifdef IEMNATIVE_USE_GDB_JIT
1105 /*
1106 * Now for telling GDB about this (experimental).
1107 *
1108 * This seems to work best with ET_DYN.
1109 */
1110 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
1111 sizeof(GDBJITSYMFILE), NULL, NULL, NULL);
1112 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1113 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1114
1115 RT_ZERO(*pSymFile);
1116
1117 /*
1118 * The ELF header:
1119 */
1120 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1121 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1122 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1123 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1124 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1125 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1126 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1127 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1128# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1129 pSymFile->EHdr.e_type = ET_DYN;
1130# else
1131 pSymFile->EHdr.e_type = ET_REL;
1132# endif
1133# ifdef RT_ARCH_AMD64
1134 pSymFile->EHdr.e_machine = EM_AMD64;
1135# elif defined(RT_ARCH_ARM64)
1136 pSymFile->EHdr.e_machine = EM_AARCH64;
1137# else
1138# error "port me"
1139# endif
1140 pSymFile->EHdr.e_version = 1; /*?*/
1141 pSymFile->EHdr.e_entry = 0;
1142# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1143 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1144# else
1145 pSymFile->EHdr.e_phoff = 0;
1146# endif
1147 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1148 pSymFile->EHdr.e_flags = 0;
1149 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1150# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1151 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1152 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1153# else
1154 pSymFile->EHdr.e_phentsize = 0;
1155 pSymFile->EHdr.e_phnum = 0;
1156# endif
1157 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1158 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1159 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1160
1161 uint32_t offStrTab = 0;
1162#define APPEND_STR(a_szStr) do { \
1163 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1164 offStrTab += sizeof(a_szStr); \
1165 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1166 } while (0)
1167#define APPEND_STR_FMT(a_szStr, ...) do { \
1168 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1169 offStrTab++; \
1170 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1171 } while (0)
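/* For illustration: each APPEND_STR adds a NUL terminated string and advances
   offStrTab, so after the first few section headers below szzStrTab starts out
   as "\0.eh_frame\0.shstrtab\0.symtab\0..." with each sh_name holding the
   offset of the corresponding name within that table. */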
1172
1173 /*
1174 * Section headers.
1175 */
1176 /* Section header #0: NULL */
1177 unsigned i = 0;
1178 APPEND_STR("");
1179 RT_ZERO(pSymFile->aShdrs[i]);
1180 i++;
1181
1182 /* Section header: .eh_frame */
1183 pSymFile->aShdrs[i].sh_name = offStrTab;
1184 APPEND_STR(".eh_frame");
1185 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1186 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1187# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1188 pSymFile->aShdrs[i].sh_offset
1189 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1190# else
1191 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1192 pSymFile->aShdrs[i].sh_offset = 0;
1193# endif
1194
1195 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1196 pSymFile->aShdrs[i].sh_link = 0;
1197 pSymFile->aShdrs[i].sh_info = 0;
1198 pSymFile->aShdrs[i].sh_addralign = 1;
1199 pSymFile->aShdrs[i].sh_entsize = 0;
1200 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1201 i++;
1202
1203 /* Section header: .shstrtab */
1204 unsigned const iShStrTab = i;
1205 pSymFile->EHdr.e_shstrndx = iShStrTab;
1206 pSymFile->aShdrs[i].sh_name = offStrTab;
1207 APPEND_STR(".shstrtab");
1208 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1209 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1210# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1211 pSymFile->aShdrs[i].sh_offset
1212 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1213# else
1214 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1215 pSymFile->aShdrs[i].sh_offset = 0;
1216# endif
1217 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1218 pSymFile->aShdrs[i].sh_link = 0;
1219 pSymFile->aShdrs[i].sh_info = 0;
1220 pSymFile->aShdrs[i].sh_addralign = 1;
1221 pSymFile->aShdrs[i].sh_entsize = 0;
1222 i++;
1223
1224 /* Section header: .symtab */
1225 pSymFile->aShdrs[i].sh_name = offStrTab;
1226 APPEND_STR(".symtab");
1227 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1228 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1229 pSymFile->aShdrs[i].sh_offset
1230 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1231 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1232 pSymFile->aShdrs[i].sh_link = iShStrTab;
1233 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1234 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1235 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1236 i++;
1237
1238# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1239 /* Section header: .dynsym */
1240 pSymFile->aShdrs[i].sh_name = offStrTab;
1241 APPEND_STR(".dynsym");
1242 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1243 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1244 pSymFile->aShdrs[i].sh_offset
1245 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1246 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1247 pSymFile->aShdrs[i].sh_link = iShStrTab;
1248 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1249 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1250 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1251 i++;
1252# endif
1253
1254# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1255 /* Section header: .dynamic */
1256 pSymFile->aShdrs[i].sh_name = offStrTab;
1257 APPEND_STR(".dynamic");
1258 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1259 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1260 pSymFile->aShdrs[i].sh_offset
1261 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1262 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1263 pSymFile->aShdrs[i].sh_link = iShStrTab;
1264 pSymFile->aShdrs[i].sh_info = 0;
1265 pSymFile->aShdrs[i].sh_addralign = 1;
1266 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1267 i++;
1268# endif
1269
1270 /* Section header: .text */
1271 unsigned const iShText = i;
1272 pSymFile->aShdrs[i].sh_name = offStrTab;
1273 APPEND_STR(".text");
1274 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1275 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1276# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1277 pSymFile->aShdrs[i].sh_offset
1278 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1279# else
1280 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1281 pSymFile->aShdrs[i].sh_offset = 0;
1282# endif
1283 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1284 pSymFile->aShdrs[i].sh_link = 0;
1285 pSymFile->aShdrs[i].sh_info = 0;
1286 pSymFile->aShdrs[i].sh_addralign = 1;
1287 pSymFile->aShdrs[i].sh_entsize = 0;
1288 i++;
1289
1290 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1291
1292# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1293 /*
1294 * The program headers:
1295 */
1296 /* Everything in a single LOAD segment: */
1297 i = 0;
1298 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1299 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1300 pSymFile->aPhdrs[i].p_offset
1301 = pSymFile->aPhdrs[i].p_vaddr
1302 = pSymFile->aPhdrs[i].p_paddr = 0;
1303 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1304 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1305 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1306 i++;
1307 /* The .dynamic segment. */
1308 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1309 pSymFile->aPhdrs[i].p_flags = PF_R;
1310 pSymFile->aPhdrs[i].p_offset
1311 = pSymFile->aPhdrs[i].p_vaddr
1312 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1313 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1314 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1315 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1316 i++;
1317
1318 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1319
1320 /*
1321 * The dynamic section:
1322 */
1323 i = 0;
1324 pSymFile->aDyn[i].d_tag = DT_SONAME;
1325 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1326 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1327 i++;
1328 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1329 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1330 i++;
1331 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1332 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1333 i++;
1334 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1335 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1336 i++;
1337 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1338 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1339 i++;
1340 pSymFile->aDyn[i].d_tag = DT_NULL;
1341 i++;
1342 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1343# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1344
1345 /*
1346 * Symbol tables:
1347 */
1348 /** @todo gdb doesn't seem to really like this ... */
1349 i = 0;
1350 pSymFile->aSymbols[i].st_name = 0;
1351 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1352 pSymFile->aSymbols[i].st_value = 0;
1353 pSymFile->aSymbols[i].st_size = 0;
1354 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1355 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1356# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1357 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1358# endif
1359 i++;
1360
1361 pSymFile->aSymbols[i].st_name = 0;
1362 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1363 pSymFile->aSymbols[i].st_value = 0;
1364 pSymFile->aSymbols[i].st_size = 0;
1365 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1366 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1367 i++;
1368
1369 pSymFile->aSymbols[i].st_name = offStrTab;
1370 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1371# if 0
1372 pSymFile->aSymbols[i].st_shndx = iShText;
1373 pSymFile->aSymbols[i].st_value = 0;
1374# else
1375 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1376 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1377# endif
1378 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1379 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1380 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1381# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1382 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1383 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1384# endif
1385 i++;
1386
1387 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1388 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1389
1390 /*
1391 * The GDB JIT entry and informing GDB.
1392 */
1393 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1394# if 1
1395 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1396# else
1397 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1398# endif
1399
1400 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1401 RTCritSectEnter(&g_IemNativeGdbJitLock);
1402 pEhFrame->GdbJitEntry.pNext = NULL;
1403 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1404 if (__jit_debug_descriptor.pTail)
1405 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1406 else
1407 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1408 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1409 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1410
1411 /* Notify GDB: */
1412 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1413 __jit_debug_register_code();
1414 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1415 RTCritSectLeave(&g_IemNativeGdbJitLock);
1416
1417# else /* !IEMNATIVE_USE_GDB_JIT */
1418 RT_NOREF(pVCpu);
1419# endif /* !IEMNATIVE_USE_GDB_JIT */
1420
1421 return VINF_SUCCESS;
1422}
1423
1424# endif /* !RT_OS_WINDOWS */
1425#endif /* IN_RING3 */
1426
1427
1428/**
1429 * Adds another chunk to the executable memory allocator.
1430 *
1431 * This is used by the init code for the initial allocation and later by the
1432 * regular allocator function when it's out of memory.
1433 */
1434static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1435{
1436 /* Check that we've room for growth. */
1437 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1438 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1439
1440 /* Allocate a chunk. */
1441#ifdef RT_OS_DARWIN
1442 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1443#else
1444 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1445#endif
1446 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1447
1448#ifdef RT_OS_DARWIN
1449 /*
1450 * Because it is impossible to have a RWX memory allocation on macOS, try to remap the memory
1451 * chunk readable/executable somewhere else so we can save ourselves the hassle of switching between
1452 * protections when executable memory is allocated.
1453 */
1454 int rc = VERR_NO_EXEC_MEMORY;
1455 mach_port_t hPortTask = mach_task_self();
1456 mach_vm_address_t AddrChunk = (mach_vm_address_t)pvChunk;
1457 mach_vm_address_t AddrRemapped = 0;
1458 vm_prot_t ProtCur = 0;
1459 vm_prot_t ProtMax = 0;
1460 kern_return_t krc = mach_vm_remap(hPortTask, &AddrRemapped, pExecMemAllocator->cbChunk, 0,
1461 VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
1462 hPortTask, AddrChunk, FALSE, &ProtCur, &ProtMax,
1463 VM_INHERIT_NONE);
1464 if (krc == KERN_SUCCESS)
1465 {
1466 krc = mach_vm_protect(mach_task_self(), AddrRemapped, pExecMemAllocator->cbChunk, FALSE, VM_PROT_READ | VM_PROT_EXECUTE);
1467 if (krc == KERN_SUCCESS)
1468 rc = VINF_SUCCESS;
1469 else
1470 {
1471 AssertLogRelMsgFailed(("mach_vm_protect -> %d (%#x)\n", krc, krc));
1472 krc = mach_vm_deallocate(hPortTask, AddrRemapped, pExecMemAllocator->cbChunk);
1473 Assert(krc == KERN_SUCCESS);
1474 }
1475 }
1476 else
1477 AssertLogRelMsgFailed(("mach_vm_remap -> %d (%#x)\n", krc, krc));
1478 if (RT_FAILURE(rc))
1479 {
1480 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1481 return rc;
1482 }
1483
1484 void *pvChunkRx = (void *)AddrRemapped;
1485#else
1486# if defined(IN_RING3) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
1487 int rc = VINF_SUCCESS;
1488# endif
1489 void *pvChunkRx = pvChunk;
1490#endif
1491
1492 /*
1493 * Add the chunk.
1494 *
1495 * This must be done before the unwind init so windows can allocate
1496 * memory from the chunk when using the alternative sub-allocator.
1497 */
1498 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = pvChunk;
1499 pExecMemAllocator->aChunks[idxChunk].pvChunkRx = pvChunkRx;
1500#ifdef IN_RING3
1501 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1502#endif
1503 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1504 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1505 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1506 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1507
1508 pExecMemAllocator->cChunks = idxChunk + 1;
1509 pExecMemAllocator->idxChunkHint = idxChunk;
1510
1511 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1512 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1513
1514 /* If there is a chunk context init callback call it. */
1515#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
1516 pExecMemAllocator->aChunks[idxChunk].pCtx = iemNativeRecompileAttachExecMemChunkCtx(pVCpu, idxChunk);
1517 if (pExecMemAllocator->aChunks[idxChunk].pCtx)
1518#endif
1519 {
1520#ifdef IN_RING3
1521 /*
1522 * Initialize the unwind information (this cannot really fail atm).
1523 * (This sets pvUnwindInfo.)
1524 */
1525 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunkRx, idxChunk);
1526#endif
1527 }
1528#if defined(IN_RING3) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
1529 if (RT_SUCCESS(rc))
1530 { /* likely */ }
1531 else
1532 {
1533 /* Just in case the impossible happens, undo the setup above: */
1534 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1535 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1536 pExecMemAllocator->cChunks = idxChunk;
1537 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1538 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1539 pExecMemAllocator->aChunks[idxChunk].pvChunkRw = NULL;
1540 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1541
1542# ifdef RT_OS_DARWIN
1543 krc = mach_vm_deallocate(mach_task_self(), (mach_vm_address_t)pExecMemAllocator->aChunks[idxChunk].pvChunkRx,
1544 pExecMemAllocator->cbChunk);
1545 Assert(krc == KERN_SUCCESS);
1546# endif
1547
1548 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1549 return rc;
1550 }
1551#endif
1552
1553 return VINF_SUCCESS;
1554}
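
/*
 * Illustration only (not part of the allocator): the macOS dual-mapping trick
 * used by the chunk allocation above, reduced to a self-contained helper.
 * The function name exampleAllocDualMappedChunk and the use of mmap() in
 * place of RTMemPageAllocEx() are assumptions made for the sketch; the
 * mach_vm_* calls mirror the ones in the code above.
 */
#ifdef RT_OS_DARWIN
# include <sys/mman.h>

static int exampleAllocDualMappedChunk(size_t cbChunk, void **ppvRw, void **ppvRx)
{
    /* 1. Allocate the chunk read/write; RWX mappings are not available on macOS. */
    void *pvRw = mmap(NULL, cbChunk, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
    if (pvRw == MAP_FAILED)
        return -1;

    /* 2. Map the same pages a second time at a kernel-chosen address. */
    mach_vm_address_t AddrRx  = 0;
    vm_prot_t         ProtCur = 0;
    vm_prot_t         ProtMax = 0;
    kern_return_t krc = mach_vm_remap(mach_task_self(), &AddrRx, cbChunk, 0 /*mask*/,
                                      VM_FLAGS_ANYWHERE | VM_FLAGS_RETURN_DATA_ADDR,
                                      mach_task_self(), (mach_vm_address_t)(uintptr_t)pvRw,
                                      FALSE /*copy*/, &ProtCur, &ProtMax, VM_INHERIT_NONE);
    if (krc != KERN_SUCCESS)
    {
        munmap(pvRw, cbChunk);
        return -1;
    }

    /* 3. Make the second mapping read/execute: code is then written through
          *ppvRw and executed through *ppvRx without ever flipping protections. */
    krc = mach_vm_protect(mach_task_self(), AddrRx, cbChunk, FALSE /*set_maximum*/,
                          VM_PROT_READ | VM_PROT_EXECUTE);
    if (krc != KERN_SUCCESS)
    {
        mach_vm_deallocate(mach_task_self(), AddrRx, cbChunk);
        munmap(pvRw, cbChunk);
        return -1;
    }

    *ppvRw = pvRw;
    *ppvRx = (void *)(uintptr_t)AddrRx;
    return 0;
}
#endif /* RT_OS_DARWIN */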
1555
1556
1557/**
1558 * Initializes the executable memory allocator for native recompilation on the
1559 * calling EMT.
1560 *
1561 * @returns VBox status code.
1562 * @param pVCpu The cross context virtual CPU structure of the calling
1563 * thread.
1564 * @param cbMax The max size of the allocator.
1565 * @param cbInitial The initial allocator size.
1566 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1567 * dependent).
1568 */
1569int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk) RT_NOEXCEPT
1570{
1571 /*
1572 * Validate input.
1573 */
1574 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1575 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1576 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1577 || cbChunk == 0
1578 || ( RT_IS_POWER_OF_TWO(cbChunk)
1579 && cbChunk >= _1M
1580 && cbChunk <= _256M
1581 && cbChunk <= cbMax),
1582 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1583 VERR_OUT_OF_RANGE);
1584
1585 /*
1586 * Adjust/figure out the chunk size.
1587 */
1588 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1589 {
1590 if (cbMax >= _256M)
1591 cbChunk = _64M;
1592 else
1593 {
1594 if (cbMax < _16M)
1595 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1596 else
1597 cbChunk = (uint32_t)cbMax / 4;
1598 if (!RT_IS_POWER_OF_TWO(cbChunk))
1599 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1600 }
1601 }
1602#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
1603# if defined(RT_ARCH_AMD64)
1604 Assert(cbChunk <= _2G);
1605# elif defined(RT_ARCH_ARM64)
1606 if (cbChunk > _128M)
1607 cbChunk = _128M; /* Max relative branch distance is +/-2^(25+2) = +/-0x8000000 (134 217 728). */
1608# endif
1609#endif
1610
1611 if (cbChunk > cbMax)
1612 cbMax = cbChunk;
1613 else
1614 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1615 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1616 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1617
1618 /*
1619 * Allocate and initialize the allocator instance.
1620 */
1621 size_t const offBitmaps = RT_ALIGN_Z(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]), RT_CACHELINE_SIZE);
1622 size_t const cbBitmaps = (size_t)(cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)) * cMaxChunks;
1623 size_t cbNeeded = offBitmaps + cbBitmaps;
1624 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1625 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1626#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1627 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1628 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1629#endif
1630 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1631 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1632 VERR_NO_MEMORY);
1633 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1634 pExecMemAllocator->cbChunk = cbChunk;
1635 pExecMemAllocator->cMaxChunks = cMaxChunks;
1636 pExecMemAllocator->cChunks = 0;
1637 pExecMemAllocator->idxChunkHint = 0;
1638 pExecMemAllocator->cAllocations = 0;
1639 pExecMemAllocator->cbTotal = 0;
1640 pExecMemAllocator->cbFree = 0;
1641 pExecMemAllocator->cbAllocated = 0;
1642 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1643 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1644 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1645 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmaps); /* Mark everything as allocated. Clear when chunks are added. */
1646#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1647 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1648#endif
1649 for (uint32_t i = 0; i < cMaxChunks; i++)
1650 {
1651 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1652 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1653 pExecMemAllocator->aChunks[i].pvChunkRw = NULL;
1654#ifdef IN_RING0
1655 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1656#else
1657 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1658#endif
1659 }
1660 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1661
1662 /*
1663 * Do the initial allocations.
1664 */
1665 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1666 {
1667 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1668 AssertLogRelRCReturn(rc, rc);
1669 }
1670
1671 pExecMemAllocator->idxChunkHint = 0;
1672
1673 /*
1674 * Register statistics.
1675 */
1676 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1677 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1678 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1679 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1680 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1681 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1682 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1683 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1684 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1685 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1686 "Number of bytes current allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1687 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1688 "Number of bytes current free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1689 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1690 "Total number of byte", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1691#ifdef VBOX_WITH_STATISTICS
1692 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1693 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1694#endif
1695#ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING
1696 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneProf, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1697 "Pruning executable memory (alt)", "/IEM/CPU%u/re/ExecMem/Pruning", pVCpu->idCpu);
1698 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatPruneRecovered, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES_PER_CALL,
1699 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu);
1700#endif
1701
1702 return VINF_SUCCESS;
1703}
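
/*
 * Illustration only (not part of the allocator): a worked example of the
 * sizing arithmetic in iemExecMemAllocatorInit above for cbMax = 64 MiB,
 * assuming a sub-allocation unit shift of 8 (256 byte units).  The concrete
 * shift value and the function name are assumptions made for the sketch.
 */
static void exampleExecMemSizing(void)
{
    /* cbMax = 64 MiB is below _256M and at least _16M, so the default chunk
       size becomes cbMax / 4 = 16 MiB, which is already a power of two. */
    uint64_t cbMax   = _64M;
    uint32_t cbChunk = (uint32_t)cbMax / 4;                             /* 16 MiB */
    Assert(RT_IS_POWER_OF_TWO(cbChunk));

    /* cbMax is rounded up to a whole number of chunks (a no-op here). */
    cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;                  /* 64 MiB */
    uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);            /* 4 */

    /* Each 16 MiB chunk is carved into 256 byte units, so the allocation
       bitmap needs 65536 bits, i.e. 1024 64-bit elements per chunk. */
    uint32_t const cUnitShift              = 8;
    uint32_t const cUnitsPerChunk          = cbChunk >> cUnitShift;        /* 65536 */
    uint32_t const cBitmapElementsPerChunk = cbChunk >> (cUnitShift + 6);  /* 1024 */

    NOREF(cMaxChunks); NOREF(cUnitsPerChunk); NOREF(cBitmapElementsPerChunk);
}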
1704