VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@106453

Last change on this file since 106453 was 106453, checked in by vboxsync, 5 months ago

VMM/IEM: Eliminated the IEMNATIVE_WITH_SIMD_REG_ALLOCATOR define. Fixed bug in iemNativeEmitMemFetchStoreDataCommon where a SIMD register was masked in calls to iemNativeVarSaveVolatileRegsPreHlpCall and friends. Fixed theoretical loop-forever bugs in iemNativeSimdRegAllocFindFree & iemNativeRegAllocFindFree. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 371.4 KB
 
/* $Id: IEMN8veRecompilerEmit.h 106453 2024-10-17 13:54:35Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    if (uInfo == 0)
        pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
    else
        pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

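/*
 * Illustration (a sketch, assuming the standard AMD64/ARM64 encodings): for
 * uInfo = 0x12345678 the AMD64 path above emits the seven bytes
 *      0f 1f 05 78 56 34 12
 * i.e. 'nop dword [rip+12345678h]', so the marker value can be read straight
 * out of the disp32 field when scanning a disassembly.  The ARM64 path keeps
 * only the low 16 bits, encoding them as 'movz xzr, #0x5678', which is
 * architecturally a no-op since writes to XZR are discarded.
 */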

/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "error"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "error"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */

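/*
 * Worked example for the displacement encoder above (a sketch, assuming the
 * usual ModRM/SIB rules): a base of RBP or R13 (rm == 5) cannot use the
 * mod=0 form, as that rm value means disp32-only there, so offDisp == 0
 * falls through to the disp8 branch and emits a zero displacement byte;
 * a base of RSP or R12 (rm == 4) always needs the SIB byte 0x24 that the
 * X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0) lines produce, selecting
 * [base] with no index register.
 */
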
/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}

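/*
 * Selection examples for the ARM64 branch above (a sketch): 0x00001234,
 * 0x12340000, 0x1234ffff and 0xffff1234 each fit one instruction (movz;
 * movz lsl #16; movn lsl #16; movn - the movn forms encode the bitwise
 * complement), while a value like 0x12345678 needs the full pair:
 *      movz wN, #0x5678
 *      movk wN, #0x1234, lsl #16
 */
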
/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
template<uint32_t const a_uImm32>
DECL_FORCE_INLINE(uint32_t) iemNativeEmitLoadGpr32ImmExT(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    if (a_uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(a_uImm32);
        pCodeBuf[off++] = RT_BYTE2(a_uImm32);
        pCodeBuf[off++] = RT_BYTE3(a_uImm32);
        pCodeBuf[off++] = RT_BYTE4(a_uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if RT_CONSTEXPR_IF((a_uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32, 0, false /*f64Bit*/);
    else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
    else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32 >> 16, 1, false /*f64Bit*/);
    else if RT_CONSTEXPR_IF((a_uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 10 instruction bytes.
 *      - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * Quick simplification: Do 32-bit load if top half is zero.
     */
    if (uImm64 <= UINT32_MAX)
        return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);

    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = ( uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

#else
# error "port me"
#endif
    return off;
}

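/*
 * Worked examples for the half-word scan above (a sketch): for
 * uImm64 = 0x0000ffff00001234 two half-words are zero, so the movz path
 * wins and only the non-zero parts are emitted:
 *      movz xN, #0x1234
 *      movk xN, #0xffff, lsl #32
 * For uImm64 = 0xffffffff12345678 two half-words are 0xffff and none are
 * zero, so the movn path starts from the complemented low half-word and
 * patches the remaining non-0xffff part:
 *      movn xN, #0xa987
 *      movk xN, #0x1234, lsl #16
 */
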
/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

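/*
 * Encoding note for the AMD64 branch above (a sketch, assuming the usual REX
 * rules): the bare X86_OP_REX prefix emitted for iGpr 4..7 is what selects
 * the SPL/BPL/SIL/DIL byte registers instead of the legacy AH/CH/DH/BH
 * encodings, e.g. for iGpr=5, uImm8=0x42:
 *      40 b5 42            mov bpl, 0x42
 * The same two bytes without the REX prefix would load CH instead.
 */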

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

/**
 * Special variant of iemNativeEmitGprByVCpuDisp for accessing the VM structure.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuSignedDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu)
{
    if (offVCpu < 128 && offVCpu >= -128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

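/*
 * Range illustration for the three strategies above (a sketch, assuming
 * x28 holds pVCpu as per the recompiler's fixed register assignments):
 * with cbData = 8 the scaled unsigned-offset form covers offVCpu 0..32760
 * in a single 'ldr Xt, [x28, #off]'; offsets beyond that but within 32 KiB
 * of cpum.GstCtx switch to the dedicated CPUMCTX base register; anything
 * else costs a movz/movk sequence to materialize the offset plus a
 * register-indexed 'ldr Xt, [x28, xM]'.
 */
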
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Special variant of iemNativeEmitGprByVCpuLdStEx for accessing the VM
 * structure.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprBySignedVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu,
                                   ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    Assert((uint32_t)RT_ABS(offVCpu) < RT_BIT_32(28)); /* we should be way out of range for problematic sign extending issues. */
    Assert(!((uint32_t)RT_ABS(offVCpu) & (cbData - 1)));

    /*
     * For negative offsets we need to put the displacement in a register
     * as the two variants with signed immediates will either post or pre
     * increment the base address register.
     */
    if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        uint8_t const idxIndexReg = !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) ? iGprReg : IEMNATIVE_REG_FIXED_TMP0;
        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxIndexReg, offVCpu / (int32_t)cbData);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, idxIndexReg,
                                                    kArmv8A64InstrLdStExtend_Sxtw, cbData > 1 /*fShifted*/);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

/**
 * Special variant of iemNativeEmitGprByVCpuLdSt for accessing the VM structure.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprBySignedVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                                 int32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    off = iemNativeEmitGprBySignedVCpuLdStEx(iemNativeInstrBufEnsure(pReNative, off, 2 + 1), off, iGprReg,
                                             offVCpu, enmOperation, cbData, IEMNATIVE_REG_FIXED_TMP0);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}

/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU16Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 *
 * @note Limited range on ARM64.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    if (iGpr >= 8)
        pCodeBuf[off++] = X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 64-bit VCpu field.
 *
 * @note Will allocate temporary registers on both ARM64 and AMD64.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 (immediate goes via a temporary register) */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}



/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: @a idxTmp1 is always required! @a idxTmp2 is needed when the
 *       offset cannot be encoded as an immediate. The @a offVCpu immediate
 *       range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}

/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu,
                              uint8_t idxRegTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    RT_NOREF(idxRegTmp);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    if (idxRegTmp != UINT8_MAX)
    {
        Assert(idxRegTmp != IEMNATIVE_REG_FIXED_TMP0);
        off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegTmp, bImm);
        off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegTmp, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    }
    else
    {
        uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
        off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else if (offVCpu <= 0xffffffU)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
                                                   true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
        if (offVCpu & 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

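/*
 * Example for the shifted-add path above (a sketch): offVCpu = 0x12345 is
 * too large for one 12-bit add but fits the two-instruction form
 *      add xDst, x28, #0x12, lsl #12
 *      add xDst, xDst, #0x345
 * which covers any offset up to 0xffffff without a temporary register
 * (x28 assumed to be IEMNATIVE_REG_FIXED_PVMCPU).
 */
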
/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}

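/*
 * Usage sketch (the statistics member named below is hypothetical, shown only
 * for illustration; the allocator calls follow the API used elsewhere in this
 * file): the ARM64 path is an unlocked load/add/store, so it assumes the
 * counter is only ever bumped from the owning EMT.
 *
 *      uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
 *      uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *                                              RT_UOFFSETOF(VMCPU, iem.s.StatSomeCounter)); // hypothetical member
 *      iemNativeRegFreeTmp(pReNative, idxTmp2);
 *      iemNativeRegFreeTmp(pReNative, idxTmp1);
 */
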
/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const         idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const            fShifted    = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

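/*
 * Mask encodability examples for the ARM64 branch above (a sketch): the
 * logical-immediate form only represents rotated runs of contiguous ones,
 * so fMask = 0x000ffff0 (a 16-bit run shifted left by 4) and even the
 * wrap-around 0x80000001 encode directly and idxTmpMask stays UINT8_MAX,
 * while an irregular mask like 0x12345678 fails the conversion and is
 * materialized via iemNativeRegAllocTmpImm instead.
 */
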
/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const         idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const            fShifted    = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1614{
1615#ifdef RT_ARCH_AMD64
1616 /* mov gprdst, gprsrc */
1617 if ((iGprDst | iGprSrc) >= 8)
1618 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1619 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1620 : X86_OP_REX_W | X86_OP_REX_R;
1621 else
1622 pCodeBuf[off++] = X86_OP_REX_W;
1623 pCodeBuf[off++] = 0x8b;
1624 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1625
1626#elif defined(RT_ARCH_ARM64)
1627 /* mov dst, src; alias for: orr dst, xzr, src */
1628 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1629
1630#else
1631# error "port me"
1632#endif
1633 return off;
1634}
1635
1636
1637/**
1638 * Emits a gprdst = gprsrc load.
1639 */
1640DECL_INLINE_THROW(uint32_t)
1641iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1642{
1643#ifdef RT_ARCH_AMD64
1644 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1645#elif defined(RT_ARCH_ARM64)
1646 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1647#else
1648# error "port me"
1649#endif
1650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1651 return off;
1652}
1653
1654
1655/**
1656 * Emits a gprdst = gprsrc[31:0] load.
1657 * @note Bits 63 thru 32 are cleared.
1658 */
1659DECL_FORCE_INLINE(uint32_t)
1660iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1661{
1662#ifdef RT_ARCH_AMD64
1663 /* mov gprdst, gprsrc */
1664 if ((iGprDst | iGprSrc) >= 8)
1665 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1666 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1667 : X86_OP_REX_R;
1668 pCodeBuf[off++] = 0x8b;
1669 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1670
1671#elif defined(RT_ARCH_ARM64)
1672 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1673 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1674
1675#else
1676# error "port me"
1677#endif
1678 return off;
1679}
1680
1681
1682/**
1683 * Emits a gprdst = gprsrc[31:0] load.
1684 * @note Bits 63 thru 32 are cleared.
1685 */
1686DECL_INLINE_THROW(uint32_t)
1687iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1688{
1689#ifdef RT_ARCH_AMD64
1690 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1691#elif defined(RT_ARCH_ARM64)
1692 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1693#else
1694# error "port me"
1695#endif
1696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1697 return off;
1698}
1699
1700
1701/**
1702 * Emits a gprdst = gprsrc[15:0] load.
1703 * @note Bits 63 thru 15 are cleared.
1704 */
1705DECL_INLINE_THROW(uint32_t)
1706iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1707{
1708#ifdef RT_ARCH_AMD64
1709 /* movzx Gv,Ew */
1710 if ((iGprDst | iGprSrc) >= 8)
1711 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1712 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1713 : X86_OP_REX_R;
1714 pCodeBuf[off++] = 0x0f;
1715 pCodeBuf[off++] = 0xb7;
1716 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1717
1718#elif defined(RT_ARCH_ARM64)
1719 /* and gprdst, gprsrc, #0xffff */
1720# if 1
1721 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1722 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1723# else
1724 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1725 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1726# endif
1727
1728#else
1729# error "port me"
1730#endif
1731 return off;
1732}
1733
1734
1735/**
1736 * Emits a gprdst = gprsrc[15:0] load.
1737 * @note Bits 63 thru 15 are cleared.
1738 */
1739DECL_INLINE_THROW(uint32_t)
1740iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1741{
1742#ifdef RT_ARCH_AMD64
1743 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1744#elif defined(RT_ARCH_ARM64)
1745 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1746#else
1747# error "port me"
1748#endif
1749 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1750 return off;
1751}
1752
1753
1754/**
1755 * Emits a gprdst = gprsrc[7:0] load.
1756 * @note Bits 63 thru 8 are cleared.
1757 */
1758DECL_FORCE_INLINE(uint32_t)
1759iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1760{
1761#ifdef RT_ARCH_AMD64
1762 /* movzx Gv,Eb */
1763 if (iGprDst >= 8 || iGprSrc >= 8)
1764 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1765 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1766 : X86_OP_REX_R;
1767 else if (iGprSrc >= 4)
1768 pCodeBuf[off++] = X86_OP_REX;
1769 pCodeBuf[off++] = 0x0f;
1770 pCodeBuf[off++] = 0xb6;
1771 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1772
1773#elif defined(RT_ARCH_ARM64)
1774 /* and gprdst, gprsrc, #0xff */
1775 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1776 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1777
1778#else
1779# error "port me"
1780#endif
1781 return off;
1782}
1783
1784
1785/**
1786 * Emits a gprdst = gprsrc[7:0] load.
1787 * @note Bits 63 thru 8 are cleared.
1788 */
1789DECL_INLINE_THROW(uint32_t)
1790iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1791{
1792#ifdef RT_ARCH_AMD64
1793 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1794#elif defined(RT_ARCH_ARM64)
1795 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1796#else
1797# error "port me"
1798#endif
1799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1800 return off;
1801}
1802
1803
1804/**
1805 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1806 * @note Bits 63 thru 8 are cleared.
1807 */
1808DECL_INLINE_THROW(uint32_t)
1809iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1810{
1811#ifdef RT_ARCH_AMD64
1812 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1813
1814 /* movzx Gv,Ew */
1815 if ((iGprDst | iGprSrc) >= 8)
1816 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1817 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1818 : X86_OP_REX_R;
1819 pbCodeBuf[off++] = 0x0f;
1820 pbCodeBuf[off++] = 0xb7;
1821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1822
1823 /* shr Ev,8 */
1824 if (iGprDst >= 8)
1825 pbCodeBuf[off++] = X86_OP_REX_B;
1826 pbCodeBuf[off++] = 0xc1;
1827 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1828 pbCodeBuf[off++] = 8;
1829
1830#elif defined(RT_ARCH_ARM64)
1831 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1832 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1833 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1834
1835#else
1836# error "port me"
1837#endif
1838 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1839 return off;
1840}
1841
1842
1843/**
1844 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1845 */
1846DECL_INLINE_THROW(uint32_t)
1847iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1848{
1849#ifdef RT_ARCH_AMD64
1850 /* movsxd r64, r/m32 */
1851 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1852 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1853 pbCodeBuf[off++] = 0x63;
1854 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1855
1856#elif defined(RT_ARCH_ARM64)
1857 /* sxtw dst, src */
1858 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1859 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1860
1861#else
1862# error "port me"
1863#endif
1864 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1865 return off;
1866}
1867
1868
1869/**
1870 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1871 */
1872DECL_INLINE_THROW(uint32_t)
1873iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1874{
1875#ifdef RT_ARCH_AMD64
1876 /* movsx r64, r/m16 */
1877 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1878 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1879 pbCodeBuf[off++] = 0x0f;
1880 pbCodeBuf[off++] = 0xbf;
1881 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1882
1883#elif defined(RT_ARCH_ARM64)
1884 /* sxth dst, src */
1885 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1886 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1887
1888#else
1889# error "port me"
1890#endif
1891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1892 return off;
1893}
1894
1895
1896/**
1897 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1898 */
1899DECL_INLINE_THROW(uint32_t)
1900iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1901{
1902#ifdef RT_ARCH_AMD64
1903 /* movsx r64, r/m16 */
1904 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1905 if (iGprDst >= 8 || iGprSrc >= 8)
1906 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1907 pbCodeBuf[off++] = 0x0f;
1908 pbCodeBuf[off++] = 0xbf;
1909 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1910
1911#elif defined(RT_ARCH_ARM64)
1912 /* sxth dst32, src */
1913 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1914 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1915
1916#else
1917# error "port me"
1918#endif
1919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1920 return off;
1921}
1922
1923
1924/**
1925 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1926 */
1927DECL_INLINE_THROW(uint32_t)
1928iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1929{
1930#ifdef RT_ARCH_AMD64
1931 /* movsx r64, r/m8 */
1932 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1933 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1934 pbCodeBuf[off++] = 0x0f;
1935 pbCodeBuf[off++] = 0xbe;
1936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1937
1938#elif defined(RT_ARCH_ARM64)
1939 /* sxtb dst, src */
1940 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1941 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1942
1943#else
1944# error "port me"
1945#endif
1946 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1947 return off;
1948}
1949
1950
1951/**
1952 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1953 * @note Bits 63 thru 32 are cleared.
1954 */
1955DECL_INLINE_THROW(uint32_t)
1956iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1957{
1958#ifdef RT_ARCH_AMD64
1959 /* movsx r32, r/m8 */
1960 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1961 if (iGprDst >= 8 || iGprSrc >= 8)
1962 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1963 else if (iGprSrc >= 4)
1964 pbCodeBuf[off++] = X86_OP_REX;
1965 pbCodeBuf[off++] = 0x0f;
1966 pbCodeBuf[off++] = 0xbe;
1967 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1968
1969#elif defined(RT_ARCH_ARM64)
1970 /* sxtb dst32, src32 */
1971 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1972 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1973
1974#else
1975# error "port me"
1976#endif
1977 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1978 return off;
1979}
1980
1981
1982/**
1983 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1984 * @note Bits 63 thru 16 are cleared.
1985 */
1986DECL_INLINE_THROW(uint32_t)
1987iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1988{
1989#ifdef RT_ARCH_AMD64
1990 /* movsx r16, r/m8 */
1991 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1992 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1993 if (iGprDst >= 8 || iGprSrc >= 8)
1994 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1995 else if (iGprSrc >= 4)
1996 pbCodeBuf[off++] = X86_OP_REX;
1997 pbCodeBuf[off++] = 0x0f;
1998 pbCodeBuf[off++] = 0xbe;
1999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
2000
2001 /* movzx r32, r/m16 */
2002 if (iGprDst >= 8)
2003 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
2004 pbCodeBuf[off++] = 0x0f;
2005 pbCodeBuf[off++] = 0xb7;
2006 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2007
2008#elif defined(RT_ARCH_ARM64)
2009 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
2010 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2011 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
2012 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2013 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2014
2015#else
2016# error "port me"
2017#endif
2018 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2019 return off;
2020}
2021
2022
2023/**
2024 * Emits a gprdst = gprsrc + addend load.
2025 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2026 */
2027#ifdef RT_ARCH_AMD64
2028DECL_INLINE_THROW(uint32_t)
2029iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2030 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2031{
2032 Assert(iAddend != 0);
2033
2034 /* lea gprdst, [gprsrc + iAddend] */
2035 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2036 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2037 pbCodeBuf[off++] = 0x8d;
2038 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2039 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2040 return off;
2041}
2042
2043#elif defined(RT_ARCH_ARM64)
2044DECL_INLINE_THROW(uint32_t)
2045iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2046 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2047{
2048 if ((uint32_t)iAddend < 4096)
2049 {
2050 /* add dst, src, uimm12 */
2051 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2052 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
2053 }
2054 else if ((uint32_t)-iAddend < 4096)
2055 {
2056 /* sub dst, src, uimm12 */
2057 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2058 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
2059 }
2060 else
2061 {
2062 Assert(iGprSrc != iGprDst);
2063 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
2064 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2065 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
2066 }
2067 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2068 return off;
2069}
2070#else
2071# error "port me"
2072#endif
2073
2074/**
2075 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
2076 * @note The added is 32-bit for AMD64 and 64-bit for ARM64.
2077 */
2078#ifdef RT_ARCH_AMD64
2079DECL_INLINE_THROW(uint32_t)
2080iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2081 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2082#else
2083DECL_INLINE_THROW(uint32_t)
2084iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2085 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2086#endif
2087{
2088 if (iAddend != 0)
2089 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2090 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
2091}
2092
2093
2094/**
2095 * Emits a gprdst = gprsrc32 + addend load.
2096 * @note Bits 63 thru 32 are cleared.
2097 */
2098DECL_INLINE_THROW(uint32_t)
2099iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2100 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2101{
2102 Assert(iAddend != 0);
2103
2104#ifdef RT_ARCH_AMD64
2105 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
2106 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2107 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
2108 if ((iGprDst | iGprSrc) >= 8)
2109 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2110 pbCodeBuf[off++] = 0x8d;
2111 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2112
2113#elif defined(RT_ARCH_ARM64)
2114 if ((uint32_t)iAddend < 4096)
2115 {
2116 /* add dst, src, uimm12 */
2117 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2118 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
2119 }
2120 else if ((uint32_t)-iAddend < 4096)
2121 {
2122 /* sub dst, src, uimm12 */
2123 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2124 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
2125 }
2126 else
2127 {
2128 Assert(iGprSrc != iGprDst);
2129 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
2130 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2131 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
2132 }
2133
2134#else
2135# error "port me"
2136#endif
2137 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2138 return off;
2139}
2140
2141
2142/**
2143 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
2144 */
2145DECL_INLINE_THROW(uint32_t)
2146iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2147 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2148{
2149 if (iAddend != 0)
2150 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2151 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
2152}
2153
2154
2155/**
2156 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2157 * destination.
2158 */
2159DECL_FORCE_INLINE(uint32_t)
2160iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2161{
2162#ifdef RT_ARCH_AMD64
2163 /* mov reg16, r/m16 */
2164 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2165 if (idxDst >= 8 || idxSrc >= 8)
2166 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
2167 pCodeBuf[off++] = 0x8b;
2168 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
2169
2170#elif defined(RT_ARCH_ARM64)
2171 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
2172 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
2173
2174#else
2175# error "Port me!"
2176#endif
2177 return off;
2178}
2179
2180
2181/**
2182 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2183 * destination.
2184 */
2185DECL_INLINE_THROW(uint32_t)
2186iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2187{
2188#ifdef RT_ARCH_AMD64
2189 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2190#elif defined(RT_ARCH_ARM64)
2191 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2192#else
2193# error "Port me!"
2194#endif
2195 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2196 return off;
2197}
2198
2199
2200#ifdef RT_ARCH_AMD64
2201/**
2202 * Common bit of iemNativeEmitLoadGprByBp and friends.
2203 */
2204DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2205 PIEMRECOMPILERSTATE pReNativeAssert)
2206{
2207 if (offDisp < 128 && offDisp >= -128)
2208 {
2209 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2210 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2211 }
2212 else
2213 {
2214 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2215 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2216 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2217 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2218 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2219 }
2220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2221 return off;
2222}
2223#elif defined(RT_ARCH_ARM64)
2224/**
2225 * Common bit of iemNativeEmitLoadGprByBp and friends.
2226 */
2227DECL_FORCE_INLINE_THROW(uint32_t)
2228iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2229 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2230{
2231 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2232 {
2233 /* str w/ unsigned imm12 (scaled) */
2234 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2235 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2236 }
2237 else if (offDisp >= -256 && offDisp <= 256)
2238 {
2239 /* stur w/ signed imm9 (unscaled) */
2240 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2241 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2242 }
2243 else
2244 {
2245 /* Use temporary indexing register. */
2246 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2247 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2248 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2249 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2250 }
2251 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2252 return off;
2253}
2254#endif
2255
2256
2257/**
2258 * Emits a 64-bit GRP load instruction with an BP relative source address.
2259 */
2260DECL_INLINE_THROW(uint32_t)
2261iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2262{
2263#ifdef RT_ARCH_AMD64
2264 /* mov gprdst, qword [rbp + offDisp] */
2265 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2266 if (iGprDst < 8)
2267 pbCodeBuf[off++] = X86_OP_REX_W;
2268 else
2269 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2270 pbCodeBuf[off++] = 0x8b;
2271 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2272
2273#elif defined(RT_ARCH_ARM64)
2274 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2275
2276#else
2277# error "port me"
2278#endif
2279}
2280
2281
2282/**
2283 * Emits a 32-bit GRP load instruction with an BP relative source address.
2284 * @note Bits 63 thru 32 of the GPR will be cleared.
2285 */
2286DECL_INLINE_THROW(uint32_t)
2287iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2288{
2289#ifdef RT_ARCH_AMD64
2290 /* mov gprdst, dword [rbp + offDisp] */
2291 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2292 if (iGprDst >= 8)
2293 pbCodeBuf[off++] = X86_OP_REX_R;
2294 pbCodeBuf[off++] = 0x8b;
2295 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2296
2297#elif defined(RT_ARCH_ARM64)
2298 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2299
2300#else
2301# error "port me"
2302#endif
2303}
2304
2305
2306/**
2307 * Emits a 16-bit GRP load instruction with an BP relative source address.
2308 * @note Bits 63 thru 16 of the GPR will be cleared.
2309 */
2310DECL_INLINE_THROW(uint32_t)
2311iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2312{
2313#ifdef RT_ARCH_AMD64
2314 /* movzx gprdst, word [rbp + offDisp] */
2315 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2316 if (iGprDst >= 8)
2317 pbCodeBuf[off++] = X86_OP_REX_R;
2318 pbCodeBuf[off++] = 0x0f;
2319 pbCodeBuf[off++] = 0xb7;
2320 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2321
2322#elif defined(RT_ARCH_ARM64)
2323 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint32_t));
2324
2325#else
2326# error "port me"
2327#endif
2328}
2329
2330
2331/**
2332 * Emits a 8-bit GRP load instruction with an BP relative source address.
2333 * @note Bits 63 thru 8 of the GPR will be cleared.
2334 */
2335DECL_INLINE_THROW(uint32_t)
2336iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2337{
2338#ifdef RT_ARCH_AMD64
2339 /* movzx gprdst, byte [rbp + offDisp] */
2340 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2341 if (iGprDst >= 8)
2342 pbCodeBuf[off++] = X86_OP_REX_R;
2343 pbCodeBuf[off++] = 0x0f;
2344 pbCodeBuf[off++] = 0xb6;
2345 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2346
2347#elif defined(RT_ARCH_ARM64)
2348 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint32_t));
2349
2350#else
2351# error "port me"
2352#endif
2353}
2354
2355
2356/**
2357 * Emits a 128-bit vector register load instruction with an BP relative source address.
2358 */
2359DECL_FORCE_INLINE_THROW(uint32_t)
2360iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2361{
2362#ifdef RT_ARCH_AMD64
2363 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2364
2365 /* movdqu reg128, mem128 */
2366 pbCodeBuf[off++] = 0xf3;
2367 if (iVecRegDst >= 8)
2368 pbCodeBuf[off++] = X86_OP_REX_R;
2369 pbCodeBuf[off++] = 0x0f;
2370 pbCodeBuf[off++] = 0x6f;
2371 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2372#elif defined(RT_ARCH_ARM64)
2373 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2374#else
2375# error "port me"
2376#endif
2377}
2378
2379
2380/**
2381 * Emits a 256-bit vector register load instruction with an BP relative source address.
2382 */
2383DECL_FORCE_INLINE_THROW(uint32_t)
2384iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2385{
2386#ifdef RT_ARCH_AMD64
2387 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2388
2389 /* vmovdqu reg256, mem256 */
2390 pbCodeBuf[off++] = X86_OP_VEX2;
2391 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2392 pbCodeBuf[off++] = 0x6f;
2393 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2394#elif defined(RT_ARCH_ARM64)
2395 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2396 Assert(!(iVecRegDst & 0x1));
2397 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2398 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2399#else
2400# error "port me"
2401#endif
2402}
2403
2404
2405/**
2406 * Emits a load effective address to a GRP with an BP relative source address.
2407 */
2408DECL_INLINE_THROW(uint32_t)
2409iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2410{
2411#ifdef RT_ARCH_AMD64
2412 /* lea gprdst, [rbp + offDisp] */
2413 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2414 if (iGprDst < 8)
2415 pbCodeBuf[off++] = X86_OP_REX_W;
2416 else
2417 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2418 pbCodeBuf[off++] = 0x8d;
2419 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2420
2421#elif defined(RT_ARCH_ARM64)
2422 bool const fSub = offDisp < 0;
2423 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2424 if (offAbsDisp <= 0xffffffU)
2425 {
2426 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2427 if (offAbsDisp <= 0xfffU)
2428 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2429 else
2430 {
2431 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2432 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2433 if (offAbsDisp & 0xfffU)
2434 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2435 }
2436 }
2437 else
2438 {
2439 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2440 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2441 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2442 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2443 }
2444
2445#else
2446# error "port me"
2447#endif
2448
2449 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2450 return off;
2451}
2452
2453
2454/**
2455 * Emits a 64-bit GPR store with an BP relative destination address.
2456 *
2457 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2458 */
2459DECL_INLINE_THROW(uint32_t)
2460iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2461{
2462#ifdef RT_ARCH_AMD64
2463 /* mov qword [rbp + offDisp], gprdst */
2464 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2465 if (iGprSrc < 8)
2466 pbCodeBuf[off++] = X86_OP_REX_W;
2467 else
2468 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2469 pbCodeBuf[off++] = 0x89;
2470 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2471
2472#elif defined(RT_ARCH_ARM64)
2473 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2474 {
2475 /* str w/ unsigned imm12 (scaled) */
2476 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2477 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2478 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2479 }
2480 else if (offDisp >= -256 && offDisp <= 256)
2481 {
2482 /* stur w/ signed imm9 (unscaled) */
2483 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2484 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2485 }
2486 else if ((uint32_t)-offDisp < (unsigned)_4K)
2487 {
2488 /* Use temporary indexing register w/ sub uimm12. */
2489 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2490 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2491 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2492 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2493 }
2494 else
2495 {
2496 /* Use temporary indexing register. */
2497 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2498 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2499 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2500 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2501 }
2502 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2503 return off;
2504
2505#else
2506# error "Port me!"
2507#endif
2508}
2509
2510
2511/**
2512 * Emits a 64-bit immediate store with an BP relative destination address.
2513 *
2514 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2515 */
2516DECL_INLINE_THROW(uint32_t)
2517iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2518{
2519#ifdef RT_ARCH_AMD64
2520 if ((int64_t)uImm64 == (int32_t)uImm64)
2521 {
2522 /* mov qword [rbp + offDisp], imm32 - sign extended */
2523 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2524 pbCodeBuf[off++] = X86_OP_REX_W;
2525 pbCodeBuf[off++] = 0xc7;
2526 if (offDisp < 128 && offDisp >= -128)
2527 {
2528 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2529 pbCodeBuf[off++] = (uint8_t)offDisp;
2530 }
2531 else
2532 {
2533 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2534 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2535 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2536 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2537 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2538 }
2539 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2540 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2541 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2542 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2543 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2544 return off;
2545 }
2546#endif
2547
2548 /* Load tmp0, imm64; Store tmp to bp+disp. */
2549 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2550 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2551}
2552
2553
2554/**
2555 * Emits a 128-bit vector register store with an BP relative destination address.
2556 *
2557 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2558 */
2559DECL_INLINE_THROW(uint32_t)
2560iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2561{
2562#ifdef RT_ARCH_AMD64
2563 /* movdqu [rbp + offDisp], vecsrc */
2564 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2565 pbCodeBuf[off++] = 0xf3;
2566 if (iVecRegSrc >= 8)
2567 pbCodeBuf[off++] = X86_OP_REX_R;
2568 pbCodeBuf[off++] = 0x0f;
2569 pbCodeBuf[off++] = 0x7f;
2570 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2571
2572#elif defined(RT_ARCH_ARM64)
2573 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2574 {
2575 /* str w/ unsigned imm12 (scaled) */
2576 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2577 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2578 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2579 }
2580 else if (offDisp >= -256 && offDisp <= 256)
2581 {
2582 /* stur w/ signed imm9 (unscaled) */
2583 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2584 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2585 }
2586 else if ((uint32_t)-offDisp < (unsigned)_4K)
2587 {
2588 /* Use temporary indexing register w/ sub uimm12. */
2589 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2590 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2591 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2592 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2593 }
2594 else
2595 {
2596 /* Use temporary indexing register. */
2597 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2598 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2599 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2600 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2601 }
2602 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2603 return off;
2604
2605#else
2606# error "Port me!"
2607#endif
2608}
2609
2610
2611/**
2612 * Emits a 256-bit vector register store with an BP relative destination address.
2613 *
2614 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2615 */
2616DECL_INLINE_THROW(uint32_t)
2617iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2618{
2619#ifdef RT_ARCH_AMD64
2620 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2621
2622 /* vmovdqu mem256, reg256 */
2623 pbCodeBuf[off++] = X86_OP_VEX2;
2624 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2625 pbCodeBuf[off++] = 0x7f;
2626 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2627#elif defined(RT_ARCH_ARM64)
2628 Assert(!(iVecRegSrc & 0x1));
2629 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2630 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2631#else
2632# error "Port me!"
2633#endif
2634}
2635
2636#if defined(RT_ARCH_ARM64)
2637
2638/**
2639 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2640 *
2641 * @note Odd and large @a offDisp values requires a temporary, unless it's a
2642 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2643 * caller does not heed this.
2644 *
2645 * @note DON'T try this with prefetch.
2646 */
2647DECL_FORCE_INLINE_THROW(uint32_t)
2648iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2649 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2650{
2651 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2652 {
2653 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2654 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2655 }
2656 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2657 && iGprReg != iGprBase)
2658 || iGprTmp != UINT8_MAX)
2659 {
2660 /* The offset is too large, so we must load it into a register and use
2661 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2662 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
2663 if (iGprTmp == UINT8_MAX)
2664 iGprTmp = iGprReg;
2665 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2666 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2667 }
2668 else
2669# ifdef IEM_WITH_THROW_CATCH
2670 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2671# else
2672 AssertReleaseFailedStmt(off = UINT32_MAX);
2673# endif
2674 return off;
2675}
2676
2677/**
2678 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2679 */
2680DECL_FORCE_INLINE_THROW(uint32_t)
2681iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2682 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2683{
2684 /*
2685 * There are a couple of ldr variants that takes an immediate offset, so
2686 * try use those if we can, otherwise we have to use the temporary register
2687 * help with the addressing.
2688 */
2689 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2690 {
2691 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2692 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2693 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2694 }
2695 else
2696 {
2697 /* The offset is too large, so we must load it into a register and use
2698 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2699 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
2700 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2701
2702 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2703 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2704
2705 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2706 }
2707 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2708 return off;
2709}
2710
2711/**
2712 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2713 *
2714 * @note Odd and large @a offDisp values requires a temporary, unless it's a
2715 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2716 * caller does not heed this.
2717 *
2718 * @note DON'T try this with prefetch.
2719 */
2720DECL_FORCE_INLINE_THROW(uint32_t)
2721iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2722 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2723{
2724 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2725 {
2726 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2727 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2728 }
2729 else if ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2730 || iGprTmp != UINT8_MAX)
2731 {
2732 /* The offset is too large, so we must load it into a register and use
2733 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2734 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
2735 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2736 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2737 }
2738 else
2739# ifdef IEM_WITH_THROW_CATCH
2740 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2741# else
2742 AssertReleaseFailedStmt(off = UINT32_MAX);
2743# endif
2744 return off;
2745}
2746
2747
2748/**
2749 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2750 */
2751DECL_FORCE_INLINE_THROW(uint32_t)
2752iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2753 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2754{
2755 /*
2756 * There are a couple of ldr variants that takes an immediate offset, so
2757 * try use those if we can, otherwise we have to use the temporary register
2758 * help with the addressing.
2759 */
2760 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2761 {
2762 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2763 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2764 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2765 }
2766 else
2767 {
2768 /* The offset is too large, so we must load it into a register and use
2769 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2770 /** @todo reduce by offVCpu by >> 3 or >> 2? if it saves instructions? */
2771 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2772
2773 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2774 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2775
2776 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2777 }
2778 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2779 return off;
2780}
2781#endif /* RT_ARCH_ARM64 */
2782
2783/**
2784 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2785 *
2786 * @note ARM64: Misaligned @a offDisp values and values not in the
2787 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2788 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2789 * does not heed this.
2790 */
2791DECL_FORCE_INLINE_THROW(uint32_t)
2792iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2793 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2794{
2795#ifdef RT_ARCH_AMD64
2796 /* mov reg64, mem64 */
2797 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2798 pCodeBuf[off++] = 0x8b;
2799 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2800 RT_NOREF(iGprTmp);
2801
2802#elif defined(RT_ARCH_ARM64)
2803 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2804 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2805
2806#else
2807# error "port me"
2808#endif
2809 return off;
2810}
2811
2812
2813/**
2814 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2815 */
2816DECL_INLINE_THROW(uint32_t)
2817iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2818{
2819#ifdef RT_ARCH_AMD64
2820 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2821 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2822
2823#elif defined(RT_ARCH_ARM64)
2824 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2825
2826#else
2827# error "port me"
2828#endif
2829 return off;
2830}
2831
2832
2833/**
2834 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2835 *
2836 * @note ARM64: Misaligned @a offDisp values and values not in the
2837 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2838 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2839 * caller does not heed this.
2840 *
2841 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2842 */
2843DECL_FORCE_INLINE_THROW(uint32_t)
2844iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2845 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2846{
2847#ifdef RT_ARCH_AMD64
2848 /* mov reg32, mem32 */
2849 if (iGprDst >= 8 || iGprBase >= 8)
2850 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2851 pCodeBuf[off++] = 0x8b;
2852 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2853 RT_NOREF(iGprTmp);
2854
2855#elif defined(RT_ARCH_ARM64)
2856 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2857 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2858
2859#else
2860# error "port me"
2861#endif
2862 return off;
2863}
2864
2865
2866/**
2867 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2868 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2869 */
2870DECL_INLINE_THROW(uint32_t)
2871iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2872{
2873#ifdef RT_ARCH_AMD64
2874 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2875 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2876
2877#elif defined(RT_ARCH_ARM64)
2878 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2879
2880#else
2881# error "port me"
2882#endif
2883 return off;
2884}
2885
2886
2887/**
2888 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2889 * sign-extending the value to 64 bits.
2890 *
2891 * @note ARM64: Misaligned @a offDisp values and values not in the
2892 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2893 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2894 * caller does not heed this.
2895 */
2896DECL_FORCE_INLINE_THROW(uint32_t)
2897iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2898 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2899{
2900#ifdef RT_ARCH_AMD64
2901 /* movsxd reg64, mem32 */
2902 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2903 pCodeBuf[off++] = 0x63;
2904 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2905 RT_NOREF(iGprTmp);
2906
2907#elif defined(RT_ARCH_ARM64)
2908 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2909 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2910
2911#else
2912# error "port me"
2913#endif
2914 return off;
2915}
2916
2917
2918/**
2919 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2920 *
2921 * @note ARM64: Misaligned @a offDisp values and values not in the
2922 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2923 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2924 * caller does not heed this.
2925 *
2926 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2927 */
2928DECL_FORCE_INLINE_THROW(uint32_t)
2929iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2930 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2931{
2932#ifdef RT_ARCH_AMD64
2933 /* movzx reg32, mem16 */
2934 if (iGprDst >= 8 || iGprBase >= 8)
2935 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2936 pCodeBuf[off++] = 0x0f;
2937 pCodeBuf[off++] = 0xb7;
2938 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2939 RT_NOREF(iGprTmp);
2940
2941#elif defined(RT_ARCH_ARM64)
2942 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2943 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2944
2945#else
2946# error "port me"
2947#endif
2948 return off;
2949}
2950
2951
2952/**
2953 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2954 * sign-extending the value to 64 bits.
2955 *
2956 * @note ARM64: Misaligned @a offDisp values and values not in the
2957 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2958 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2959 * caller does not heed this.
2960 */
2961DECL_FORCE_INLINE_THROW(uint32_t)
2962iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2963 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2964{
2965#ifdef RT_ARCH_AMD64
2966 /* movsx reg64, mem16 */
2967 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2968 pCodeBuf[off++] = 0x0f;
2969 pCodeBuf[off++] = 0xbf;
2970 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2971 RT_NOREF(iGprTmp);
2972
2973#elif defined(RT_ARCH_ARM64)
2974 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2975 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2976
2977#else
2978# error "port me"
2979#endif
2980 return off;
2981}
2982
2983
2984/**
2985 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2986 * sign-extending the value to 32 bits.
2987 *
2988 * @note ARM64: Misaligned @a offDisp values and values not in the
2989 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2990 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2991 * caller does not heed this.
2992 *
2993 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2994 */
2995DECL_FORCE_INLINE_THROW(uint32_t)
2996iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2997 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2998{
2999#ifdef RT_ARCH_AMD64
3000 /* movsx reg32, mem16 */
3001 if (iGprDst >= 8 || iGprBase >= 8)
3002 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3003 pCodeBuf[off++] = 0x0f;
3004 pCodeBuf[off++] = 0xbf;
3005 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3006 RT_NOREF(iGprTmp);
3007
3008#elif defined(RT_ARCH_ARM64)
3009 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3010 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
3011
3012#else
3013# error "port me"
3014#endif
3015 return off;
3016}
3017
3018
3019/**
3020 * Emits a 8-bit GPR load via a GPR base address with a displacement.
3021 *
3022 * @note ARM64: @a offDisp values not in the 0xfff...0xfff range will require a
3023 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3024 * same. Will assert / throw if caller does not heed this.
3025 *
3026 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
3027 */
3028DECL_FORCE_INLINE_THROW(uint32_t)
3029iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3030 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3031{
3032#ifdef RT_ARCH_AMD64
3033 /* movzx reg32, mem8 */
3034 if (iGprDst >= 8 || iGprBase >= 8)
3035 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3036 pCodeBuf[off++] = 0x0f;
3037 pCodeBuf[off++] = 0xb6;
3038 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3039 RT_NOREF(iGprTmp);
3040
3041#elif defined(RT_ARCH_ARM64)
3042 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3043 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
3044
3045#else
3046# error "port me"
3047#endif
3048 return off;
3049}
3050
3051
3052/**
3053 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3054 * sign-extending the value to 64 bits.
3055 *
3056 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3057 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3058 * same. Will assert / throw if the caller does not heed this.
3059 */
3060DECL_FORCE_INLINE_THROW(uint32_t)
3061iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3062 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3063{
3064#ifdef RT_ARCH_AMD64
3065 /* movsx reg64, mem8 */
3066 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3067 pCodeBuf[off++] = 0x0f;
3068 pCodeBuf[off++] = 0xbe;
3069 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3070 RT_NOREF(iGprTmp);
3071
3072#elif defined(RT_ARCH_ARM64)
3073 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3074 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
3075
3076#else
3077# error "port me"
3078#endif
3079 return off;
3080}
3081
3082
3083/**
3084 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3085 * sign-extending the value to 32 bits.
3086 *
3087 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3088 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3089 * same. Will assert / throw if the caller does not heed this.
3090 *
3091 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3092 */
3093DECL_FORCE_INLINE_THROW(uint32_t)
3094iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3095 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3096{
3097#ifdef RT_ARCH_AMD64
3098 /* movsx reg32, mem8 */
3099 if (iGprDst >= 8 || iGprBase >= 8)
3100 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3101 pCodeBuf[off++] = 0x0f;
3102 pCodeBuf[off++] = 0xbe;
3103 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3104 RT_NOREF(iGprTmp);
3105
3106#elif defined(RT_ARCH_ARM64)
3107 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3108 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3109
3110#else
3111# error "port me"
3112#endif
3113 return off;
3114}
3115
3116
3117/**
3118 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3119 * sign-extending the value to 16 bits.
3120 *
3121 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3122 * temporary register (@a iGprTmp) if @a iGprDst and @a iGprBase are the
3123 * same. Will assert / throw if the caller does not heed this.
3124 *
3125 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
3126 */
3127DECL_FORCE_INLINE_THROW(uint32_t)
3128iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3129 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3130{
3131#ifdef RT_ARCH_AMD64
3132 /* movsx reg32, mem8 */
3133 if (iGprDst >= 8 || iGprBase >= 8)
3134 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3135 pCodeBuf[off++] = 0x0f;
3136 pCodeBuf[off++] = 0xbe;
3137 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3138# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
3139 /* and reg32, 0xffff */
3140 if (iGprDst >= 8)
3141 pCodeBuf[off++] = X86_OP_REX_B;
3142 pCodeBuf[off++] = 0x81;
3143 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
3144 pCodeBuf[off++] = 0xff;
3145 pCodeBuf[off++] = 0xff;
3146 pCodeBuf[off++] = 0;
3147 pCodeBuf[off++] = 0;
3148# else
3149 /* movzx reg32, reg16 */
3150 if (iGprDst >= 8)
3151 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
3152 pCodeBuf[off++] = 0x0f;
3153 pCodeBuf[off++] = 0xb7;
3154 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
3155# endif
3156 RT_NOREF(iGprTmp);
3157
3158#elif defined(RT_ARCH_ARM64)
3159 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3160 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3161 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
3162 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
3163
3164#else
3165# error "port me"
3166#endif
3167 return off;
3168}
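/*
 * Semantics sketch (illustrative only): in C terms the sequence above
 * computes, for a byte bMem loaded from [iGprBase + offDisp]:
 *
 *      iGprDst = (uint64_t)(uint16_t)(int8_t)bMem;
 *
 * i.e. the movsx / ldrsb sign-extends the byte and the trailing
 * 'and reg32, 0xffff' / AND-immediate truncates the result to 16 bits,
 * which also leaves bits 63 thru 16 of the destination cleared.
 */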
3169
3170
3171/**
3172 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3173 *
3174 * @note ARM64: Misaligned @a offDisp values and values not in the
3175 * -0x7ff8...0x7ff8 range will require a temporary register
3176 * (@a iGprTmp). Will assert / throw if the caller
3177 * does not heed this.
3178 */
3179DECL_FORCE_INLINE_THROW(uint32_t)
3180iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3181 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3182{
3183#ifdef RT_ARCH_AMD64
3184 /* movdqu reg128, mem128 */
3185 pCodeBuf[off++] = 0xf3;
3186 if (iVecRegDst >= 8 || iGprBase >= 8)
3187 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3188 pCodeBuf[off++] = 0x0f;
3189 pCodeBuf[off++] = 0x6f;
3190 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3191 RT_NOREF(iGprTmp);
3192
3193#elif defined(RT_ARCH_ARM64)
3194 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3195 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3196
3197#else
3198# error "port me"
3199#endif
3200 return off;
3201}
3202
3203
3204/**
3205 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3206 */
3207DECL_INLINE_THROW(uint32_t)
3208iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3209{
3210#ifdef RT_ARCH_AMD64
3211 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3212 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3213
3214#elif defined(RT_ARCH_ARM64)
3215 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3216
3217#else
3218# error "port me"
3219#endif
3220 return off;
3221}
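/*
 * Usage sketch contrasting the two flavours (register indexes below are
 * hypothetical, chosen purely for illustration). The plain wrapper grows
 * the instruction buffer itself, while the Ex flavour writes into a buffer
 * the caller has already ensured:
 *
 *      // wrapper: load 128 bits from [GPR2 + 16] into vector register 1
 *      off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, 1, 2, 16);
 *
 *      // Ex flavour: caller manages the buffer and the sanity assertion
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
 *      off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, 1, 2, 16);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */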
3222
3223
3224/**
3225 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3226 *
3227 * @note ARM64: Misaligned @a offDisp values and values not in the
3228 * -0x7ff8...0x7ff8 range will require a temporary register
3229 * (@a iGprTmp). Will assert / throw if the caller
3230 * does not heed this.
3231 */
3232DECL_FORCE_INLINE_THROW(uint32_t)
3233iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3234 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3235{
3236#ifdef RT_ARCH_AMD64
3237 /* vmovdqu reg256, mem256 */
3238 pCodeBuf[off++] = X86_OP_VEX3;
3239 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3240 | X86_OP_VEX3_BYTE1_X
3241 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3242 | UINT8_C(0x01);
3243 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3244 pCodeBuf[off++] = 0x6f;
3245 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3246 RT_NOREF(iGprTmp);
3247
3248#elif defined(RT_ARCH_ARM64)
3249 Assert(!(iVecRegDst & 0x1));
3250 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3251 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3252 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3253 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3254#else
3255# error "port me"
3256#endif
3257 return off;
3258}
3259
3260
3261/**
3262 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3263 */
3264DECL_INLINE_THROW(uint32_t)
3265iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3266{
3267#ifdef RT_ARCH_AMD64
3268 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3269 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3270
3271#elif defined(RT_ARCH_ARM64)
3272 Assert(!(iVecRegDst & 0x1));
3273 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3274 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3275 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3276 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3277
3278#else
3279# error "port me"
3280#endif
3281 return off;
3282}
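/*
 * On ARM64 the 256-bit load above is emitted as two consecutive 128-bit
 * loads, which is why @a iVecRegDst must be even there. Sketch of the
 * resulting accesses (offsets assumed to be in range):
 *
 *      vN   <- [iGprBase + offDisp]         ; low 128 bits
 *      vN+1 <- [iGprBase + offDisp + 16]    ; high 128 bits
 */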
3283
3284
3285/**
3286 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3287 *
3288 * @note ARM64: Misaligned @a offDisp values and values not in the
3289 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3290 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if the caller
3291 * does not heed this.
3292 */
3293DECL_FORCE_INLINE_THROW(uint32_t)
3294iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3295 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3296{
3297#ifdef RT_ARCH_AMD64
3298 /* mov mem64, reg64 */
3299 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3300 pCodeBuf[off++] = 0x89;
3301 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3302 RT_NOREF(iGprTmp);
3303
3304#elif defined(RT_ARCH_ARM64)
3305 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3306 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3307
3308#else
3309# error "port me"
3310#endif
3311 return off;
3312}
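/*
 * The -0x7ff8...0x7ff8 bound quoted above follows from the A64 unsigned
 * scaled 12-bit load/store offset field (worked out for 64-bit accesses):
 *
 *      max scaled offset = 0xfff * sizeof(uint64_t) = 0xfff * 8 = 0x7ff8
 *
 * and the offset must be a multiple of 8; anything outside that goes via
 * the temporary register path in iemNativeEmitGprByGprLdStEx.
 */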
3313
3314
3315/**
3316 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3317 *
3318 * @note ARM64: Misaligned @a offDisp values and values not in the
3319 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3320 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if the caller
3321 * does not heed this.
3322 */
3323DECL_FORCE_INLINE_THROW(uint32_t)
3324iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3325 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3326{
3327#ifdef RT_ARCH_AMD64
3328 /* mov mem32, reg32 */
3329 if (iGprSrc >= 8 || iGprBase >= 8)
3330 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3331 pCodeBuf[off++] = 0x89;
3332 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3333 RT_NOREF(iGprTmp);
3334
3335#elif defined(RT_ARCH_ARM64)
3336 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3337 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3338
3339#else
3340# error "port me"
3341#endif
3342 return off;
3343}
3344
3345
3346/**
3347 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3348 *
3349 * @note ARM64: Misaligned @a offDisp values and values not in the
3350 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3351 * @a iGprSrc and @a iGprBase are the same. Will assert / throw if the caller
3352 * does not heed this.
3353 */
3354DECL_FORCE_INLINE_THROW(uint32_t)
3355iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3356 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3357{
3358#ifdef RT_ARCH_AMD64
3359 /* mov mem16, reg16 */
3360 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3361 if (iGprSrc >= 8 || iGprBase >= 8)
3362 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3363 pCodeBuf[off++] = 0x89;
3364 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3365 RT_NOREF(iGprTmp);
3366
3367#elif defined(RT_ARCH_ARM64)
3368 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3369 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3370
3371#else
3372# error "port me"
3373#endif
3374 return off;
3375}
3376
3377
3378/**
3379 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3380 *
3381 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3382 * temporary register (@a iGprTmp) if @a iGprSrc and @a iGprBase are the
3383 * same. Will assert / throw if the caller does not heed this.
3384 */
3385DECL_FORCE_INLINE_THROW(uint32_t)
3386iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3387 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3388{
3389#ifdef RT_ARCH_AMD64
3390 /* mov mem8, reg8 */
3391 if (iGprSrc >= 8 || iGprBase >= 8)
3392 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3393 else if (iGprSrc >= 4)
3394 pCodeBuf[off++] = X86_OP_REX;
3395 pCodeBuf[off++] = 0x88;
3396 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3397 RT_NOREF(iGprTmp);
3398
3399#elif defined(RT_ARCH_ARM64)
3400 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3401 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3402
3403#else
3404# error "port me"
3405#endif
3406 return off;
3407}
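/*
 * The bare REX prefix emitted for iGprSrc in the 4..7 range is what makes
 * AMD64 select the SPL/BPL/SIL/DIL byte registers; without any REX prefix
 * the same ModRM values encode AH/CH/DH/BH. Encoding sketch (hypothetical
 * operands, storing GPR 6 to [rsp+8]):
 *
 *      40 88 74 24 08      mov byte [rsp+8], sil   ; with bare REX
 *      88 74 24 08         mov byte [rsp+8], dh    ; same bytes without REX
 */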
3408
3409
3410/**
3411 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3412 *
3413 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
3414 * AMD64 it depends on the immediate value.
3415 *
3416 * @note ARM64: Misaligned @a offDisp values and values not in the
3417 * -0x7ff8...0x7ff8 range will require a temporary register
3418 * (@a iGprTmp). Will assert / throw if the caller
3419 * does not heed this.
3420 */
3421DECL_FORCE_INLINE_THROW(uint32_t)
3422iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3423 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3424{
3425#ifdef RT_ARCH_AMD64
3426 if ((int32_t)uImm == (int64_t)uImm)
3427 {
3428 /* mov mem64, imm32 (sign-extended) */
3429 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3430 pCodeBuf[off++] = 0xc7;
3431 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3432 pCodeBuf[off++] = RT_BYTE1(uImm);
3433 pCodeBuf[off++] = RT_BYTE2(uImm);
3434 pCodeBuf[off++] = RT_BYTE3(uImm);
3435 pCodeBuf[off++] = RT_BYTE4(uImm);
3436 }
3437 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3438 {
3439 /* require temporary register. */
3440 if (iGprImmTmp == UINT8_MAX)
3441 iGprImmTmp = iGprTmp;
3442 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3443 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3444 }
3445 else
3446# ifdef IEM_WITH_THROW_CATCH
3447 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3448# else
3449 AssertReleaseFailedStmt(off = UINT32_MAX);
3450# endif
3451
3452#elif defined(RT_ARCH_ARM64)
3453 if (uImm == 0)
3454 iGprImmTmp = ARMV8_A64_REG_XZR;
3455 else
3456 {
3457 Assert(iGprImmTmp < 31);
3458 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3459 }
3460 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3461
3462#else
3463# error "port me"
3464#endif
3465 return off;
3466}
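/*
 * Usage sketch (hypothetical register numbers and a generous buffer size):
 * storing a constant that does not fit a sign-extended imm32, so the
 * immediate needs a temporary register on both targets:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
 *      off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x1234567890abcdef),
 *                                           2, 3, 8); // iGprBase=2, iGprImmTmp=3, offDisp=8
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 */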
3467
3468
3469/**
3470 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3471 *
3472 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3473 *
3474 * @note ARM64: Misaligned @a offDisp values and values not in the
3475 * -0x3ffc...0x3ffc range will require a temporary register
3476 * (@a iGprTmp). Will assert / throw if the caller
3477 * does not heed this.
3478 */
3479DECL_FORCE_INLINE_THROW(uint32_t)
3480iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3481 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3482{
3483#ifdef RT_ARCH_AMD64
3484 /* mov mem32, imm32 */
3485 if (iGprBase >= 8)
3486 pCodeBuf[off++] = X86_OP_REX_B;
3487 pCodeBuf[off++] = 0xc7;
3488 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3489 pCodeBuf[off++] = RT_BYTE1(uImm);
3490 pCodeBuf[off++] = RT_BYTE2(uImm);
3491 pCodeBuf[off++] = RT_BYTE3(uImm);
3492 pCodeBuf[off++] = RT_BYTE4(uImm);
3493 RT_NOREF(iGprImmTmp, iGprTmp);
3494
3495#elif defined(RT_ARCH_ARM64)
3497 if (uImm == 0)
3498 iGprImmTmp = ARMV8_A64_REG_XZR;
3499 else
3500 {
3501 Assert(iGprImmTmp < 31);
3502 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3503 }
3504 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3505 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3506
3507#else
3508# error "port me"
3509#endif
3510 return off;
3511}
3512
3513
3514/**
3515 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3516 *
3517 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3518 *
3519 * @note ARM64: Misaligned @a offDisp values and values not in the
3520 * -0x1ffe...0x1ffe range will require a temporary register
3521 * (@a iGprTmp). Will assert / throw if the caller
3522 * does not heed this.
3523 */
3524DECL_FORCE_INLINE_THROW(uint32_t)
3525iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3526 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3527{
3528#ifdef RT_ARCH_AMD64
3529 /* mov mem16, imm16 */
3530 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3531 if (iGprBase >= 8)
3532 pCodeBuf[off++] = X86_OP_REX_B;
3533 pCodeBuf[off++] = 0xc7;
3534 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3535 pCodeBuf[off++] = RT_BYTE1(uImm);
3536 pCodeBuf[off++] = RT_BYTE2(uImm);
3537 RT_NOREF(iGprImmTmp, iGprTmp);
3538
3539#elif defined(RT_ARCH_ARM64)
3540 if (uImm == 0)
3541 iGprImmTmp = ARMV8_A64_REG_XZR;
3542 else
3543 {
3544 Assert(iGprImmTmp < 31);
3545 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3546 }
3547 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3548 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3549
3550#else
3551# error "port me"
3552#endif
3553 return off;
3554}
3555
3556
3557/**
3558 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3559 *
3560 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3561 *
3562 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3563 * temporary register (@a iGprTmp). Will assert / throw if the caller does
3564 * not heed this.
3565 */
3566DECL_FORCE_INLINE_THROW(uint32_t)
3567iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3568 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3569{
3570#ifdef RT_ARCH_AMD64
3571 /* mov mem8, imm8 */
3573 if (iGprBase >= 8)
3574 pCodeBuf[off++] = X86_OP_REX_B;
3575 pCodeBuf[off++] = 0xc6;
3576 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3577 pCodeBuf[off++] = uImm;
3578 RT_NOREF(iGprImmTmp, iGprTmp);
3579
3580#elif defined(RT_ARCH_ARM64)
3581 if (uImm == 0)
3582 iGprImmTmp = ARMV8_A64_REG_XZR;
3583 else
3584 {
3585 Assert(iGprImmTmp < 31);
3586 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3587 }
3588 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3589 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3590
3591#else
3592# error "port me"
3593#endif
3594 return off;
3595}
3596
3597
3598/**
3599 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3600 *
3601 * @note ARM64: Misaligned @a offDisp values and values not in the
3602 * -0x7ff8...0x7ff8 range will require a temporary register
3603 * (@a iGprTmp). Will assert / throw if the caller
3604 * does not heed this.
3605 */
3606DECL_FORCE_INLINE_THROW(uint32_t)
3607iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3608 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3609{
3610#ifdef RT_ARCH_AMD64
3611 /* movdqu mem128, reg128 */
3612 pCodeBuf[off++] = 0xf3;
3613 if (iVecRegDst >= 8 || iGprBase >= 8)
3614 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3615 pCodeBuf[off++] = 0x0f;
3616 pCodeBuf[off++] = 0x7f;
3617 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3618 RT_NOREF(iGprTmp);
3619
3620#elif defined(RT_ARCH_ARM64)
3621 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3622 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3623
3624#else
3625# error "port me"
3626#endif
3627 return off;
3628}
3629
3630
3631/**
3632 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3633 */
3634DECL_INLINE_THROW(uint32_t)
3635iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3636{
3637#ifdef RT_ARCH_AMD64
3638 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3639 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3640
3641#elif defined(RT_ARCH_ARM64)
3642 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3643
3644#else
3645# error "port me"
3646#endif
3647 return off;
3648}
3649
3650
3651/**
3652 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3653 *
3654 * @note ARM64: Misaligned @a offDisp values and values not in the
3655 * -0x7ff8...0x7ff8 range will require a temporary register
3656 * (@a iGprTmp). Will assert / throw if the caller
3657 * does not heed this.
3658 */
3659DECL_FORCE_INLINE_THROW(uint32_t)
3660iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3661 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3662{
3663#ifdef RT_ARCH_AMD64
3664 /* vmovdqu mem256, reg256 */
3665 pCodeBuf[off++] = X86_OP_VEX3;
3666 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3667 | X86_OP_VEX3_BYTE1_X
3668 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3669 | UINT8_C(0x01);
3670 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3671 pCodeBuf[off++] = 0x7f;
3672 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3673 RT_NOREF(iGprTmp);
3674
3675#elif defined(RT_ARCH_ARM64)
3676 Assert(!(iVecRegDst & 0x1));
3677 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3678 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3679 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3680 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3681#else
3682# error "port me"
3683#endif
3684 return off;
3685}
3686
3687
3688/**
3689 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3690 */
3691DECL_INLINE_THROW(uint32_t)
3692iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3693{
3694#ifdef RT_ARCH_AMD64
3695 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3697
3698#elif defined(RT_ARCH_ARM64)
3699 Assert(!(iVecRegDst & 0x1));
3700 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3701 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3702 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3703 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3704
3705#else
3706# error "port me"
3707#endif
3708 return off;
3709}
3710
3711
3712
3713/*********************************************************************************************************************************
3714* Subtraction and Additions *
3715*********************************************************************************************************************************/
3716
3717/**
3718 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3719 * @note The AMD64 version sets flags.
3720 */
3721DECL_INLINE_THROW(uint32_t)
3722iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3723{
3724#if defined(RT_ARCH_AMD64)
3725 /* sub Gv,Ev */
3726 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3727 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3728 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3729 pbCodeBuf[off++] = 0x2b;
3730 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3731
3732#elif defined(RT_ARCH_ARM64)
3733 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3734 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3735
3736#else
3737# error "Port me"
3738#endif
3739 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3740 return off;
3741}
3742
3743
3744/**
3745 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3746 * @note The AMD64 version sets flags.
3747 */
3748DECL_FORCE_INLINE(uint32_t)
3749iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3750{
3751#if defined(RT_ARCH_AMD64)
3752 /* sub Gv,Ev */
3753 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3754 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3755 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3756 pCodeBuf[off++] = 0x2b;
3757 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3758
3759#elif defined(RT_ARCH_ARM64)
3760 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3761
3762#else
3763# error "Port me"
3764#endif
3765 return off;
3766}
3767
3768
3769/**
3770 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3771 * @note The AMD64 version sets flags.
3772 */
3773DECL_INLINE_THROW(uint32_t)
3774iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3775{
3776#if defined(RT_ARCH_AMD64)
3777 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3778#elif defined(RT_ARCH_ARM64)
3779 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3780#else
3781# error "Port me"
3782#endif
3783 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3784 return off;
3785}
3786
3787
3788/**
3789 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3790 *
3791 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3792 *
3793 * @note Larger constants will require a temporary register. Failing to specify
3794 * one when needed will trigger fatal assertion / throw.
3795 */
3796DECL_FORCE_INLINE_THROW(uint32_t)
3797iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3798 uint8_t iGprTmp = UINT8_MAX)
3799{
3800#ifdef RT_ARCH_AMD64
3801 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3802 if (iSubtrahend == 1)
3803 {
3804 /* dec r/m64 */
3805 pCodeBuf[off++] = 0xff;
3806 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3807 }
3808 else if (iSubtrahend == -1)
3809 {
3810 /* inc r/m64 */
3811 pCodeBuf[off++] = 0xff;
3812 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3813 }
3814 else if ((int8_t)iSubtrahend == iSubtrahend)
3815 {
3816 /* sub r/m64, imm8 */
3817 pCodeBuf[off++] = 0x83;
3818 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3819 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3820 }
3821 else if ((int32_t)iSubtrahend == iSubtrahend)
3822 {
3823 /* sub r/m64, imm32 */
3824 pCodeBuf[off++] = 0x81;
3825 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3826 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3827 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3828 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3829 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3830 }
3831 else if (iGprTmp != UINT8_MAX)
3832 {
3833 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3834 /* sub r/m64, r64 */
3835 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3836 pCodeBuf[off++] = 0x29;
3837 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3838 }
3839 else
3840# ifdef IEM_WITH_THROW_CATCH
3841 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3842# else
3843 AssertReleaseFailedStmt(off = UINT32_MAX);
3844# endif
3845
3846#elif defined(RT_ARCH_ARM64)
3847 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3848 if (uAbsSubtrahend < 4096)
3849 {
3850 if (iSubtrahend >= 0)
3851 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3852 else
3853 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3854 }
3855 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3856 {
3857 if (iSubtrahend >= 0)
3858 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3859 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3860 else
3861 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3862 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3863 }
3864 else if (iGprTmp != UINT8_MAX)
3865 {
3866 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3867 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3868 }
3869 else
3870# ifdef IEM_WITH_THROW_CATCH
3871 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3872# else
3873 AssertReleaseFailedStmt(off = UINT32_MAX);
3874# endif
3875
3876#else
3877# error "Port me"
3878#endif
3879 return off;
3880}
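/*
 * Encoding selection sketch for the AMD64 path above: +1/-1 become the
 * shorter dec/inc forms, int8_t range uses 'sub r/m64, imm8', int32_t
 * range uses 'sub r/m64, imm32', and anything larger needs @a iGprTmp,
 * e.g. (hypothetical registers):
 *
 *      off = iemNativeEmitSubGprImmEx(pCodeBuf, off, 0, INT64_C(0x100000000), 11);
 *      // -> mov r11, 0x100000000 ; sub rax, r11
 */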
3881
3882
3883/**
3884 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3885 *
3886 * @note Larger constants will require a temporary register. Failing to specify
3887 * one when needed will trigger fatal assertion / throw.
3888 */
3889DECL_INLINE_THROW(uint32_t)
3890iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3891 uint8_t iGprTmp = UINT8_MAX)
3892
3893{
3894#ifdef RT_ARCH_AMD64
3895 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3896#elif defined(RT_ARCH_ARM64)
3897 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3898#else
3899# error "Port me"
3900#endif
3901 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3902 return off;
3903}
3904
3905
3906/**
3907 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3908 *
3909 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3910 *
3911 * @note ARM64: Larger constants will require a temporary register. Failing to
3912 * specify one when needed will trigger fatal assertion / throw.
3913 */
3914DECL_FORCE_INLINE_THROW(uint32_t)
3915iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3916 uint8_t iGprTmp = UINT8_MAX)
3917{
3918#ifdef RT_ARCH_AMD64
3919 if (iGprDst >= 8)
3920 pCodeBuf[off++] = X86_OP_REX_B;
3921 if (iSubtrahend == 1)
3922 {
3923 /* dec r/m32 */
3924 pCodeBuf[off++] = 0xff;
3925 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3926 }
3927 else if (iSubtrahend == -1)
3928 {
3929 /* inc r/m32 */
3930 pCodeBuf[off++] = 0xff;
3931 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3932 }
3933 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3934 {
3935 /* sub r/m32, imm8 */
3936 pCodeBuf[off++] = 0x83;
3937 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3938 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3939 }
3940 else
3941 {
3942 /* sub r/m32, imm32 */
3943 pCodeBuf[off++] = 0x81;
3944 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3945 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3946 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3947 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3948 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3949 }
3950 RT_NOREF(iGprTmp);
3951
3952#elif defined(RT_ARCH_ARM64)
3953 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3954 if (uAbsSubtrahend < 4096)
3955 {
3956 if (iSubtrahend >= 0)
3957 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3958 else
3959 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3960 }
3961 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3962 {
3963 if (iSubtrahend >= 0)
3964 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3965 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3966 else
3967 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3968 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3969 }
3970 else if (iGprTmp != UINT8_MAX)
3971 {
3972 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3973 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3974 }
3975 else
3976# ifdef IEM_WITH_THROW_CATCH
3977 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3978# else
3979 AssertReleaseFailedStmt(off = UINT32_MAX);
3980# endif
3981
3982#else
3983# error "Port me"
3984#endif
3985 return off;
3986}
3987
3988
3989/**
3990 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3991 *
3992 * @note ARM64: Larger constants will require a temporary register. Failing to
3993 * specify one when needed will trigger fatal assertion / throw.
3994 */
3995DECL_INLINE_THROW(uint32_t)
3996iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3997 uint8_t iGprTmp = UINT8_MAX)
3998
3999{
4000#ifdef RT_ARCH_AMD64
4001 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
4002#elif defined(RT_ARCH_ARM64)
4003 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
4004#else
4005# error "Port me"
4006#endif
4007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4008 return off;
4009}
4010
4011
4012/**
4013 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
4014 *
4015 * This will optimize using DEC/INC/whatever and the ARM64 version will not
4016 * set flags, so it is not suitable as a basis for conditional jumps.
4017 *
4018 * @note AMD64: Will only update the lower 16 bits of the register.
4019 * @note ARM64: Will update the entire register.
4020 * @note ARM64: Larger constants will require a temporary register. Failing to
4021 * specify one when needed will trigger fatal assertion / throw.
4022 */
4023DECL_FORCE_INLINE_THROW(uint32_t)
4024iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
4025 uint8_t iGprTmp = UINT8_MAX)
4026{
4027#ifdef RT_ARCH_AMD64
4028 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4029 if (iGprDst >= 8)
4030 pCodeBuf[off++] = X86_OP_REX_B;
4031 if (iSubtrahend == 1)
4032 {
4033 /* dec r/m16 */
4034 pCodeBuf[off++] = 0xff;
4035 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4036 }
4037 else if (iSubtrahend == -1)
4038 {
4039 /* inc r/m16 */
4040 pCodeBuf[off++] = 0xff;
4041 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4042 }
4043 else if ((int8_t)iSubtrahend == iSubtrahend)
4044 {
4045 /* sub r/m16, imm8 */
4046 pCodeBuf[off++] = 0x83;
4047 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4048 pCodeBuf[off++] = (uint8_t)iSubtrahend;
4049 }
4050 else
4051 {
4052 /* sub r/m16, imm16 */
4053 pCodeBuf[off++] = 0x81;
4054 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4055 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
4056 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
4057 }
4058 RT_NOREF(iGprTmp);
4059
4060#elif defined(RT_ARCH_ARM64)
4061 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
4062 if (uAbsSubtrahend < 4096)
4063 {
4064 if (iSubtrahend >= 0)
4065 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4066 else
4067 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4068 }
4069 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
4070 {
4071 if (iSubtrahend >= 0)
4072 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4073 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4074 else
4075 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4076 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4077 }
4078 else if (iGprTmp != UINT8_MAX)
4079 {
4080 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
4081 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4082 }
4083 else
4084# ifdef IEM_WITH_THROW_CATCH
4085 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4086# else
4087 AssertReleaseFailedStmt(off = UINT32_MAX);
4088# endif
4089 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4090
4091#else
4092# error "Port me"
4093#endif
4094 return off;
4095}
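/*
 * Resulting semantics sketch for the ARM64 path above: the add/sub is done
 * at 32-bit width and the final AND-immediate truncates, so in C terms
 *
 *      iGprDst = (uint16_t)((uint16_t)iGprDst - iSubtrahend);  // bits 63:16 zeroed
 *
 * whereas the AMD64 path updates only the lower 16 bits and leaves the
 * rest of the register untouched, as noted above.
 */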
4096
4097
4098/**
4099 * Emits adding a 64-bit GPR to another, storing the result in the first.
4100 * @note The AMD64 version sets flags.
4101 */
4102DECL_FORCE_INLINE(uint32_t)
4103iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4104{
4105#if defined(RT_ARCH_AMD64)
4106 /* add Gv,Ev */
4107 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4108 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
4109 pCodeBuf[off++] = 0x03;
4110 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4111
4112#elif defined(RT_ARCH_ARM64)
4113 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
4114
4115#else
4116# error "Port me"
4117#endif
4118 return off;
4119}
4120
4121
4122/**
4123 * Emits adding a 64-bit GPR to another, storing the result in the first.
4124 * @note The AMD64 version sets flags.
4125 */
4126DECL_INLINE_THROW(uint32_t)
4127iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4128{
4129#if defined(RT_ARCH_AMD64)
4130 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4131#elif defined(RT_ARCH_ARM64)
4132 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4133#else
4134# error "Port me"
4135#endif
4136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4137 return off;
4138}
4139
4140
4141/**
4142 * Emits adding a 32-bit GPR to another, storing the result in the first.
4143 * @note The AMD64 version sets flags.
4144 */
4145DECL_FORCE_INLINE(uint32_t)
4146iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4147{
4148#if defined(RT_ARCH_AMD64)
4149 /* add Gv,Ev */
4150 if (iGprDst >= 8 || iGprAddend >= 8)
4151 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
4152 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
4153 pCodeBuf[off++] = 0x03;
4154 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4155
4156#elif defined(RT_ARCH_ARM64)
4157 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
4158
4159#else
4160# error "Port me"
4161#endif
4162 return off;
4163}
4164
4165
4166/**
4167 * Emits adding a 32-bit GPR to another, storing the result in the first.
4168 * @note The AMD64 version sets flags.
4169 */
4170DECL_INLINE_THROW(uint32_t)
4171iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4172{
4173#if defined(RT_ARCH_AMD64)
4174 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4175#elif defined(RT_ARCH_ARM64)
4176 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4177#else
4178# error "Port me"
4179#endif
4180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4181 return off;
4182}
4183
4184
4185/**
4186 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4187 */
4188DECL_INLINE_THROW(uint32_t)
4189iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4190{
4191#if defined(RT_ARCH_AMD64)
4192 /* add or inc */
4193 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4194 if (iImm8 != 1)
4195 {
4196 pCodeBuf[off++] = 0x83;
4197 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4198 pCodeBuf[off++] = (uint8_t)iImm8;
4199 }
4200 else
4201 {
4202 pCodeBuf[off++] = 0xff;
4203 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4204 }
4205
4206#elif defined(RT_ARCH_ARM64)
4207 if (iImm8 >= 0)
4208 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4209 else
4210 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4211
4212#else
4213# error "Port me"
4214#endif
4215 return off;
4216}
4217
4218
4219/**
4220 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4221 */
4222DECL_INLINE_THROW(uint32_t)
4223iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4224{
4225#if defined(RT_ARCH_AMD64)
4226 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4227#elif defined(RT_ARCH_ARM64)
4228 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4229#else
4230# error "Port me"
4231#endif
4232 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4233 return off;
4234}
4235
4236
4237/**
4238 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4239 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4240 */
4241DECL_FORCE_INLINE(uint32_t)
4242iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4243{
4244#if defined(RT_ARCH_AMD64)
4245 /* add or inc */
4246 if (iGprDst >= 8)
4247 pCodeBuf[off++] = X86_OP_REX_B;
4248 if (iImm8 != 1)
4249 {
4250 pCodeBuf[off++] = 0x83;
4251 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4252 pCodeBuf[off++] = (uint8_t)iImm8;
4253 }
4254 else
4255 {
4256 pCodeBuf[off++] = 0xff;
4257 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4258 }
4259
4260#elif defined(RT_ARCH_ARM64)
4261 if (iImm8 >= 0)
4262 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4263 else
4264 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4265
4266#else
4267# error "Port me"
4268#endif
4269 return off;
4270}
4271
4272
4273/**
4274 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4275 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4276 */
4277DECL_INLINE_THROW(uint32_t)
4278iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4279{
4280#if defined(RT_ARCH_AMD64)
4281 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4282#elif defined(RT_ARCH_ARM64)
4283 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4284#else
4285# error "Port me"
4286#endif
4287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4288 return off;
4289}
4290
4291
4292/**
4293 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4294 *
4295 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4296 */
4297DECL_FORCE_INLINE_THROW(uint32_t)
4298iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4299{
4300#if defined(RT_ARCH_AMD64)
4301 if ((int8_t)iAddend == iAddend)
4302 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4303
4304 if ((int32_t)iAddend == iAddend)
4305 {
4306 /* add grp, imm32 */
4307 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4308 pCodeBuf[off++] = 0x81;
4309 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4310 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4311 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4312 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4313 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4314 }
4315 else if (iGprTmp != UINT8_MAX)
4316 {
4317 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4318
4319 /* add dst, tmpreg */
4320 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4321 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4322 pCodeBuf[off++] = 0x03;
4323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4324 }
4325 else
4326# ifdef IEM_WITH_THROW_CATCH
4327 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4328# else
4329 AssertReleaseFailedStmt(off = UINT32_MAX);
4330# endif
4331
4332#elif defined(RT_ARCH_ARM64)
4333 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4334 if (uAbsAddend <= 0xffffffU)
4335 {
4336 bool const fSub = iAddend < 0;
4337 if (uAbsAddend > 0xfffU)
4338 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4339 false /*fSetFlags*/, true /*fShift12*/);
4340 if (uAbsAddend & 0xfffU)
4341 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4342 }
4343 else if (iGprTmp != UINT8_MAX)
4344 {
4345 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4346 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4347 }
4348 else
4349# ifdef IEM_WITH_THROW_CATCH
4350 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4351# else
4352 AssertReleaseFailedStmt(off = UINT32_MAX);
4353# endif
4354
4355#else
4356# error "Port me"
4357#endif
4358 return off;
4359}
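/*
 * Worked example for the ARM64 24-bit immediate split above (values are
 * illustrative): iAddend = 0x123456 gives uAbsAddend >> 12 = 0x123 and
 * uAbsAddend & 0xfff = 0x456, so two instructions are emitted:
 *
 *      add iGprDst, iGprDst, #0x123, lsl #12
 *      add iGprDst, iGprDst, #0x456
 *
 * Either half is skipped when zero, and addends beyond 24 bits fall back
 * to loading the constant into @a iGprTmp.
 */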
4360
4361
4362/**
4363 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4364 */
4365DECL_INLINE_THROW(uint32_t)
4366iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4367{
4368#if defined(RT_ARCH_AMD64)
4369 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4370 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4371
4372 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4373 {
4374 /* add grp, imm32 */
4375 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4376 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4377 pbCodeBuf[off++] = 0x81;
4378 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4379 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4380 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4381 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4382 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4383 }
4384 else
4385 {
4386 /* Best to use a temporary register to deal with this in the simplest way: */
4387 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4388
4389 /* add dst, tmpreg */
4390 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4391 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4392 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4393 pbCodeBuf[off++] = 0x03;
4394 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4395
4396 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4397 }
4398
4399#elif defined(RT_ARCH_ARM64)
4400 bool const fSub = iAddend < 0;
4401 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4402 if (uAbsAddend <= 0xffffffU)
4403 {
4404 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4405 if (uAbsAddend > 0xfffU)
4406 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4407 false /*fSetFlags*/, true /*fShift12*/);
4408 if (uAbsAddend & 0xfffU)
4409 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4410 }
4411 else
4412 {
4413 /* Use temporary register for the immediate. */
4414 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4415
4416 /* add gprdst, gprdst, tmpreg */
4417 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4418 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4419
4420 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4421 }
4422
4423#else
4424# error "Port me"
4425#endif
4426 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4427 return off;
4428}
4429
4430
4431/**
4432 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4433 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4434 * @note ARM64: @a iAddend values in the -0xffffff..0xffffff range are encoded
4435 * directly (negative values behaving like a subtraction); anything larger
4436 * requires @a iGprTmp and will assert / throw if none is provided.
4437 */
4438DECL_FORCE_INLINE_THROW(uint32_t)
4439iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4440{
4441#if defined(RT_ARCH_AMD64)
4442 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4443 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4444
4445 /* add grp, imm32 */
4446 if (iGprDst >= 8)
4447 pCodeBuf[off++] = X86_OP_REX_B;
4448 pCodeBuf[off++] = 0x81;
4449 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4450 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4451 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4452 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4453 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4454 RT_NOREF(iGprTmp);
4455
4456#elif defined(RT_ARCH_ARM64)
4457 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4458 if (uAbsAddend <= 0xffffffU)
4459 {
4460 bool const fSub = iAddend < 0;
4461 if (uAbsAddend > 0xfffU)
4462 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4463 false /*fSetFlags*/, true /*fShift12*/);
4464 if (uAbsAddend & 0xfffU)
4465 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4466 }
4467 else if (iGprTmp != UINT8_MAX)
4468 {
4469 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, iAddend);
4470 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4471 }
4472 else
4473# ifdef IEM_WITH_THROW_CATCH
4474 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4475# else
4476 AssertReleaseFailedStmt(off = UINT32_MAX);
4477# endif
4478
4479#else
4480# error "Port me"
4481#endif
4482 return off;
4483}
4484
4485
4486/**
4487 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4488 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4489 */
4490DECL_INLINE_THROW(uint32_t)
4491iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4492{
4493#if defined(RT_ARCH_AMD64)
4494 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4495
4496#elif defined(RT_ARCH_ARM64)
4497 bool const fSub = iAddend < 0;
4498 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4499 if (uAbsAddend <= 0xffffffU)
4500 {
4501 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4502 if (uAbsAddend > 0xfffU)
4503 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4504 false /*fSetFlags*/, true /*fShift12*/);
4505 if (uAbsAddend & 0xfffU)
4506 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4507 }
4508 else
4509 {
4510 /* Use temporary register for the immediate. */
4511 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4512
4513 /* add gprdst, gprdst, tmpreg */
4514 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4515 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4516
4517 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4518 }
4519
4520#else
4521# error "Port me"
4522#endif
4523 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4524 return off;
4525}
4526
4527
4528/**
4529 * Emits a 16-bit GPR add with a signed immediate addend.
4530 *
4531 * This will optimize using INC/DEC/whatever and the ARM64 version will not
4532 * set flags, so it is not suitable as a basis for conditional jumps.
4533 *
4534 * @note AMD64: Will only update the lower 16 bits of the register.
4535 * @note ARM64: Will update the entire register.
4536 * @sa iemNativeEmitSubGpr16ImmEx
4537 */
4538DECL_FORCE_INLINE(uint32_t)
4539iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4540{
4541#ifdef RT_ARCH_AMD64
4542 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4543 if (iGprDst >= 8)
4544 pCodeBuf[off++] = X86_OP_REX_B;
4545 if (iAddend == 1)
4546 {
4547 /* inc r/m16 */
4548 pCodeBuf[off++] = 0xff;
4549 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4550 }
4551 else if (iAddend == -1)
4552 {
4553 /* dec r/m16 */
4554 pCodeBuf[off++] = 0xff;
4555 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4556 }
4557 else if ((int8_t)iAddend == iAddend)
4558 {
4559 /* add r/m16, imm8 */
4560 pCodeBuf[off++] = 0x83;
4561 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4562 pCodeBuf[off++] = (uint8_t)iAddend;
4563 }
4564 else
4565 {
4566 /* add r/m16, imm16 */
4567 pCodeBuf[off++] = 0x81;
4568 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4569 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4570 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4571 }
4572
4573#elif defined(RT_ARCH_ARM64)
4574 bool const fSub = iAddend < 0;
4575 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4576 if (uAbsAddend > 0xfffU)
4577 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4578 false /*fSetFlags*/, true /*fShift12*/);
4579 if (uAbsAddend & 0xfffU)
4580 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4581 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4582
4583#else
4584# error "Port me"
4585#endif
4586 return off;
4587}
4588
4589
4590
4591/**
4592 * Adds two 64-bit GPRs together, storing the result in a third register.
4593 */
4594DECL_FORCE_INLINE(uint32_t)
4595iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4596{
4597#ifdef RT_ARCH_AMD64
4598 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4599 {
4600 /** @todo consider LEA */
4601 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4602 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4603 }
4604 else
4605 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4606
4607#elif defined(RT_ARCH_ARM64)
4608 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4609
4610#else
4611# error "Port me!"
4612#endif
4613 return off;
4614}
4615
4616
4617
4618/**
4619 * Adds two 32-bit GPRs together, storing the result in a third register.
4620 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4621 */
4622DECL_FORCE_INLINE(uint32_t)
4623iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4624{
4625#ifdef RT_ARCH_AMD64
4626 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4627 {
4628 /** @todo consider LEA */
4629 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4630 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4631 }
4632 else
4633 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4634
4635#elif defined(RT_ARCH_ARM64)
4636 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4637
4638#else
4639# error "Port me!"
4640#endif
4641 return off;
4642}
4643
4644
4645/**
4646 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4647 * third register.
4648 *
4649 * @note The ARM64 version does not work for non-trivial constants if the
4650 * two registers are the same; it will assert / throw an exception.
4651 */
4652DECL_FORCE_INLINE_THROW(uint32_t)
4653iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4654{
4655#ifdef RT_ARCH_AMD64
4656 /** @todo consider LEA */
4657 if ((int8_t)iImmAddend == iImmAddend)
4658 {
4659 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4660 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4661 }
4662 else
4663 {
4664 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4665 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4666 }
4667
4668#elif defined(RT_ARCH_ARM64)
4669 bool const fSub = iImmAddend < 0;
4670 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4671 if (uAbsImmAddend <= 0xfffU)
4672 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4673 else if (uAbsImmAddend <= 0xffffffU)
4674 {
4675 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4676 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4677 if (uAbsImmAddend & 0xfffU)
4678 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4679 }
4680 else if (iGprDst != iGprAddend)
4681 {
4682 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4683 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4684 }
4685 else
4686# ifdef IEM_WITH_THROW_CATCH
4687 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4688# else
4689 AssertReleaseFailedStmt(off = UINT32_MAX);
4690# endif
4691
4692#else
4693# error "Port me!"
4694#endif
4695 return off;
4696}
4697
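/* A minimal usage sketch for the Ex-style emitter above, assuming a valid
   pReNative/off pair and hypothetical host register indices 2 and 3; the
   caller ensures worst-case buffer space up front (16 units is assumed to be
   ample here for both AMD64 bytes and ARM64 instructions): */
#if 0
PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, 2 /*iGprDst*/, 3 /*iGprAddend*/, 0x12345678);
IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif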
4698
4699/**
4700 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4701 * third register.
4702 *
4703 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4704 *
4705 * @note The ARM64 version does not work for non-trivial constants if the
4706 * two registers are the same; it will assert / throw an exception.
4707 */
4708DECL_FORCE_INLINE_THROW(uint32_t)
4709iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4710{
4711#ifdef RT_ARCH_AMD64
4712 /** @todo consider LEA */
4713 if ((int8_t)iImmAddend == iImmAddend)
4714 {
4715 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4716 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4717 }
4718 else
4719 {
4720 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4721 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4722 }
4723
4724#elif defined(RT_ARCH_ARM64)
4725 bool const fSub = iImmAddend < 0;
4726 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4727 if (uAbsImmAddend <= 0xfffU)
4728 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4729 else if (uAbsImmAddend <= 0xffffffU)
4730 {
4731 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4732 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4733 if (uAbsImmAddend & 0xfffU)
4734 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4735 }
4736 else if (iGprDst != iGprAddend)
4737 {
4738 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4739 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4740 }
4741 else
4742# ifdef IEM_WITH_THROW_CATCH
4743 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4744# else
4745 AssertReleaseFailedStmt(off = UINT32_MAX);
4746# endif
4747
4748#else
4749# error "Port me!"
4750#endif
4751 return off;
4752}
4753
4754
4755/*********************************************************************************************************************************
4756* Unary Operations *
4757*********************************************************************************************************************************/
4758
4759/**
4760 * Emits code for two's complement negation of a 64-bit GPR.
4761 */
4762DECL_FORCE_INLINE_THROW(uint32_t)
4763iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4764{
4765#if defined(RT_ARCH_AMD64)
4766 /* neg Ev */
4767 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4768 pCodeBuf[off++] = 0xf7;
4769 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4770
4771#elif defined(RT_ARCH_ARM64)
4772 /* sub dst, xzr, dst */
4773 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4774
4775#else
4776# error "Port me"
4777#endif
4778 return off;
4779}
4780
4781
4782/**
4783 * Emits code for two's complement negation of a 64-bit GPR.
4784 */
4785DECL_INLINE_THROW(uint32_t)
4786iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4787{
4788#if defined(RT_ARCH_AMD64)
4789 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4790#elif defined(RT_ARCH_ARM64)
4791 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4792#else
4793# error "Port me"
4794#endif
4795 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4796 return off;
4797}
4798
4799
4800/**
4801 * Emits code for two's complement negation of a 32-bit GPR.
4802 * @note Bits 32 thru 63 are set to zero.
4803 */
4804DECL_FORCE_INLINE_THROW(uint32_t)
4805iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4806{
4807#if defined(RT_ARCH_AMD64)
4808 /* neg Ev */
4809 if (iGprDst >= 8)
4810 pCodeBuf[off++] = X86_OP_REX_B;
4811 pCodeBuf[off++] = 0xf7;
4812 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4813
4814#elif defined(RT_ARCH_ARM64)
4815 /* sub dst, xzr, dst */
4816 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4817
4818#else
4819# error "Port me"
4820#endif
4821 return off;
4822}
4823
4824
4825/**
4826 * Emits code for two's complement negation of a 32-bit GPR.
4827 * @note Bits 32 thru 63 are set to zero.
4828 */
4829DECL_INLINE_THROW(uint32_t)
4830iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4831{
4832#if defined(RT_ARCH_AMD64)
4833 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4834#elif defined(RT_ARCH_ARM64)
4835 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4836#else
4837# error "Port me"
4838#endif
4839 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4840 return off;
4841}
4842
4843
4844
4845/*********************************************************************************************************************************
4846* Bit Operations *
4847*********************************************************************************************************************************/
4848
4849/**
4850 * Emits code for clearing bits 16 thru 63 in the GPR.
4851 */
4852DECL_INLINE_THROW(uint32_t)
4853iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4854{
4855#if defined(RT_ARCH_AMD64)
4856 /* movzx Gv,Ew */
4857 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4858 if (iGprDst >= 8)
4859 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4860 pbCodeBuf[off++] = 0x0f;
4861 pbCodeBuf[off++] = 0xb7;
4862 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4863
4864#elif defined(RT_ARCH_ARM64)
4865 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4866# if 1
4867 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4868# else
4869 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4870 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4871# endif
4872#else
4873# error "Port me"
4874#endif
4875 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4876 return off;
4877}
4878
4879
4880/**
4881 * Emits code for AND'ing two 64-bit GPRs.
4882 *
4883 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4884 * and ARM64 hosts.
4885 */
4886DECL_FORCE_INLINE(uint32_t)
4887iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4888{
4889#if defined(RT_ARCH_AMD64)
4890 /* and Gv, Ev */
4891 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4892 pCodeBuf[off++] = 0x23;
4893 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4894 RT_NOREF(fSetFlags);
4895
4896#elif defined(RT_ARCH_ARM64)
4897 if (!fSetFlags)
4898 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4899 else
4900 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4901
4902#else
4903# error "Port me"
4904#endif
4905 return off;
4906}
4907
4908
4909/**
4910 * Emits code for AND'ing two 64-bit GPRs.
4911 *
4912 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4913 * and ARM64 hosts.
4914 */
4915DECL_INLINE_THROW(uint32_t)
4916iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4917{
4918#if defined(RT_ARCH_AMD64)
4919 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4920#elif defined(RT_ARCH_ARM64)
4921 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4922#else
4923# error "Port me"
4924#endif
4925 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4926 return off;
4927}
4928
4929
4930/**
4931 * Emits code for AND'ing two 32-bit GPRs.
4932 */
4933DECL_FORCE_INLINE(uint32_t)
4934iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4935{
4936#if defined(RT_ARCH_AMD64)
4937 /* and Gv, Ev */
4938 if (iGprDst >= 8 || iGprSrc >= 8)
4939 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4940 pCodeBuf[off++] = 0x23;
4941 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4942 RT_NOREF(fSetFlags);
4943
4944#elif defined(RT_ARCH_ARM64)
4945 if (!fSetFlags)
4946 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4947 else
4948 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4949
4950#else
4951# error "Port me"
4952#endif
4953 return off;
4954}
4955
4956
4957/**
4958 * Emits code for AND'ing two 32-bit GPRs.
4959 */
4960DECL_INLINE_THROW(uint32_t)
4961iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4962{
4963#if defined(RT_ARCH_AMD64)
4964 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4965#elif defined(RT_ARCH_ARM64)
4966 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4967#else
4968# error "Port me"
4969#endif
4970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4971 return off;
4972}
4973
4974
4975/**
4976 * Emits code for AND'ing a 64-bit GPR with a constant.
4977 *
4978 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4979 * and ARM64 hosts.
4980 */
4981DECL_INLINE_THROW(uint32_t)
4982iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4983{
4984#if defined(RT_ARCH_AMD64)
4985 if ((int64_t)uImm == (int8_t)uImm)
4986 {
4987 /* and Ev, imm8 */
4988 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4989 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4990 pbCodeBuf[off++] = 0x83;
4991 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4992 pbCodeBuf[off++] = (uint8_t)uImm;
4993 }
4994 else if ((int64_t)uImm == (int32_t)uImm)
4995 {
4996 /* and Ev, imm32 */
4997 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4998 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4999 pbCodeBuf[off++] = 0x81;
5000 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5001 pbCodeBuf[off++] = RT_BYTE1(uImm);
5002 pbCodeBuf[off++] = RT_BYTE2(uImm);
5003 pbCodeBuf[off++] = RT_BYTE3(uImm);
5004 pbCodeBuf[off++] = RT_BYTE4(uImm);
5005 }
5006 else
5007 {
5008 /* Use temporary register for the 64-bit immediate. */
5009 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5010 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
5011 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5012 }
5013 RT_NOREF(fSetFlags);
5014
5015#elif defined(RT_ARCH_ARM64)
5016 uint32_t uImmR = 0;
5017 uint32_t uImmNandS = 0;
5018 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5019 {
5020 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5021 if (!fSetFlags)
5022 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
5023 else
5024 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
5025 }
5026 else
5027 {
5028 /* Use temporary register for the 64-bit immediate. */
5029 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5030 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5031 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5032 }
5033
5034#else
5035# error "Port me"
5036#endif
5037 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5038 return off;
5039}
5040
5041
5042/**
5043 * Emits code for AND'ing a 32-bit GPR with a constant.
5044 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5045 * @note For ARM64 this only supports @a uImm values that can be expressed using
5046 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
5047 * make sure this is possible!
5048 */
5049DECL_FORCE_INLINE_THROW(uint32_t)
5050iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5051{
5052#if defined(RT_ARCH_AMD64)
5053 /* and Ev, imm */
5054 if (iGprDst >= 8)
5055 pCodeBuf[off++] = X86_OP_REX_B;
5056 if ((int32_t)uImm == (int8_t)uImm)
5057 {
5058 pCodeBuf[off++] = 0x83;
5059 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5060 pCodeBuf[off++] = (uint8_t)uImm;
5061 }
5062 else
5063 {
5064 pCodeBuf[off++] = 0x81;
5065 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5066 pCodeBuf[off++] = RT_BYTE1(uImm);
5067 pCodeBuf[off++] = RT_BYTE2(uImm);
5068 pCodeBuf[off++] = RT_BYTE3(uImm);
5069 pCodeBuf[off++] = RT_BYTE4(uImm);
5070 }
5071 RT_NOREF(fSetFlags);
5072
5073#elif defined(RT_ARCH_ARM64)
5074 uint32_t uImmR = 0;
5075 uint32_t uImmNandS = 0;
5076 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5077 {
5078 if (!fSetFlags)
5079 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5080 else
5081 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5082 }
5083 else
5084# ifdef IEM_WITH_THROW_CATCH
5085 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5086# else
5087 AssertReleaseFailedStmt(off = UINT32_MAX);
5088# endif
5089
5090#else
5091# error "Port me"
5092#endif
5093 return off;
5094}
5095
5096
5097/**
5098 * Emits code for AND'ing a 32-bit GPR with a constant.
5099 *
5100 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5101 */
5102DECL_INLINE_THROW(uint32_t)
5103iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5104{
5105#if defined(RT_ARCH_AMD64)
5106 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
5107
5108#elif defined(RT_ARCH_ARM64)
5109 uint32_t uImmR = 0;
5110 uint32_t uImmNandS = 0;
5111 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5112 {
5113 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5114 if (!fSetFlags)
5115 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5116 else
5117 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5118 }
5119 else
5120 {
5121 /* Use temporary register for the 64-bit immediate. */
5122 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5123 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5124 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5125 }
5126
5127#else
5128# error "Port me"
5129#endif
5130 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5131 return off;
5132}
5133
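/* A short note on the ARM64 path above: AND/ANDS immediates must be a
   (possibly rotated) run of contiguous ones, e.g. 0x0000fff0 or the wrapping
   0x80000001; a pattern like 0x12345678 is not encodable and falls back to
   the temporary register path. Hypothetical usage sketch: */
#if 0
off = iemNativeEmitAndGpr32ByImm(pReNative, off, 3 /*iGprDst*/, 0xfff0);     /* single AND immediate */
off = iemNativeEmitAndGpr32ByImm(pReNative, off, 3 /*iGprDst*/, 0x12345678); /* temp register fallback */
#endif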
5134
5135/**
5136 * Emits code for AND'ing a 64-bit GPR with a constant.
5137 *
5138 * @note For ARM64, complicated immediates without an AND/ANDS compatible
5139 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
5140 * are the same.
5141 */
5142DECL_FORCE_INLINE_THROW(uint32_t)
5143iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
5144 bool fSetFlags = false)
5145{
5146#if defined(RT_ARCH_AMD64)
5147 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5148 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
5149 RT_NOREF(fSetFlags);
5150
5151#elif defined(RT_ARCH_ARM64)
5152 uint32_t uImmR = 0;
5153 uint32_t uImmNandS = 0;
5154 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5155 {
5156 if (!fSetFlags)
5157 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5158 else
5159 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5160 }
5161 else if (iGprDst != iGprSrc)
5162 {
5163 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5164 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5165 }
5166 else
5167# ifdef IEM_WITH_THROW_CATCH
5168 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5169# else
5170 AssertReleaseFailedStmt(off = UINT32_MAX);
5171# endif
5172
5173#else
5174# error "Port me"
5175#endif
5176 return off;
5177}
5178
5179/**
5180 * Emits code for AND'ing a 32-bit GPR with a constant.
5181 *
5182 * @note For ARM64, complicated immediates without an AND/ANDS compatible
5183 * encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
5184 * are the same.
5185 *
5186 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5187 */
5188DECL_FORCE_INLINE_THROW(uint32_t)
5189iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5190 bool fSetFlags = false)
5191{
5192#if defined(RT_ARCH_AMD64)
5193 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5194 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5195 RT_NOREF(fSetFlags);
5196
5197#elif defined(RT_ARCH_ARM64)
5198 uint32_t uImmR = 0;
5199 uint32_t uImmNandS = 0;
5200 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5201 {
5202 if (!fSetFlags)
5203 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5204 else
5205 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5206 }
5207 else if (iGprDst != iGprSrc)
5208 {
5209 /* If a value of 64K or above has no more than 16 significant bits once its
5210 trailing zero bits are discarded, we can save an instruction by combining
5211 MOVZ with a shifted-register AND. We prefer the compiler builtin ctz
5212 over our own, since it can be evaluated at compile time when uImm is a
5213 constant (which is often the case). This is useful for the TLB lookup code. */
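         /* Worked example (illustrative, hypothetical registers x2 = source, x3 = destination):
            uImm = 0x00350000 is not a contiguous bit run, so it has no AND immediate
            encoding, but it has 16 trailing zeros and 0x00350000 >> 16 == 0x35 fits
            MOVZ, giving:
                movz x3, #0x35
                and  x3, x2, x3, lsl #16 */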
5214 if (uImm > 0xffffU)
5215 {
5216# if defined(__GNUC__)
5217 unsigned cTrailingZeros = __builtin_ctz(uImm);
5218# else
5219 unsigned cTrailingZeros = ASMBitFirstSetU32(uImm) - 1;
5220# endif
5221 if ((uImm >> cTrailingZeros) <= 0xffffU)
5222 {
5223 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprDst, uImm >> cTrailingZeros);
5224 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprSrc,
5225 iGprDst, true /*f64Bit*/, cTrailingZeros, kArmv8A64InstrShift_Lsl);
5226 return off;
5227 }
5228 }
5229 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5230 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5231 }
5232 else
5233# ifdef IEM_WITH_THROW_CATCH
5234 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5235# else
5236 AssertReleaseFailedStmt(off = UINT32_MAX);
5237# endif
5238
5239#else
5240# error "Port me"
5241#endif
5242 return off;
5243}
5244
5245
5246/**
5247 * Emits code for OR'ing two 64-bit GPRs.
5248 */
5249DECL_FORCE_INLINE(uint32_t)
5250iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5251{
5252#if defined(RT_ARCH_AMD64)
5253 /* or Gv, Ev */
5254 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5255 pCodeBuf[off++] = 0x0b;
5256 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5257
5258#elif defined(RT_ARCH_ARM64)
5259 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5260
5261#else
5262# error "Port me"
5263#endif
5264 return off;
5265}
5266
5267
5268/**
5269 * Emits code for OR'ing two 64-bit GPRs.
5270 */
5271DECL_INLINE_THROW(uint32_t)
5272iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5273{
5274#if defined(RT_ARCH_AMD64)
5275 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5276#elif defined(RT_ARCH_ARM64)
5277 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5278#else
5279# error "Port me"
5280#endif
5281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5282 return off;
5283}
5284
5285
5286/**
5287 * Emits code for OR'ing two 32-bit GPRs.
5288 * @note Bits 63:32 of the destination GPR will be cleared.
5289 */
5290DECL_FORCE_INLINE(uint32_t)
5291iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5292{
5293#if defined(RT_ARCH_AMD64)
5294 /* or Gv, Ev */
5295 if (iGprDst >= 8 || iGprSrc >= 8)
5296 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5297 pCodeBuf[off++] = 0x0b;
5298 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5299
5300#elif defined(RT_ARCH_ARM64)
5301 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5302
5303#else
5304# error "Port me"
5305#endif
5306 return off;
5307}
5308
5309
5310/**
5311 * Emits code for OR'ing two 32-bit GPRs.
5312 * @note Bits 63:32 of the destination GPR will be cleared.
5313 */
5314DECL_INLINE_THROW(uint32_t)
5315iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5316{
5317#if defined(RT_ARCH_AMD64)
5318 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5319#elif defined(RT_ARCH_ARM64)
5320 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5321#else
5322# error "Port me"
5323#endif
5324 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5325 return off;
5326}
5327
5328
5329/**
5330 * Emits code for OR'ing a 64-bit GPR with a constant.
5331 */
5332DECL_INLINE_THROW(uint32_t)
5333iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5334{
5335#if defined(RT_ARCH_AMD64)
5336 if ((int64_t)uImm == (int8_t)uImm)
5337 {
5338 /* or Ev, imm8 */
5339 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5340 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5341 pbCodeBuf[off++] = 0x83;
5342 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5343 pbCodeBuf[off++] = (uint8_t)uImm;
5344 }
5345 else if ((int64_t)uImm == (int32_t)uImm)
5346 {
5347 /* or Ev, imm32 */
5348 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5349 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5350 pbCodeBuf[off++] = 0x81;
5351 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5352 pbCodeBuf[off++] = RT_BYTE1(uImm);
5353 pbCodeBuf[off++] = RT_BYTE2(uImm);
5354 pbCodeBuf[off++] = RT_BYTE3(uImm);
5355 pbCodeBuf[off++] = RT_BYTE4(uImm);
5356 }
5357 else
5358 {
5359 /* Use temporary register for the 64-bit immediate. */
5360 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5361 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5362 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5363 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5364 }
5365
5366#elif defined(RT_ARCH_ARM64)
5367 uint32_t uImmR = 0;
5368 uint32_t uImmNandS = 0;
5369 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5370 {
5371 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5372 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5373 }
5374 else
5375 {
5376 /* Use temporary register for the 64-bit immediate. */
5377 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5378 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5379 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5380 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5381 }
5382
5383#else
5384# error "Port me"
5385#endif
5386 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5387 return off;
5388}
5389
5390
5391/**
5392 * Emits code for OR'ing a 32-bit GPR with a constant.
5393 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5394 * @note For ARM64 this only supports @a uImm values that can be expressed using
5395 * the two 6-bit immediates of the OR instructions. The caller must make
5396 * sure this is possible!
5397 */
5398DECL_FORCE_INLINE_THROW(uint32_t)
5399iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5400{
5401#if defined(RT_ARCH_AMD64)
5402 /* or Ev, imm */
5403 if (iGprDst >= 8)
5404 pCodeBuf[off++] = X86_OP_REX_B;
5405 if ((int32_t)uImm == (int8_t)uImm)
5406 {
5407 pCodeBuf[off++] = 0x83;
5408 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5409 pCodeBuf[off++] = (uint8_t)uImm;
5410 }
5411 else
5412 {
5413 pCodeBuf[off++] = 0x81;
5414 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5415 pCodeBuf[off++] = RT_BYTE1(uImm);
5416 pCodeBuf[off++] = RT_BYTE2(uImm);
5417 pCodeBuf[off++] = RT_BYTE3(uImm);
5418 pCodeBuf[off++] = RT_BYTE4(uImm);
5419 }
5420
5421#elif defined(RT_ARCH_ARM64)
5422 uint32_t uImmR = 0;
5423 uint32_t uImmNandS = 0;
5424 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5425 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5426 else
5427# ifdef IEM_WITH_THROW_CATCH
5428 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5429# else
5430 AssertReleaseFailedStmt(off = UINT32_MAX);
5431# endif
5432
5433#else
5434# error "Port me"
5435#endif
5436 return off;
5437}
5438
5439
5440/**
5441 * Emits code for OR'ing a 32-bit GPR with a constant.
5442 *
5443 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5444 */
5445DECL_INLINE_THROW(uint32_t)
5446iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5447{
5448#if defined(RT_ARCH_AMD64)
5449 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5450
5451#elif defined(RT_ARCH_ARM64)
5452 uint32_t uImmR = 0;
5453 uint32_t uImmNandS = 0;
5454 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5455 {
5456 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5457 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5458 }
5459 else
5460 {
5461 /* Use temporary register for the 64-bit immediate. */
5462 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5463 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5464 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5465 }
5466
5467#else
5468# error "Port me"
5469#endif
5470 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5471 return off;
5472}
5473
5474
5475
5476/**
5477 * ORs two 64-bit GPRs together, storing the result in a third register.
5478 */
5479DECL_FORCE_INLINE(uint32_t)
5480iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5481{
5482#ifdef RT_ARCH_AMD64
5483 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5484 {
5485 /** @todo consider LEA */
5486 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5487 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5488 }
5489 else
5490 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5491
5492#elif defined(RT_ARCH_ARM64)
5493 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5494
5495#else
5496# error "Port me!"
5497#endif
5498 return off;
5499}
5500
5501
5502
5503/**
5504 * ORs two 32-bit GPRs together, storing the result in a third register.
5505 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5506 */
5507DECL_FORCE_INLINE(uint32_t)
5508iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5509{
5510#ifdef RT_ARCH_AMD64
5511 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5512 {
5513 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5514 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5515 }
5516 else
5517 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5518
5519#elif defined(RT_ARCH_ARM64)
5520 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5521
5522#else
5523# error "Port me!"
5524#endif
5525 return off;
5526}
5527
5528
5529/**
5530 * Emits code for XOR'ing two 64-bit GPRs.
5531 */
5532DECL_INLINE_THROW(uint32_t)
5533iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5534{
5535#if defined(RT_ARCH_AMD64)
5536 /* xor Gv, Ev */
5537 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5538 pCodeBuf[off++] = 0x33;
5539 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5540
5541#elif defined(RT_ARCH_ARM64)
5542 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5543
5544#else
5545# error "Port me"
5546#endif
5547 return off;
5548}
5549
5550
5551/**
5552 * Emits code for XOR'ing two 64-bit GPRs.
5553 */
5554DECL_INLINE_THROW(uint32_t)
5555iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5556{
5557#if defined(RT_ARCH_AMD64)
5558 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5559#elif defined(RT_ARCH_ARM64)
5560 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5561#else
5562# error "Port me"
5563#endif
5564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5565 return off;
5566}
5567
5568
5569/**
5570 * Emits code for XOR'ing two 32-bit GPRs.
5571 */
5572DECL_INLINE_THROW(uint32_t)
5573iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5574{
5575#if defined(RT_ARCH_AMD64)
5576 /* xor Gv, Ev */
5577 if (iGprDst >= 8 || iGprSrc >= 8)
5578 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5579 pCodeBuf[off++] = 0x33;
5580 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5581
5582#elif defined(RT_ARCH_ARM64)
5583 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5584
5585#else
5586# error "Port me"
5587#endif
5588 return off;
5589}
5590
5591
5592/**
5593 * Emits code for XOR'ing two 32-bit GPRs.
5594 */
5595DECL_INLINE_THROW(uint32_t)
5596iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5597{
5598#if defined(RT_ARCH_AMD64)
5599 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5600#elif defined(RT_ARCH_ARM64)
5601 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5602#else
5603# error "Port me"
5604#endif
5605 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5606 return off;
5607}
5608
5609
5610/**
5611 * Emits code for XOR'ing a 32-bit GPR with a constant.
5612 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5613 * @note For ARM64 this only supports @a uImm values that can be expressed using
5614 * the two 6-bit immediates of the EOR instructions. The caller must make
5615 * sure this is possible!
5616 */
5617DECL_FORCE_INLINE_THROW(uint32_t)
5618iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5619{
5620#if defined(RT_ARCH_AMD64)
5621 /* xor Ev, imm */
5622 if (iGprDst >= 8)
5623 pCodeBuf[off++] = X86_OP_REX_B;
5624 if ((int32_t)uImm == (int8_t)uImm)
5625 {
5626 pCodeBuf[off++] = 0x83;
5627 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5628 pCodeBuf[off++] = (uint8_t)uImm;
5629 }
5630 else
5631 {
5632 pCodeBuf[off++] = 0x81;
5633 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5634 pCodeBuf[off++] = RT_BYTE1(uImm);
5635 pCodeBuf[off++] = RT_BYTE2(uImm);
5636 pCodeBuf[off++] = RT_BYTE3(uImm);
5637 pCodeBuf[off++] = RT_BYTE4(uImm);
5638 }
5639
5640#elif defined(RT_ARCH_ARM64)
5641 uint32_t uImmR = 0;
5642 uint32_t uImmNandS = 0;
5643 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5644 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5645 else
5646# ifdef IEM_WITH_THROW_CATCH
5647 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5648# else
5649 AssertReleaseFailedStmt(off = UINT32_MAX);
5650# endif
5651
5652#else
5653# error "Port me"
5654#endif
5655 return off;
5656}
5657
5658
5659/**
5660 * Emits code for XOR'ing a 32-bit GPR with a constant (bits 32 thru 63 of the destination will be zero afterwards).
5661 */
5662DECL_INLINE_THROW(uint32_t)
5663iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5664{
5665#if defined(RT_ARCH_AMD64)
5666 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5667#elif defined(RT_ARCH_ARM64)
5668 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5669#else
5670# error "Port me"
5671#endif
5672 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5673 return off;
5674}
5675
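/* Illustrative usage sketch (hypothetical register index): toggling a single
   flag bit in a 32-bit value. RT_BIT_32(11) is a single-bit mask and thus
   always has an EOR immediate encoding, so the ARM64 path needs no temporary
   register: */
#if 0
off = iemNativeEmitXorGpr32ByImm(pReNative, off, 3 /*iGprDst*/, RT_BIT_32(11));
#endif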
5676
5677/*********************************************************************************************************************************
5678* Shifting *
5679*********************************************************************************************************************************/
5680
5681/**
5682 * Emits code for shifting a GPR a fixed number of bits to the left.
5683 */
5684DECL_FORCE_INLINE(uint32_t)
5685iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5686{
5687 Assert(cShift > 0 && cShift < 64);
5688
5689#if defined(RT_ARCH_AMD64)
5690 /* shl dst, cShift */
5691 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5692 if (cShift != 1)
5693 {
5694 pCodeBuf[off++] = 0xc1;
5695 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5696 pCodeBuf[off++] = cShift;
5697 }
5698 else
5699 {
5700 pCodeBuf[off++] = 0xd1;
5701 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5702 }
5703
5704#elif defined(RT_ARCH_ARM64)
5705 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5706
5707#else
5708# error "Port me"
5709#endif
5710 return off;
5711}
5712
5713
5714/**
5715 * Emits code for shifting a GPR a fixed number of bits to the left.
5716 */
5717DECL_INLINE_THROW(uint32_t)
5718iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5719{
5720#if defined(RT_ARCH_AMD64)
5721 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5722#elif defined(RT_ARCH_ARM64)
5723 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5724#else
5725# error "Port me"
5726#endif
5727 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5728 return off;
5729}
5730
5731
5732/**
5733 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5734 */
5735DECL_FORCE_INLINE(uint32_t)
5736iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5737{
5738 Assert(cShift > 0 && cShift < 32);
5739
5740#if defined(RT_ARCH_AMD64)
5741 /* shl dst, cShift */
5742 if (iGprDst >= 8)
5743 pCodeBuf[off++] = X86_OP_REX_B;
5744 if (cShift != 1)
5745 {
5746 pCodeBuf[off++] = 0xc1;
5747 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5748 pCodeBuf[off++] = cShift;
5749 }
5750 else
5751 {
5752 pCodeBuf[off++] = 0xd1;
5753 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5754 }
5755
5756#elif defined(RT_ARCH_ARM64)
5757 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5758
5759#else
5760# error "Port me"
5761#endif
5762 return off;
5763}
5764
5765
5766/**
5767 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5768 */
5769DECL_INLINE_THROW(uint32_t)
5770iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5771{
5772#if defined(RT_ARCH_AMD64)
5773 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5774#elif defined(RT_ARCH_ARM64)
5775 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5776#else
5777# error "Port me"
5778#endif
5779 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5780 return off;
5781}
5782
5783
5784/**
5785 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5786 */
5787DECL_FORCE_INLINE(uint32_t)
5788iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5789{
5790 Assert(cShift > 0 && cShift < 64);
5791
5792#if defined(RT_ARCH_AMD64)
5793 /* shr dst, cShift */
5794 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5795 if (cShift != 1)
5796 {
5797 pCodeBuf[off++] = 0xc1;
5798 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5799 pCodeBuf[off++] = cShift;
5800 }
5801 else
5802 {
5803 pCodeBuf[off++] = 0xd1;
5804 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5805 }
5806
5807#elif defined(RT_ARCH_ARM64)
5808 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5809
5810#else
5811# error "Port me"
5812#endif
5813 return off;
5814}
5815
5816
5817/**
5818 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5819 */
5820DECL_INLINE_THROW(uint32_t)
5821iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5822{
5823#if defined(RT_ARCH_AMD64)
5824 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5825#elif defined(RT_ARCH_ARM64)
5826 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5827#else
5828# error "Port me"
5829#endif
5830 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5831 return off;
5832}
5833
5834
5835/**
5836 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5837 * right.
5838 */
5839DECL_FORCE_INLINE(uint32_t)
5840iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5841{
5842 Assert(cShift > 0 && cShift < 32);
5843
5844#if defined(RT_ARCH_AMD64)
5845 /* shr dst, cShift */
5846 if (iGprDst >= 8)
5847 pCodeBuf[off++] = X86_OP_REX_B;
5848 if (cShift != 1)
5849 {
5850 pCodeBuf[off++] = 0xc1;
5851 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5852 pCodeBuf[off++] = cShift;
5853 }
5854 else
5855 {
5856 pCodeBuf[off++] = 0xd1;
5857 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5858 }
5859
5860#elif defined(RT_ARCH_ARM64)
5861 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5862
5863#else
5864# error "Port me"
5865#endif
5866 return off;
5867}
5868
5869
5870/**
5871 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5872 * right.
5873 */
5874DECL_INLINE_THROW(uint32_t)
5875iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5876{
5877#if defined(RT_ARCH_AMD64)
5878 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5879#elif defined(RT_ARCH_ARM64)
5880 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5881#else
5882# error "Port me"
5883#endif
5884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5885 return off;
5886}
5887
5888
5889/**
5890 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5891 * right and assigning it to a different GPR.
5892 */
5893DECL_INLINE_THROW(uint32_t)
5894iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5895{
5896 Assert(cShift > 0); Assert(cShift < 32);
5897#if defined(RT_ARCH_AMD64)
5898 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5899 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5900
5901#elif defined(RT_ARCH_ARM64)
5902 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5903
5904#else
5905# error "Port me"
5906#endif
5907 return off;
5908}
5909
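/* Illustrative example (hypothetical register indices): extracting the high
   word of a 32-bit value into another register, i.e. dst = src >> 16, is a
   single instruction on ARM64 (lsr w3, w2, #16) and a mov + shr pair on AMD64. */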
5910
5911/**
5912 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5913 */
5914DECL_FORCE_INLINE(uint32_t)
5915iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5916{
5917 Assert(cShift > 0 && cShift < 64);
5918
5919#if defined(RT_ARCH_AMD64)
5920 /* sar dst, cShift */
5921 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5922 if (cShift != 1)
5923 {
5924 pCodeBuf[off++] = 0xc1;
5925 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5926 pCodeBuf[off++] = cShift;
5927 }
5928 else
5929 {
5930 pCodeBuf[off++] = 0xd1;
5931 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5932 }
5933
5934#elif defined(RT_ARCH_ARM64)
5935 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5936
5937#else
5938# error "Port me"
5939#endif
5940 return off;
5941}
5942
5943
5944/**
5945 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5946 */
5947DECL_INLINE_THROW(uint32_t)
5948iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5949{
5950#if defined(RT_ARCH_AMD64)
5951 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5952#elif defined(RT_ARCH_ARM64)
5953 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5954#else
5955# error "Port me"
5956#endif
5957 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5958 return off;
5959}
5960
5961
5962/**
5963 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5964 */
5965DECL_FORCE_INLINE(uint32_t)
5966iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5967{
5968 Assert(cShift > 0 && cShift < 32);
5969
5970#if defined(RT_ARCH_AMD64)
5971 /* sar dst, cShift */
5972 if (iGprDst >= 8)
5973 pCodeBuf[off++] = X86_OP_REX_B;
5974 if (cShift != 1)
5975 {
5976 pCodeBuf[off++] = 0xc1;
5977 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5978 pCodeBuf[off++] = cShift;
5979 }
5980 else
5981 {
5982 pCodeBuf[off++] = 0xd1;
5983 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5984 }
5985
5986#elif defined(RT_ARCH_ARM64)
5987 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5988
5989#else
5990# error "Port me"
5991#endif
5992 return off;
5993}
5994
5995
5996/**
5997 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5998 */
5999DECL_INLINE_THROW(uint32_t)
6000iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6001{
6002#if defined(RT_ARCH_AMD64)
6003 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
6004#elif defined(RT_ARCH_ARM64)
6005 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
6006#else
6007# error "Port me"
6008#endif
6009 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6010 return off;
6011}
6012
6013
6014/**
6015 * Emits code for rotating a GPR a fixed number of bits to the left.
6016 */
6017DECL_FORCE_INLINE(uint32_t)
6018iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6019{
6020 Assert(cShift > 0 && cShift < 64);
6021
6022#if defined(RT_ARCH_AMD64)
6023 /* rol dst, cShift */
6024 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
6025 if (cShift != 1)
6026 {
6027 pCodeBuf[off++] = 0xc1;
6028 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
6029 pCodeBuf[off++] = cShift;
6030 }
6031 else
6032 {
6033 pCodeBuf[off++] = 0xd1;
6034 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
6035 }
6036
6037#elif defined(RT_ARCH_ARM64)
6038 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
6039
6040#else
6041# error "Port me"
6042#endif
6043 return off;
6044}
6045
6046
6047#if defined(RT_ARCH_AMD64)
6048/**
6049 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
6050 */
6051DECL_FORCE_INLINE(uint32_t)
6052iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6053{
6054 Assert(cShift > 0 && cShift < 32);
6055
6056 /* rcl dst, cShift */
6057 if (iGprDst >= 8)
6058 pCodeBuf[off++] = X86_OP_REX_B;
6059 if (cShift != 1)
6060 {
6061 pCodeBuf[off++] = 0xc1;
6062 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6063 pCodeBuf[off++] = cShift;
6064 }
6065 else
6066 {
6067 pCodeBuf[off++] = 0xd1;
6068 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6069 }
6070
6071 return off;
6072}
6073#endif /* RT_ARCH_AMD64 */
6074
6075
6076
6077/**
6078 * Emits code for reversing the byte order of the 16-bit value in a GPR.
6079 * @note AMD64 leaves bits 63:16 unchanged, while ARM64 byte-swaps bits 31:16 as well and clears bits 63:32.
6080 */
6081DECL_FORCE_INLINE(uint32_t)
6082iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6083{
6084#if defined(RT_ARCH_AMD64)
6085 /*
6086 * There is no bswap r16 on x86 (the encoding exists but does not work).
6087 * So just use a rol (which is what gcc -O2 does).
6088 *
6089 * rol r16, 0x8
6090 */
6091 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6092 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6093 if (iGpr >= 8)
6094 pbCodeBuf[off++] = X86_OP_REX_B;
6095 pbCodeBuf[off++] = 0xc1;
6096 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
6097 pbCodeBuf[off++] = 0x08;
6098#elif defined(RT_ARCH_ARM64)
6099 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6100
6101 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
6102#else
6103# error "Port me"
6104#endif
6105
6106 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6107 return off;
6108}
6109
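/* Illustrative example: with a low word of 0x1234, the rotate/rev16 above
   yields 0x3412. On AMD64 the 'rol r16, 8' rotates the two low bytes past
   each other; on ARM64 REV16 swaps the bytes within each halfword. */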
6110
6111/**
6112 * Emits code for reversing the byte order in a 32-bit GPR.
6113 * @note Bits 63:32 of the destination GPR will be cleared.
6114 */
6115DECL_FORCE_INLINE(uint32_t)
6116iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6117{
6118#if defined(RT_ARCH_AMD64)
6119 /* bswap r32 */
6120 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6121
6122 if (iGpr >= 8)
6123 pbCodeBuf[off++] = X86_OP_REX_B;
6124 pbCodeBuf[off++] = 0x0f;
6125 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6126#elif defined(RT_ARCH_ARM64)
6127 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6128
6129 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
6130#else
6131# error "Port me"
6132#endif
6133
6134 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6135 return off;
6136}
6137
6138
6139/**
6140 * Emits code for reversing the byte order in a 64-bit GPR.
6141 */
6142DECL_FORCE_INLINE(uint32_t)
6143iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6144{
6145#if defined(RT_ARCH_AMD64)
6146 /* bswap r64 */
6147 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6148
6149 if (iGpr >= 8)
6150 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
6151 else
6152 pbCodeBuf[off++] = X86_OP_REX_W;
6153 pbCodeBuf[off++] = 0x0f;
6154 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6155#elif defined(RT_ARCH_ARM64)
6156 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6157
6158 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
6159#else
6160# error "Port me"
6161#endif
6162
6163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6164 return off;
6165}
6166
6167
6168/*********************************************************************************************************************************
6169* Bitfield manipulation *
6170*********************************************************************************************************************************/
6171
6172/**
6173 * Emits code for clearing a single bit in a 32-bit GPR (AMD64 also clears bits 63:32, ARM64 leaves them untouched).
6174 */
6175DECL_FORCE_INLINE(uint32_t)
6176iemNativeEmitBitClearInGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const iGpr, uint8_t iBit)
6177{
6178 Assert(iBit < 32);
6179
6180#if defined(RT_ARCH_AMD64)
6181 /* btr r32, imm8 */
6182 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6183
6184 if (iGpr >= 8)
6185 pbCodeBuf[off++] = X86_OP_REX_B;
6186 pbCodeBuf[off++] = 0x0f;
6187 pbCodeBuf[off++] = 0xba;
6188 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGpr & 7);
6189 pbCodeBuf[off++] = iBit;
6190#elif defined(RT_ARCH_ARM64)
6191 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6192
6193 pu32CodeBuf[off++] = Armv8A64MkInstrBfc(iGpr, iBit /*offFirstBit*/, 1 /*cBits*/, true /*f64Bit*/);
6194#else
6195# error "Port me"
6196#endif
6197
6198 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6199 return off;
6200}
6201
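/* Illustrative usage sketch (hypothetical register index and bit position):
   clearing bit 9 (e.g. X86_EFL_IF) in a copy of the guest EFLAGS: */
#if 0
off = iemNativeEmitBitClearInGpr32(pReNative, off, 3 /*iGpr*/, 9 /*iBit*/);
#endif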
6202
6203/*********************************************************************************************************************************
6204* Compare and Testing *
6205*********************************************************************************************************************************/
6206
6207
6208#ifdef RT_ARCH_ARM64
6209/**
6210 * Emits an ARM64 compare instruction.
6211 */
6212DECL_INLINE_THROW(uint32_t)
6213iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
6214 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
6215{
6216 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6217 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
6218 f64Bit, true /*fSetFlags*/, cShift, enmShift);
6219 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6220 return off;
6221}
6222#endif
6223
6224
6225/**
6226 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6227 * with conditional instructions.
6228 */
6229DECL_FORCE_INLINE(uint32_t)
6230iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6231{
6232#ifdef RT_ARCH_AMD64
6233 /* cmp Gv, Ev */
6234 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6235 pCodeBuf[off++] = 0x3b;
6236 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6237
6238#elif defined(RT_ARCH_ARM64)
6239 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6240
6241#else
6242# error "Port me!"
6243#endif
6244 return off;
6245}
6246
6247
6248/**
6249 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6250 * with conditional instructions.
6251 */
6252DECL_INLINE_THROW(uint32_t)
6253iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6254{
6255#ifdef RT_ARCH_AMD64
6256 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6257#elif defined(RT_ARCH_ARM64)
6258 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6259#else
6260# error "Port me!"
6261#endif
6262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6263 return off;
6264}
6265
6266
6267/**
6268 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6269 * with conditional instructions.
6270 */
6271DECL_FORCE_INLINE(uint32_t)
6272iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6273{
6274#ifdef RT_ARCH_AMD64
6275 /* cmp Gv, Ev */
6276 if (iGprLeft >= 8 || iGprRight >= 8)
6277 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6278 pCodeBuf[off++] = 0x3b;
6279 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6280
6281#elif defined(RT_ARCH_ARM64)
6282 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6283
6284#else
6285# error "Port me!"
6286#endif
6287 return off;
6288}
6289
6290
6291/**
6292 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6293 * with conditional instructions.
6294 */
6295DECL_INLINE_THROW(uint32_t)
6296iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6297{
6298#ifdef RT_ARCH_AMD64
6299 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6300#elif defined(RT_ARCH_ARM64)
6301 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6302#else
6303# error "Port me!"
6304#endif
6305 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6306 return off;
6307}
6308
6309
6310/**
6311 * Emits a compare of a 64-bit GPR with a constant value, setting status
6312 * flags/whatever for use with a conditional instruction.
6313 */
6314DECL_INLINE_THROW(uint32_t)
6315iemNativeEmitCmpGprWithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft,
6316 uint64_t uImm, uint8_t idxTmpReg = UINT8_MAX)
6317{
6318#ifdef RT_ARCH_AMD64
6319 if ((int8_t)uImm == (int64_t)uImm)
6320 {
6321 /* cmp Ev, Ib */
6322 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6323 pCodeBuf[off++] = 0x83;
6324 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6325 pCodeBuf[off++] = (uint8_t)uImm;
6326 return off;
6327 }
6328 if ((int32_t)uImm == (int64_t)uImm)
6329 {
6330 /* cmp Ev, imm */
6331 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6332 pCodeBuf[off++] = 0x81;
6333 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6334 pCodeBuf[off++] = RT_BYTE1(uImm);
6335 pCodeBuf[off++] = RT_BYTE2(uImm);
6336 pCodeBuf[off++] = RT_BYTE3(uImm);
6337 pCodeBuf[off++] = RT_BYTE4(uImm);
6338 return off;
6339 }
6340
6341#elif defined(RT_ARCH_ARM64)
6342 if (uImm < _4K)
6343 {
6344 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6345 true /*64Bit*/, true /*fSetFlags*/);
6346 return off;
6347 }
6348 if ((uImm & ~(uint64_t)0xfff000) == 0)
6349 {
6350 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6351 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6352 return off;
6353 }
6354
6355#else
6356# error "Port me!"
6357#endif
6358
6359 if (idxTmpReg != UINT8_MAX)
6360 {
6361 /* Use temporary register for the immediate. */
6362 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpReg, uImm);
6363 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, iGprLeft, idxTmpReg);
6364 }
6365 else
6366# ifdef IEM_WITH_THROW_CATCH
6367 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6368# else
6369 AssertReleaseFailedStmt(off = UINT32_MAX);
6370# endif
6371
6372 return off;
6373}
6374
6375
6376/**
6377 * Emits a compare of a 64-bit GPR with a constant value, setting status
6378 * flags/whatever for use with a conditional instruction.
6379 */
6380DECL_INLINE_THROW(uint32_t)
6381iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6382{
6383#ifdef RT_ARCH_AMD64
6384 if ((int8_t)uImm == (int64_t)uImm)
6385 {
6386 /* cmp Ev, Ib */
6387 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6388 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6389 pbCodeBuf[off++] = 0x83;
6390 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6391 pbCodeBuf[off++] = (uint8_t)uImm;
6392 }
6393 else if ((int32_t)uImm == (int64_t)uImm)
6394 {
6395 /* cmp Ev, imm */
6396 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6397 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6398 pbCodeBuf[off++] = 0x81;
6399 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6401 pbCodeBuf[off++] = RT_BYTE1(uImm);
6402 pbCodeBuf[off++] = RT_BYTE2(uImm);
6403 pbCodeBuf[off++] = RT_BYTE3(uImm);
6404 pbCodeBuf[off++] = RT_BYTE4(uImm);
6405 }
6406 else
6407 {
6408 /* Use temporary register for the immediate. */
6409 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6410 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6411 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6412 }
6413
6414#elif defined(RT_ARCH_ARM64)
6415 /** @todo guess there are cleverer things we can do here... */
6416 if (uImm < _4K)
6417 {
6418 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6419 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6420 true /*64Bit*/, true /*fSetFlags*/);
6421 }
6422 else if ((uImm & ~(uint64_t)0xfff000) == 0)
6423 {
6424 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6425 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6426 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6427 }
6428 else
6429 {
6430 /* Use temporary register for the immediate. */
6431 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6432 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6433 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6434 }
6435
6436#else
6437# error "Port me!"
6438#endif
6439
6440 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6441 return off;
6442}
6443
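/*
 * Worked example (informative) of the ARM64 immediate path selection above:
 * uImm == 0x123 fits in 12 bits and becomes a single SUBS XZR, Xn, #0x123;
 * uImm == 0x123000 only has bits 12..23 set and uses the LSL-12 variant of
 * SUBS; anything else, e.g. UINT64_C(0x123456789), is first loaded into a
 * temporary register and compared with SUBS XZR, Xn, Xm.
 */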
6444
6445/**
6446 * Emits a compare of a 32-bit GPR with a constant value, setting status
6447 * flags/whatever for use with a conditional instruction.
6448 *
6449 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6450 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6451 * bits all zero). Will release assert or throw exception if the caller
6452 * violates this restriction.
6453 */
6454DECL_FORCE_INLINE_THROW(uint32_t)
6455iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6456{
6457#ifdef RT_ARCH_AMD64
6458 if (iGprLeft >= 8)
6459 pCodeBuf[off++] = X86_OP_REX_B;
6460 if (uImm <= UINT32_C(0x7f))
6461 {
6462 /* cmp Ev, Ib */
6463 pCodeBuf[off++] = 0x83;
6464 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6465 pCodeBuf[off++] = (uint8_t)uImm;
6466 }
6467 else
6468 {
6469 /* cmp Ev, imm */
6470 pCodeBuf[off++] = 0x81;
6471 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6472 pCodeBuf[off++] = RT_BYTE1(uImm);
6473 pCodeBuf[off++] = RT_BYTE2(uImm);
6474 pCodeBuf[off++] = RT_BYTE3(uImm);
6475 pCodeBuf[off++] = RT_BYTE4(uImm);
6476 }
6477
6478#elif defined(RT_ARCH_ARM64)
6479 /** @todo guess there are cleverer things we can do here... */
6480 if (uImm < _4K)
6481 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6482 false /*64Bit*/, true /*fSetFlags*/);
6483 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6484 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6485 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6486 else
6487# ifdef IEM_WITH_THROW_CATCH
6488 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6489# else
6490 AssertReleaseFailedStmt(off = UINT32_MAX);
6491# endif
6492
6493#else
6494# error "Port me!"
6495#endif
6496 return off;
6497}
6498
6499
6500/**
6501 * Emits a compare of a 32-bit GPR with a constant value, setting status
6502 * flags/whatever for use with a conditional instruction.
6503 */
6504DECL_INLINE_THROW(uint32_t)
6505iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6506{
6507#ifdef RT_ARCH_AMD64
6508 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6509
6510#elif defined(RT_ARCH_ARM64)
6511 /** @todo guess there are cleverer things we can do here... */
6512 if (uImm < _4K)
6513 {
6514 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6515 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6516 false /*64Bit*/, true /*fSetFlags*/);
6517 }
6518 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6519 {
6520 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6521 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6522 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6523 }
6524 else
6525 {
6526 /* Use temporary register for the immediate. */
6527 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6528 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6529 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6530 }
6531
6532#else
6533# error "Port me!"
6534#endif
6535
6536 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6537 return off;
6538}
6539
6540
6541/**
6542 * Emits a compare of a 16-bit GPR with a constant value, setting status
6543 * flags/whatever for use with a conditional instruction.
6544 *
6545 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6546 * 16-bit value from @a iGprLeft.
6547 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6548 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6549 * bits all zero). Will release assert or throw exception if the caller
6550 * violates this restriction.
6551 */
6552DECL_FORCE_INLINE_THROW(uint32_t)
6553iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6554 uint8_t idxTmpReg = UINT8_MAX)
6555{
6556#ifdef RT_ARCH_AMD64
6557 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6558 if (iGprLeft >= 8)
6559 pCodeBuf[off++] = X86_OP_REX_B;
6560 if (uImm <= UINT32_C(0x7f))
6561 {
6562 /* cmp Ev, Ib */
6563 pCodeBuf[off++] = 0x83;
6564 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6565 pCodeBuf[off++] = (uint8_t)uImm;
6566 }
6567 else
6568 {
6569 /* cmp Ev, imm */
6570 pCodeBuf[off++] = 0x81;
6571 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6572 pCodeBuf[off++] = RT_BYTE1(uImm);
6573 pCodeBuf[off++] = RT_BYTE2(uImm);
6574 }
6575 RT_NOREF(idxTmpReg);
6576
6577#elif defined(RT_ARCH_ARM64)
6578# ifdef IEM_WITH_THROW_CATCH
6579 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6580# else
6581 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6582# endif
6583 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6584 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6585 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6586
6587#else
6588# error "Port me!"
6589#endif
6590 return off;
6591}
6592
6593
6594/**
6595 * Emits a compare of a 16-bit GPR with a constant value, setting status
6596 * flags/whatever for use with a conditional instruction.
6597 *
6598 * @note ARM64: Helper register is required (idxTmpReg).
6599 */
6600DECL_INLINE_THROW(uint32_t)
6601iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6602 uint8_t idxTmpReg = UINT8_MAX)
6603{
6604#ifdef RT_ARCH_AMD64
6605 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6606#elif defined(RT_ARCH_ARM64)
6607 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6608#else
6609# error "Port me!"
6610#endif
6611 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6612 return off;
6613}
6614
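/*
 * Usage sketch (hypothetical): on ARM64 the 16-bit compare needs a scratch
 * register for the zero-extended value, so a caller would typically do
 * something like:
 *
 *      uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxRegValue, 0x1a, idxRegTmp);
 *      iemNativeRegFreeTmp(pReNative, idxRegTmp);
 *
 * idxRegValue is assumed to hold the guest value; the allocator helpers are
 * the ones declared in IEMN8veRecompiler.h.
 */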
6615
6616
6617/*********************************************************************************************************************************
6618* Branching *
6619*********************************************************************************************************************************/
6620
6621/**
6622 * Emits a JMP rel32 / B imm26 to the given label.
6623 */
6624DECL_FORCE_INLINE_THROW(uint32_t)
6625iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6626{
6627 Assert(idxLabel < pReNative->cLabels);
6628
6629#ifdef RT_ARCH_AMD64
6630 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6631 {
6632 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6633 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6634 {
6635 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6636 pCodeBuf[off++] = (uint8_t)offRel;
6637 }
6638 else
6639 {
6640 offRel -= 3;
6641 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6642 pCodeBuf[off++] = RT_BYTE1(offRel);
6643 pCodeBuf[off++] = RT_BYTE2(offRel);
6644 pCodeBuf[off++] = RT_BYTE3(offRel);
6645 pCodeBuf[off++] = RT_BYTE4(offRel);
6646 }
6647 }
6648 else
6649 {
6650 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6651 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6652 pCodeBuf[off++] = 0xfe;
6653 pCodeBuf[off++] = 0xff;
6654 pCodeBuf[off++] = 0xff;
6655 pCodeBuf[off++] = 0xff;
6656 }
6657 pCodeBuf[off++] = 0xcc; /* int3 poison */
6658
6659#elif defined(RT_ARCH_ARM64)
6660 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6661 {
6662 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6663 off++;
6664 }
6665 else
6666 {
6667 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6668 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6669 }
6670
6671#else
6672# error "Port me!"
6673#endif
6674 return off;
6675}
6676
6677
6678/**
6679 * Emits a JMP rel32 / B imm26 to the given label.
6680 */
6681DECL_INLINE_THROW(uint32_t)
6682iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6683{
6684#ifdef RT_ARCH_AMD64
6685 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6686#elif defined(RT_ARCH_ARM64)
6687 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6688#else
6689# error "Port me!"
6690#endif
6691 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6692 return off;
6693}
6694
6695
6696/**
6697 * Emits a JMP rel32 / B imm26 to a new undefined label.
6698 */
6699DECL_INLINE_THROW(uint32_t)
6700iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6701{
6702 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6703 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6704}
6705
6706/** Condition type. */
6707#ifdef RT_ARCH_AMD64
6708typedef enum IEMNATIVEINSTRCOND : uint8_t
6709{
6710 kIemNativeInstrCond_o = 0,
6711 kIemNativeInstrCond_no,
6712 kIemNativeInstrCond_c,
6713 kIemNativeInstrCond_nc,
6714 kIemNativeInstrCond_e,
6715 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6716 kIemNativeInstrCond_ne,
6717 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6718 kIemNativeInstrCond_be,
6719 kIemNativeInstrCond_nbe,
6720 kIemNativeInstrCond_s,
6721 kIemNativeInstrCond_ns,
6722 kIemNativeInstrCond_p,
6723 kIemNativeInstrCond_np,
6724 kIemNativeInstrCond_l,
6725 kIemNativeInstrCond_nl,
6726 kIemNativeInstrCond_le,
6727 kIemNativeInstrCond_nle
6728} IEMNATIVEINSTRCOND;
6729#elif defined(RT_ARCH_ARM64)
6730typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6731# define kIemNativeInstrCond_o todo_conditional_codes
6732# define kIemNativeInstrCond_no todo_conditional_codes
6733# define kIemNativeInstrCond_c todo_conditional_codes
6734# define kIemNativeInstrCond_nc todo_conditional_codes
6735# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6736# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6737# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6738# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6739# define kIemNativeInstrCond_s todo_conditional_codes
6740# define kIemNativeInstrCond_ns todo_conditional_codes
6741# define kIemNativeInstrCond_p todo_conditional_codes
6742# define kIemNativeInstrCond_np todo_conditional_codes
6743# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6744# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6745# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6746# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6747#else
6748# error "Port me!"
6749#endif
6750
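/*
 * Informative example: the shared condition names let one sequence serve
 * both hosts, e.g. "branch if left < right (signed)":
 *
 *      off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
 *
 * kIemNativeInstrCond_l is the raw x86 condition code value on AMD64 and
 * maps to kArmv8InstrCond_Lt on ARM64 (registers and label are hypothetical).
 */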
6751
6752/**
6753 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6754 */
6755DECL_FORCE_INLINE_THROW(uint32_t)
6756iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6757 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6758{
6759 Assert(idxLabel < pReNative->cLabels);
6760
6761 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6762#ifdef RT_ARCH_AMD64
6763 if (offLabel >= off)
6764 {
6765 /* jcc rel32 */
6766 pCodeBuf[off++] = 0x0f;
6767 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6768 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6769 pCodeBuf[off++] = 0x00;
6770 pCodeBuf[off++] = 0x00;
6771 pCodeBuf[off++] = 0x00;
6772 pCodeBuf[off++] = 0x00;
6773 }
6774 else
6775 {
6776 int32_t offDisp = offLabel - (off + 2);
6777 if ((int8_t)offDisp == offDisp)
6778 {
6779 /* jcc rel8 */
6780 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6781 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6782 }
6783 else
6784 {
6785 /* jcc rel32 */
6786 offDisp -= 4;
6787 pCodeBuf[off++] = 0x0f;
6788 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6789 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6790 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6791 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6792 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6793 }
6794 }
6795
6796#elif defined(RT_ARCH_ARM64)
6797 if (offLabel >= off)
6798 {
6799 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6800 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6801 }
6802 else
6803 {
6804 Assert(off - offLabel <= 0x3ffffU);
6805 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6806 off++;
6807 }
6808
6809#else
6810# error "Port me!"
6811#endif
6812 return off;
6813}
6814
6815
6816/**
6817 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6818 */
6819DECL_INLINE_THROW(uint32_t)
6820iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6821{
6822#ifdef RT_ARCH_AMD64
6823 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6824#elif defined(RT_ARCH_ARM64)
6825 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6826#else
6827# error "Port me!"
6828#endif
6829 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6830 return off;
6831}
6832
6833
6834/**
6835 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6836 */
6837DECL_INLINE_THROW(uint32_t)
6838iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6839 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6840{
6841 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6842 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6843}
6844
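/**
 * Example (hypothetical helper, not part of the emitter API): pairs a 64-bit
 * immediate compare with a conditional branch to a freshly created label,
 * i.e. "if (reg == uImm) goto new label".
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleJmpToNewLabelIfGprEqualsImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
                                                IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
{
    off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm); /* only sets host status flags */
    return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
}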
6845
6846/**
6847 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6848 */
6849DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6850{
6851#ifdef RT_ARCH_AMD64
6852 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6853#elif defined(RT_ARCH_ARM64)
6854 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6855#else
6856# error "Port me!"
6857#endif
6858}
6859
6860/**
6861 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6862 */
6863DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6864 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6865{
6866#ifdef RT_ARCH_AMD64
6867 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6868#elif defined(RT_ARCH_ARM64)
6869 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6870#else
6871# error "Port me!"
6872#endif
6873}
6874
6875
6876/**
6877 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6878 */
6879DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6880{
6881#ifdef RT_ARCH_AMD64
6882 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6883#elif defined(RT_ARCH_ARM64)
6884 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6885#else
6886# error "Port me!"
6887#endif
6888}
6889
6890/**
6891 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6892 */
6893DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6894 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6895{
6896#ifdef RT_ARCH_AMD64
6897 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6898#elif defined(RT_ARCH_ARM64)
6899 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6900#else
6901# error "Port me!"
6902#endif
6903}
6904
6905
6906/**
6907 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6908 */
6909DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6910{
6911#ifdef RT_ARCH_AMD64
6912 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6913#elif defined(RT_ARCH_ARM64)
6914 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6915#else
6916# error "Port me!"
6917#endif
6918}
6919
6920/**
6921 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6922 */
6923DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6924 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6925{
6926#ifdef RT_ARCH_AMD64
6927 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6928#elif defined(RT_ARCH_ARM64)
6929 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6930#else
6931# error "Port me!"
6932#endif
6933}
6934
6935
6936/**
6937 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6938 */
6939DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6940{
6941#ifdef RT_ARCH_AMD64
6942 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6943#elif defined(RT_ARCH_ARM64)
6944 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6945#else
6946# error "Port me!"
6947#endif
6948}
6949
6950/**
6951 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6952 */
6953DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6954 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6955{
6956#ifdef RT_ARCH_AMD64
6957 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6958#elif defined(RT_ARCH_ARM64)
6959 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6960#else
6961# error "Port me!"
6962#endif
6963}
6964
6965
6966/**
6967 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6968 */
6969DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6970{
6971#ifdef RT_ARCH_AMD64
6972 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6973#elif defined(RT_ARCH_ARM64)
6974 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6975#else
6976# error "Port me!"
6977#endif
6978}
6979
6980/**
6981 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6982 */
6983DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6984 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6985{
6986#ifdef RT_ARCH_AMD64
6987 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6988#elif defined(RT_ARCH_ARM64)
6989 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6990#else
6991# error "Port me!"
6992#endif
6993}
6994
6995
6996/**
6997 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6998 *
6999 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
7000 *
7001 * Only use hardcoded jumps forward when emitting for exactly one
7002 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
7003 * the right target address on all platforms!
7004 *
7005 * Please also note that on x86 it is necessary to pass off + 256 or
7006 * higher for @a offTarget if one believes the intervening code is more
7007 * than 127 bytes long.
7008 */
7009DECL_FORCE_INLINE(uint32_t)
7010iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
7011{
7012#ifdef RT_ARCH_AMD64
7013 /* jcc rel8 / rel32 */
7014 int32_t offDisp = (int32_t)(offTarget - (off + 2));
7015 if (offDisp < 128 && offDisp >= -128)
7016 {
7017 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
7018 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7019 }
7020 else
7021 {
7022 offDisp -= 4;
7023 pCodeBuf[off++] = 0x0f;
7024 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
7025 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7026 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7027 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7028 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7029 }
7030
7031#elif defined(RT_ARCH_ARM64)
7032 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
7033 off++;
7034#else
7035# error "Port me!"
7036#endif
7037 return off;
7038}
7039
7040
7041/**
7042 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
7043 *
7044 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
7045 *
7046 * Only use hardcoded jumps forward when emitting for exactly one
7047 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
7048 * the right target address on all platforms!
7049 *
7050 * Please also note that on x86 it is necessary to pass off + 256 or higher
7051 * for @a offTarget if one believes the intervening code is more than 127
7052 * bytes long.
7053 */
7054DECL_INLINE_THROW(uint32_t)
7055iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
7056{
7057#ifdef RT_ARCH_AMD64
7058 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
7059#elif defined(RT_ARCH_ARM64)
7060 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
7061#else
7062# error "Port me!"
7063#endif
7064 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7065 return off;
7066}
7067
7068
7069/**
7070 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
7071 *
7072 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7073 */
7074DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7075{
7076#ifdef RT_ARCH_AMD64
7077 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
7078#elif defined(RT_ARCH_ARM64)
7079 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
7080#else
7081# error "Port me!"
7082#endif
7083}
7084
7085
7086/**
7087 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
7088 *
7089 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7090 */
7091DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7092{
7093#ifdef RT_ARCH_AMD64
7094 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
7095#elif defined(RT_ARCH_ARM64)
7096 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
7097#else
7098# error "Port me!"
7099#endif
7100}
7101
7102
7103/**
7104 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
7105 *
7106 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7107 */
7108DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7109{
7110#ifdef RT_ARCH_AMD64
7111 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
7112#elif defined(RT_ARCH_ARM64)
7113 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
7114#else
7115# error "Port me!"
7116#endif
7117}
7118
7119
7120/**
7121 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
7122 *
7123 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7124 */
7125DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7126{
7127#ifdef RT_ARCH_AMD64
7128 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
7129#elif defined(RT_ARCH_ARM64)
7130 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
7131#else
7132# error "Port me!"
7133#endif
7134}
7135
7136
7137/**
7138 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7139 *
7140 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7141 */
7142DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
7143{
7144#ifdef RT_ARCH_AMD64
7145 /* jmp rel8 or rel32 */
7146 int32_t offDisp = offTarget - (off + 2);
7147 if (offDisp < 128 && offDisp >= -128)
7148 {
7149 pCodeBuf[off++] = 0xeb;
7150 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7151 }
7152 else
7153 {
7154 offDisp -= 3;
7155 pCodeBuf[off++] = 0xe9;
7156 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7157 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7158 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7159 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7160 }
7161
7162#elif defined(RT_ARCH_ARM64)
7163 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
7164 off++;
7165
7166#else
7167# error "Port me!"
7168#endif
7169 return off;
7170}
7171
7172
7173/**
7174 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7175 *
7176 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7177 */
7178DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7179{
7180#ifdef RT_ARCH_AMD64
7181 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
7182#elif defined(RT_ARCH_ARM64)
7183 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
7184#else
7185# error "Port me!"
7186#endif
7187 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7188 return off;
7189}
7190
7191
7192/**
7193 * Fixes up a conditional jump to a fixed label.
7194 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
7195 * iemNativeEmitJzToFixed, ...
7196 */
7197DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
7198{
7199#ifdef RT_ARCH_AMD64
7200 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
7201 uint8_t const bOpcode = pbCodeBuf[offFixup];
7202 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
7203 {
7204 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
7205 AssertStmt((int8_t)pbCodeBuf[offFixup + 1] == (int32_t)(offTarget - (offFixup + 2)),
7206 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
7207 }
7208 else
7209 {
7210 if (bOpcode != 0x0f)
7211 Assert(bOpcode == 0xe9);
7212 else
7213 {
7214 offFixup += 1;
7215 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < 0x10);
7216 }
7217 uint32_t const offRel32 = offTarget - (offFixup + 5);
7218 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
7219 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
7220 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
7221 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
7222 }
7223
7224#elif defined(RT_ARCH_ARM64)
7225 int32_t const offDisp = offTarget - offFixup;
7226 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
7227 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
7228 {
7229 /* B.COND + BC.COND */
7230 Assert(offDisp >= -262144 && offDisp < 262144);
7231 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7232 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
7233 }
7234 else if ((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000))
7235 {
7236 /* B imm26 */
7237 Assert(offDisp >= -33554432 && offDisp < 33554432);
7238 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
7239 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
7240 }
7241 else if ((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x34000000))
7242 {
7243 /* CBZ / CBNZ reg, imm19 */
7244 Assert((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x34000000));
7245 Assert(offDisp >= -262144 && offDisp < 262144);
7246 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7247 | (((uint32_t)offDisp << 5) & UINT32_C(0x00ffffe0));
7248 }
7249 else
7250 {
7251 /* TBZ / TBNZ reg, bit5, imm14 */
7252 Assert((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x36000000));
7253 Assert(offDisp >= -8192 && offDisp < 8192);
7254 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfff8001f))
7255 | (((uint32_t)offDisp << 5) & UINT32_C(0x0007ffe0));
7256 }
7257
7258#else
7259# error "Port me!"
7260#endif
7261}
7262
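/**
 * Example (hypothetical, not used by the recompiler itself): the fixed-jump
 * pattern described in the notes above - emit a forward Jcc with a
 * placeholder target far enough ahead to force the rel32 encoding on x86,
 * emit the code to be skipped (a marker stands in for it here), then patch
 * the branch once the real target offset is known.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleSkipMarkerIfGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uInfo)
{
    off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, 0);
    uint32_t const offFixup = off;
    off = iemNativeEmitJccToFixed(pReNative, off, off + 256 /*placeholder*/, kIemNativeInstrCond_e);
    off = iemNativeEmitMarker(pReNative, off, uInfo); /* stand-in for the skipped code */
    iemNativeFixupFixedJump(pReNative, offFixup, off);
    return off;
}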
7263
7264#ifdef RT_ARCH_AMD64
7265/**
7266 * For doing bt on a register.
7267 */
7268DECL_INLINE_THROW(uint32_t)
7269iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
7270{
7271 Assert(iBitNo < 64);
7272 /* bt Ev, imm8 */
7273 if (iBitNo >= 32)
7274 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7275 else if (iGprSrc >= 8)
7276 pCodeBuf[off++] = X86_OP_REX_B;
7277 pCodeBuf[off++] = 0x0f;
7278 pCodeBuf[off++] = 0xba;
7279 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7280 pCodeBuf[off++] = iBitNo;
7281 return off;
7282}
7283#endif /* RT_ARCH_AMD64 */
7284
7285
7286/**
7287 * Internal helper, don't call directly.
7288 */
7289DECL_INLINE_THROW(uint32_t)
7290iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7291 uint32_t offTarget, uint32_t *poffFixup, bool fJmpIfSet)
7292{
7293 Assert(iBitNo < 64);
7294#ifdef RT_ARCH_AMD64
7295 if (iBitNo < 8)
7296 {
7297 /* test Eb, imm8 */
7298 if (iGprSrc >= 4)
7299 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7300 pCodeBuf[off++] = 0xf6;
7301 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7302 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7303 if (poffFixup)
7304 *poffFixup = off;
7305 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7306 }
7307 else
7308 {
7309 /* bt Ev, imm8 */
7310 if (iBitNo >= 32)
7311 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7312 else if (iGprSrc >= 8)
7313 pCodeBuf[off++] = X86_OP_REX_B;
7314 pCodeBuf[off++] = 0x0f;
7315 pCodeBuf[off++] = 0xba;
7316 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7317 pCodeBuf[off++] = iBitNo;
7318 if (poffFixup)
7319 *poffFixup = off;
7320 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7321 }
7322
7323#elif defined(RT_ARCH_ARM64)
7324 /* Just use the TBNZ instruction here. */
7325 if (poffFixup)
7326 *poffFixup = off;
7327 pCodeBuf[off] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, (int32_t)(offTarget - off), iGprSrc, iBitNo); off++;
7328
7329#else
7330# error "Port me!"
7331#endif
7332 return off;
7333}
7334
7335
7336/**
7337 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _set_
7338 * in @a iGprSrc.
7339 */
7340DECL_INLINE_THROW(uint32_t)
7341iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7342 uint32_t offTarget, uint32_t *poffFixup)
7343{
7344 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, true /*fJmpIfSet*/);
7345}
7346
7347
7348/**
7349 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _not_
7350 * _set_ in @a iGprSrc.
7351 */
7352DECL_INLINE_THROW(uint32_t)
7353iemNativeEmitTestBitInGprAndJmpToFixedIfNotSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7354 uint32_t offTarget, uint32_t *poffFixup)
7355{
7356 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, false /*fJmpIfSet*/);
7357}
7358
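/*
 * Usage sketch (hypothetical): the poffFixup output is what makes these
 * usable for forward jumps whose target is not known yet:
 *
 *      uint32_t offFixup;
 *      off = iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(pCodeBuf, off, idxReg, 63,
 *                                                          off + 256, &offFixup); // placeholder target
 *      // ... emit the code being jumped over ...
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);
 *
 * where pCodeBuf came from iemNativeInstrBufEnsure() and idxReg is assumed
 * to be an allocated host register.
 */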
7359
7360
7361/**
7362 * Internal helper, don't call directly.
7363 */
7364DECL_INLINE_THROW(uint32_t)
7365iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7366 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7367{
7368 Assert(iBitNo < 64);
7369#ifdef RT_ARCH_AMD64
7370 if (iBitNo < 8)
7371 {
7372 /* test Eb, imm8 */
7373 if (iGprSrc >= 4)
7374 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7375 pCodeBuf[off++] = 0xf6;
7376 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7377 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7378 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7379 fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7380 }
7381 else
7382 {
7383 /* bt Ev, imm8 */
7384 if (iBitNo >= 32)
7385 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7386 else if (iGprSrc >= 8)
7387 pCodeBuf[off++] = X86_OP_REX_B;
7388 pCodeBuf[off++] = 0x0f;
7389 pCodeBuf[off++] = 0xba;
7390 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7391 pCodeBuf[off++] = iBitNo;
7392 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7393 fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7394 }
7395
7396#elif defined(RT_ARCH_ARM64)
7397 /* Use the TBNZ instruction here. */
7398 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7399 {
7400 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7401 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7402 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7403 //if (offLabel == UINT32_MAX)
7404 {
7405 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7406 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7407 }
7408 //else
7409 //{
7410 // RT_BREAKPOINT();
7411 // Assert(off - offLabel <= 0x1fffU);
7412 // pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7413 //
7414 //}
7415 }
7416 else
7417 {
7418 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7419 pCodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7420 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7421 pCodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7422 }
7423
7424#else
7425# error "Port me!"
7426#endif
7427 return off;
7428}
7429
7430
7431/**
7432 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7433 * @a iGprSrc.
7434 */
7435DECL_INLINE_THROW(uint32_t)
7436iemNativeEmitTestBitInGprAndJmpToLabelIfSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7437 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7438{
7439 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7440}
7441
7442
7443/**
7444 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7445 * _set_ in @a iGprSrc.
7446 */
7447DECL_INLINE_THROW(uint32_t)
7448iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7449 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7450{
7451 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7452}
7453
7454
7455/**
7456 * Internal helper, don't call directly.
7457 */
7458DECL_INLINE_THROW(uint32_t)
7459iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7460 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7461{
7462#ifdef RT_ARCH_AMD64
7463 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 5+6), off,
7464 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7465#elif defined(RT_ARCH_ARM64)
7466 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off,
7467 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7468#else
7469# error "Port me!"
7470#endif
7471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7472 return off;
7473}
7474
7475
7476/**
7477 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7478 * @a iGprSrc.
7479 */
7480DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7481 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7482{
7483 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7484}
7485
7486
7487/**
7488 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7489 * _set_ in @a iGprSrc.
7490 */
7491DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7492 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7493{
7494 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7495}
7496
7497
7498/**
7499 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7500 * flags accordingly.
7501 */
7502DECL_INLINE_THROW(uint32_t)
7503iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7504{
7505 Assert(fBits != 0);
7506#ifdef RT_ARCH_AMD64
7507
7508 if (fBits >= UINT32_MAX)
7509 {
7510 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7511
7512 /* test Ev,Gv */
7513 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7514 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7515 pbCodeBuf[off++] = 0x85;
7516 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7517
7518 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7519 }
7520 else if (fBits <= UINT32_MAX)
7521 {
7522 /* test Eb, imm8 or test Ev, imm32 */
7523 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7524 if (fBits <= UINT8_MAX)
7525 {
7526 if (iGprSrc >= 4)
7527 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7528 pbCodeBuf[off++] = 0xf6;
7529 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7530 pbCodeBuf[off++] = (uint8_t)fBits;
7531 }
7532 else
7533 {
7534 if (iGprSrc >= 8)
7535 pbCodeBuf[off++] = X86_OP_REX_B;
7536 pbCodeBuf[off++] = 0xf7;
7537 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7538 pbCodeBuf[off++] = RT_BYTE1(fBits);
7539 pbCodeBuf[off++] = RT_BYTE2(fBits);
7540 pbCodeBuf[off++] = RT_BYTE3(fBits);
7541 pbCodeBuf[off++] = RT_BYTE4(fBits);
7542 }
7543 }
7544 /** @todo implement me. */
7545 else
7546 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7547
7548#elif defined(RT_ARCH_ARM64)
7549 uint32_t uImmR = 0;
7550 uint32_t uImmNandS = 0;
7551 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7552 {
7553 /* ands xzr, iGprSrc, #fBits */
7554 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7555 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7556 }
7557 else
7558 {
7559 /* ands xzr, iGprSrc, iTmpReg */
7560 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7561 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7562 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7563 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7564 }
7565
7566#else
7567# error "Port me!"
7568#endif
7569 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7570 return off;
7571}
7572
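/*
 * Worked example (informative): on ARM64 the immediate form above is only
 * available when Armv8A64ConvertMask64ToImmRImmS() succeeds, i.e. for the
 * rotated run-of-ones patterns ANDS can encode.  fBits == UINT64_C(0xff000)
 * encodes directly; fBits == UINT64_C(0x5) has non-contiguous bits, cannot
 * be encoded, and falls back to loading the mask into a temporary register.
 */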
7573
7574/**
7575 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7576 * @a iGprSrc, setting CPU flags accordingly.
7577 *
7578 * @note For ARM64 this only supports @a fBits values that can be expressed
7579 * using the two 6-bit immediates of the ANDS instruction. The caller
7580 * must make sure this is possible!
7581 */
7582DECL_FORCE_INLINE_THROW(uint32_t)
7583iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits,
7584 uint8_t iTmpReg = UINT8_MAX)
7585{
7586 Assert(fBits != 0);
7587
7588#ifdef RT_ARCH_AMD64
7589 if (fBits <= UINT8_MAX)
7590 {
7591 /* test Eb, imm8 */
7592 if (iGprSrc >= 4)
7593 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7594 pCodeBuf[off++] = 0xf6;
7595 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7596 pCodeBuf[off++] = (uint8_t)fBits;
7597 }
7598 else
7599 {
7600 /* test Ev, imm32 */
7601 if (iGprSrc >= 8)
7602 pCodeBuf[off++] = X86_OP_REX_B;
7603 pCodeBuf[off++] = 0xf7;
7604 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7605 pCodeBuf[off++] = RT_BYTE1(fBits);
7606 pCodeBuf[off++] = RT_BYTE2(fBits);
7607 pCodeBuf[off++] = RT_BYTE3(fBits);
7608 pCodeBuf[off++] = RT_BYTE4(fBits);
7609 }
7610 RT_NOREF(iTmpReg);
7611
7612#elif defined(RT_ARCH_ARM64)
7613 /* ands xzr, src, #fBits */
7614 uint32_t uImmR = 0;
7615 uint32_t uImmNandS = 0;
7616 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7617 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7618 else if (iTmpReg != UINT8_MAX)
7619 {
7620 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iTmpReg, fBits);
7621 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7622 }
7623 else
7624# ifdef IEM_WITH_THROW_CATCH
7625 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7626# else
7627 AssertReleaseFailedStmt(off = UINT32_MAX);
7628# endif
7629
7630#else
7631# error "Port me!"
7632#endif
7633 return off;
7634}
7635
7636
7637
7638/**
7639 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7640 * @a iGprSrc, setting CPU flags accordingly.
7641 *
7642 * @note For ARM64 this only supports @a fBits values that can be expressed
7643 * using the two 6-bit immediates of the ANDS instruction. The caller
7644 * must make sure this is possible!
7645 */
7646DECL_FORCE_INLINE_THROW(uint32_t)
7647iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7648{
7649 Assert(fBits != 0);
7650
7651#ifdef RT_ARCH_AMD64
7652 /* test Eb, imm8 */
7653 if (iGprSrc >= 4)
7654 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7655 pCodeBuf[off++] = 0xf6;
7656 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7657 pCodeBuf[off++] = fBits;
7658
7659#elif defined(RT_ARCH_ARM64)
7660 /* ands xzr, src, #fBits */
7661 uint32_t uImmR = 0;
7662 uint32_t uImmNandS = 0;
7663 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7664 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7665 else
7666# ifdef IEM_WITH_THROW_CATCH
7667 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7668# else
7669 AssertReleaseFailedStmt(off = UINT32_MAX);
7670# endif
7671
7672#else
7673# error "Port me!"
7674#endif
7675 return off;
7676}
7677
7678
7679/**
7680 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7681 * @a iGprSrc, setting CPU flags accordingly.
7682 */
7683DECL_INLINE_THROW(uint32_t)
7684iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7685{
7686 Assert(fBits != 0);
7687
7688#ifdef RT_ARCH_AMD64
7689 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7690
7691#elif defined(RT_ARCH_ARM64)
7692 /* ands xzr, src, [tmp|#imm] */
7693 uint32_t uImmR = 0;
7694 uint32_t uImmNandS = 0;
7695 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7696 {
7697 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7698 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7699 }
7700 else
7701 {
7702 /* Use a temporary register for the immediate. */
7703 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7704 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7705 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7706 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7707 }
7708
7709#else
7710# error "Port me!"
7711#endif
7712 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7713 return off;
7714}
7715
7716
7717/**
7718 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7719 * are set in @a iGprSrc.
7720 */
7721DECL_INLINE_THROW(uint32_t)
7722iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7723 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7724{
7725 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7726
7727 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7728 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7729
7730 return off;
7731}
7732
7733
7734/**
7735 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7736 * are set in @a iGprSrc.
7737 */
7738DECL_INLINE_THROW(uint32_t)
7739iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7740 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7741{
7742 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7743
7744 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7745 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7746
7747 return off;
7748}
7749
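/*
 * Informative sketch: the !RT_IS_POWER_OF_TWO() assertions above exist
 * presumably because a single-bit mask is better served by the bit-test
 * helpers, e.g. for fBits == RT_BIT_64(iBitNo):
 *
 *      off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxReg, iBitNo, idxLabel);
 *
 * which uses TBNZ on ARM64 and so leaves the host flags untouched.
 */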
7750
7751/**
7752 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7753 *
7754 * The operand size is given by @a f64Bit.
7755 */
7756DECL_FORCE_INLINE_THROW(uint32_t)
7757iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7758 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7759{
7760 Assert(idxLabel < pReNative->cLabels);
7761
7762#ifdef RT_ARCH_AMD64
7763 /* test reg32,reg32 / test reg64,reg64 */
7764 if (f64Bit)
7765 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7766 else if (iGprSrc >= 8)
7767 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7768 pCodeBuf[off++] = 0x85;
7769 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7770
7771 /* jnz idxLabel */
7772 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7773 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7774
7775#elif defined(RT_ARCH_ARM64)
7776 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7777 {
7778 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7779 iGprSrc, f64Bit);
7780 off++;
7781 }
7782 else
7783 {
7784 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7785 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7786 }
7787
7788#else
7789# error "Port me!"
7790#endif
7791 return off;
7792}
7793
7794
7795/**
7796 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7797 *
7798 * The operand size is given by @a f64Bit.
7799 */
7800DECL_FORCE_INLINE_THROW(uint32_t)
7801iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7802 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7803{
7804#ifdef RT_ARCH_AMD64
7805 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7806 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7807#elif defined(RT_ARCH_ARM64)
7808 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7809 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7810#else
7811# error "Port me!"
7812#endif
7813 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7814 return off;
7815}
7816
7817
7818/**
7819 * Emits code that jumps to @a offTarget if @a iGprSrc is not zero.
7820 *
7821 * The operand size is given by @a f64Bit.
7822 */
7823DECL_FORCE_INLINE_THROW(uint32_t)
7824iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7825 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7826{
7827#ifdef RT_ARCH_AMD64
7828 /* test reg32,reg32 / test reg64,reg64 */
7829 if (f64Bit)
7830 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7831 else if (iGprSrc >= 8)
7832 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7833 pCodeBuf[off++] = 0x85;
7834 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7835
7836 /* jnz idxLabel */
7837 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget,
7838 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7839
7840#elif defined(RT_ARCH_ARM64)
7841 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(offTarget - off), iGprSrc, f64Bit);
7842 off++;
7843
7844#else
7845# error "Port me!"
7846#endif
7847 return off;
7848}
7849
7850
7851/**
7852 * Emits code that jumps to @a offTarget if @a iGprSrc is not zero.
7853 *
7854 * The operand size is given by @a f64Bit.
7855 */
7856DECL_FORCE_INLINE_THROW(uint32_t)
7857iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7858 bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7859{
7860#ifdef RT_ARCH_AMD64
7861 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7862 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7863#elif defined(RT_ARCH_ARM64)
7864 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1),
7865 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7866#else
7867# error "Port me!"
7868#endif
7869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7870 return off;
7871}
7872
7873
7874/* if (Gpr1 == 0) Jmp idxLabel; */
7875
7876/**
7877 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7878 *
7879 * The operand size is given by @a f64Bit.
7880 */
7881DECL_FORCE_INLINE_THROW(uint32_t)
7882iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7883 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7884{
7885 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7886 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7887}
7888
7889
7890/**
7891 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7892 *
7893 * The operand size is given by @a f64Bit.
7894 */
7895DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7896 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7897{
7898 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7899}
7900
7901
7902/**
7903 * Emits code that jumps to a new label if @a iGprSrc is zero.
7904 *
7905 * The operand size is given by @a f64Bit.
7906 */
7907DECL_INLINE_THROW(uint32_t)
7908iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7909 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7910{
7911 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7912 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7913}
7914
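/*
 * Usage sketch (hypothetical): branching to a pre-created label when a count
 * register hits zero - a single CBZ on ARM64, test+jz on x86:
 *
 *      off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegCount,
 *                                                      true, idxLabelDone); // f64Bit=true
 *
 * idxRegCount and idxLabelDone are illustrative names only.
 */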
7915
7916/**
7917 * Emits code that jumps to @a offTarget if @a iGprSrc is zero.
7918 *
7919 * The operand size is given by @a f64Bit.
7920 */
7921DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7922 uint8_t iGprSrc, bool f64Bit, uint32_t offTarget)
7923{
7924 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, offTarget);
7925}
7926
7927
7928/* if (Gpr1 != 0) Jmp idxLabel; */
7929
7930/**
7931 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7932 *
7933 * The operand size is given by @a f64Bit.
7934 */
7935DECL_FORCE_INLINE_THROW(uint32_t)
7936iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7937 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7938{
7939 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7940 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7941}
7942
7943
7944/**
7945 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7946 *
7947 * The operand size is given by @a f64Bit.
7948 */
7949DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7950 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7951{
7952 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7953}
7954
7955
7956/**
7957 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7958 *
7959 * The operand size is given by @a f64Bit.
7960 */
7961DECL_INLINE_THROW(uint32_t)
7962iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7963 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7964{
7965 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7966 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7967}
7968
7969
7970/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7971
7972/**
7973 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7974 * differ.
7975 */
7976DECL_INLINE_THROW(uint32_t)
7977iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7978 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7979{
7980 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7981 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7982 return off;
7983}
7984
7985
7986/**
7987 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7988 */
7989DECL_INLINE_THROW(uint32_t)
7990iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7991 uint8_t iGprLeft, uint8_t iGprRight,
7992 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7993{
7994 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7995 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7996}
7997
7998
7999/* if (Gpr != Imm) Jmp idxLabel; */
8000
8001/**
8002 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
8003 */
8004DECL_INLINE_THROW(uint32_t)
8005iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8006 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8007{
8008 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8009 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8010 return off;
8011}
8012
8013
8014/**
8015 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
8016 */
8017DECL_INLINE_THROW(uint32_t)
8018iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8019 uint8_t iGprSrc, uint64_t uImm,
8020 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8021{
8022 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8023 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8024}
8025
8026
8027/**
8028 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
8029 * @a uImm.
8030 */
8031DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8032 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8033{
8034 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8035 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8036 return off;
8037}
8038
8039
8040/**
8041 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
8042 * @a uImm.
8043 */
8044DECL_INLINE_THROW(uint32_t)
8045iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8046 uint8_t iGprSrc, uint32_t uImm,
8047 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8048{
8049 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8050 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8051}
8052
8053
8054/**
8055 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
8056 * @a uImm.
8057 */
8058DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8059 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
8060{
8061 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
8062 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8063 return off;
8064}
8065
8066
8067/**
8068 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
8069 * @a uImm.
8070 */
8071DECL_INLINE_THROW(uint32_t)
8072iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8073 uint8_t iGprSrc, uint16_t uImm,
8074 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8075{
8076 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8077 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8078}
8079
8080
8081/* if (Gpr == Imm) Jmp idxLabel; */
8082
8083/**
8084 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
8085 */
8086DECL_INLINE_THROW(uint32_t)
8087iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8088 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8089{
8090 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8091 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8092 return off;
8093}
8094
8095
8096/**
8097 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
8098 */
8099DECL_INLINE_THROW(uint32_t)
8100iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
8101 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8102{
8103 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8104 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8105}
8106
8107
8108/**
8109 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
8110 */
8111DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8112 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8113{
8114 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8115 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8116 return off;
8117}
8118
8119
8120/**
8121 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
8122 */
8123DECL_INLINE_THROW(uint32_t)
8124iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
8125 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8126{
8127 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8128 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8129}
8130
8131
8132/**
8133 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
8134 *
8135 * @note ARM64: Helper register is required (idxTmpReg).
8136 */
8137DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8138 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
8139 uint8_t idxTmpReg = UINT8_MAX)
8140{
8141 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
8142 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8143 return off;
8144}
8145
8146
8147/**
8148 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
8149 *
8150 * @note ARM64: Helper register is required (idxTmpReg).
8151 */
8152DECL_INLINE_THROW(uint32_t)
8153iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
8154 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
8155 uint8_t idxTmpReg = UINT8_MAX)
8156{
8157 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8158 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
8159}
8160
8161
8162
8163/*********************************************************************************************************************************
8164* Indirect Jumps. *
8165*********************************************************************************************************************************/
8166
8167/**
8168 * Emits an indirect jump to a 64-bit address in a GPR.
8169 */
8170DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
8171{
8172#ifdef RT_ARCH_AMD64
8173 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8174 if (iGprSrc >= 8)
8175 pCodeBuf[off++] = X86_OP_REX_B;
8176 pCodeBuf[off++] = 0xff;
8177 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8178
8179#elif defined(RT_ARCH_ARM64)
8180 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8181 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
8182
8183#else
8184# error "port me"
8185#endif
8186 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8187 return off;
8188}
8189
8190
8191/**
8192 * Emits an indirect jump to an immediate 64-bit address (uses the temporary GPR).
8193 */
8194DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8195{
8196 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8197 return iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0);
8198}
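
/**
 * A minimal usage sketch (illustrative only, not part of the emitter API):
 * tail-jumping to a fixed helper.  The helper iemNativeHlpExampleTarget is a
 * hypothetical stand-in; iemNativeEmitJmpImm loads its address into
 * IEMNATIVE_REG_FIXED_TMP0 and branches via the register, so the target may
 * live anywhere in the 64-bit address space (unlike rel32/imm26 branches).
 */
#if 0 /* usage sketch */
extern "C" void iemNativeHlpExampleTarget(void); /* hypothetical helper */

DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleTailJump(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* mov tmp0, &iemNativeHlpExampleTarget; jmp tmp0 */
    return iemNativeEmitJmpImm(pReNative, off, (uintptr_t)iemNativeHlpExampleTarget);
}
#endif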
8199
8200
8201/*********************************************************************************************************************************
8202* Calls. *
8203*********************************************************************************************************************************/
8204
8205/**
8206 * Emits a call to a 64-bit address.
8207 */
8208DECL_FORCE_INLINE(uint32_t) iemNativeEmitCallImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uintptr_t uPfn,
8209#ifdef RT_ARCH_AMD64
8210 uint8_t idxRegTmp = X86_GREG_xAX
8211#elif defined(RT_ARCH_ARM64)
8212 uint8_t idxRegTmp = IEMNATIVE_REG_FIXED_TMP0
8213#else
8214# error "Port me"
8215#endif
8216 )
8217{
8218 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegTmp, uPfn);
8219
8220#ifdef RT_ARCH_AMD64
8221 /* call idxRegTmp */
8222 if (idxRegTmp >= 8)
8223 pCodeBuf[off++] = X86_OP_REX_B;
8224 pCodeBuf[off++] = 0xff;
8225 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, idxRegTmp & 7);
8226
8227#elif defined(RT_ARCH_ARM64)
8228 pCodeBuf[off++] = Armv8A64MkInstrBlr(idxRegTmp);
8229
8230#else
8231# error "port me"
8232#endif
8233 return off;
8234}
8235
8236
8237/**
8238 * Emits a call to a 64-bit address.
8239 */
8240template<bool const a_fSkipEflChecks = false>
8241DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8242{
8243 if RT_CONSTEXPR_IF(!a_fSkipEflChecks)
8244 {
8245 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8246 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY( pReNative, X86_EFL_STATUS_BITS);
8247 }
8248
8249#ifdef RT_ARCH_AMD64
8250 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
8251
8252 /* call rax */
8253 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8254 pbCodeBuf[off++] = 0xff;
8255 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
8256
8257#elif defined(RT_ARCH_ARM64)
8258 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8259
8260 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8261 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
8262
8263#else
8264# error "port me"
8265#endif
8266 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8267 return off;
8268}
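
/**
 * A minimal usage sketch (illustrative only): calling a C helper with pVCpu as
 * the first argument.  This assumes the IEMNATIVE_CALL_ARG0_GREG and
 * IEMNATIVE_REG_FIXED_PVMCPU conventions from IEMN8veRecompiler.h; the helper
 * iemNativeHlpExample is hypothetical.
 */
#if 0 /* usage sketch */
extern "C" DECLCALLBACK(void) iemNativeHlpExample(PVMCPUCC pVCpu); /* hypothetical helper */

DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* arg0 = pVCpu */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    /* call iemNativeHlpExample */
    return iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExample);
}
#endif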
8269
8270
8271/**
8272 * Emits code to load a stack variable into an argument GPR.
8273 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8274 */
8275DECL_FORCE_INLINE_THROW(uint32_t)
8276iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8277 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
8278 bool fSpilledVarsInVolatileRegs = false)
8279{
8280 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8281 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8282 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8283
8284 uint8_t const idxRegVar = pVar->idxReg;
8285 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
8286 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
8287 || !fSpilledVarsInVolatileRegs ))
8288 {
8289 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
8290 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
8291 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
8292 if (!offAddend)
8293 {
8294 if (idxRegArg != idxRegVar)
8295 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
8296 }
8297 else
8298 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
8299 }
8300 else
8301 {
8302 uint8_t const idxStackSlot = pVar->idxStackSlot;
8303 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8304 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
8305 if (offAddend)
8306 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
8307 }
8308 return off;
8309}
8310
8311
8312/**
8313 * Emits code to load a stack or immediate variable value into an argument GPR,
8314 * optionally with an addend.
8315 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8316 */
8317DECL_FORCE_INLINE_THROW(uint32_t)
8318iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8319 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
8320 bool fSpilledVarsInVolatileRegs = false)
8321{
8322 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8323 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8324 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8325 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
8326 else
8327 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
8328 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
8329 return off;
8330}
8331
8332
8333/**
8334 * Emits code to load the variable address into an argument GPR.
8335 *
8336 * This only works for uninitialized and stack variables.
8337 */
8338DECL_FORCE_INLINE_THROW(uint32_t)
8339iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8340 bool fFlushShadows)
8341{
8342 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8343 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8344 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8345 || pVar->enmKind == kIemNativeVarKind_Stack,
8346 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8347 AssertStmt(!pVar->fSimdReg,
8348 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8349
8350 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8351 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8352
8353 uint8_t const idxRegVar = pVar->idxReg;
8354 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
8355 {
8356 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
8357 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
8358 Assert(pVar->idxReg == UINT8_MAX);
8359 }
8360 Assert( pVar->idxStackSlot != UINT8_MAX
8361 && pVar->idxReg == UINT8_MAX);
8362
8363 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8364}
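
/**
 * A minimal sketch of how the argument loaders above are typically combined
 * when setting up a helper call (illustrative only; the variable indexes and
 * the three-argument helper shape are assumptions).
 */
#if 0 /* usage sketch */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleArgSetup(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t idxVarDst)
{
    /* arg2 = &Dst - spills the variable to its stack slot and takes the slot's address. */
    off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarDst, true /*fFlushShadows*/);
    /* arg1 = value of a stack or immediate variable. */
    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue);
    /* arg0 = pVCpu */
    return iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
}
#endif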
8365
8366
8367
8368/*********************************************************************************************************************************
8369* TB exiting helpers. *
8370*********************************************************************************************************************************/
8371
8372#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8373/* IEMAllN8veEmit-x86.h: */
8374template<uint32_t const a_bmInputRegs>
8375DECL_FORCE_INLINE_THROW(uint32_t)
8376iemNativeDoPostponedEFlagsAtTbExitEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVEINSTR pCodeBuf);
8377
8378template<uint32_t const a_bmInputRegs>
8379DECL_FORCE_INLINE_THROW(uint32_t)
8380iemNativeDoPostponedEFlagsAtTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off);
8381#endif
8382
8383
8384/**
8385 * Helper for marking the current conditional branch as exiting the TB.
8386 *
8387 * This simplifies the state consolidation later when we reach the IEM_MC_ENDIF.
8388 */
8389DECL_FORCE_INLINE(void) iemNativeMarkCurCondBranchAsExiting(PIEMRECOMPILERSTATE pReNative)
8390{
8391 uint8_t idxCondDepth = pReNative->cCondDepth;
8392 if (idxCondDepth)
8393 {
8394 idxCondDepth--;
8395 pReNative->aCondStack[idxCondDepth].afExitTb[pReNative->aCondStack[idxCondDepth].fInElse] = true;
8396 }
8397}
8398
8399
8400/**
8401 * Unconditionally exits the translation block via a branch instruction.
8402 *
8403 * @note In case a delayed EFLAGS calculation is pending, this may emit up to
8404 *       IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS additional instructions.
8405 */
8406template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fActuallyExitingTb = true, bool const a_fPostponedEfl = true>
8407DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off)
8408{
8409 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8410 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8411
8412 if RT_CONSTEXPR_IF(a_fActuallyExitingTb)
8413 iemNativeMarkCurCondBranchAsExiting(pReNative);
8414
8415#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8416 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8417 off = iemNativeDoPostponedEFlagsAtTbExitEx<IEMNATIVELABELTYPE_GET_INPUT_REG_MASK(a_enmExitReason)>(pReNative, off,
8418 pCodeBuf);
8419#endif
8420
8421#ifdef RT_ARCH_AMD64
8422 /* jmp rel32 */
8423 pCodeBuf[off++] = 0xe9;
8424 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8425 pCodeBuf[off++] = 0xfe;
8426 pCodeBuf[off++] = 0xff;
8427 pCodeBuf[off++] = 0xff;
8428 pCodeBuf[off++] = 0xff;
8429
8430#elif defined(RT_ARCH_ARM64)
8431 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8432 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8433
8434#else
8435# error "Port me!"
8436#endif
8437 return off;
8438}
8439
8440
8441/**
8442 * Unconditionally exits the translation block via a branch instruction.
8443 *
8444 * @note In case a delayed EFLAGS calculation is pending, this may emit up to
8445 *       IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS additional instructions.
8446 */
8447template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fActuallyExitingTb = true, bool const a_fPostponedEfl = true>
8448DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8449{
8450 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8451 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8452
8453 if RT_CONSTEXPR_IF(a_fActuallyExitingTb)
8454 iemNativeMarkCurCondBranchAsExiting(pReNative);
8455
8456#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8457 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8458 off = iemNativeDoPostponedEFlagsAtTbExit<IEMNATIVELABELTYPE_GET_INPUT_REG_MASK(a_enmExitReason)>(pReNative, off);
8459#endif
8460
8461#ifdef RT_ARCH_AMD64
8462 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8463
8464 /* jmp rel32 */
8465 pCodeBuf[off++] = 0xe9;
8466 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8467 pCodeBuf[off++] = 0xfe;
8468 pCodeBuf[off++] = 0xff;
8469 pCodeBuf[off++] = 0xff;
8470 pCodeBuf[off++] = 0xff;
8471
8472#elif defined(RT_ARCH_ARM64)
8473 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8474 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8475 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8476
8477#else
8478# error "Port me!"
8479#endif
8480 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8481 return off;
8482}
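
/**
 * A minimal usage sketch (illustrative only): unconditionally leaving the TB,
 * e.g. to raise \#GP(0).  That kIemNativeLabelType_RaiseGp0 is one of the
 * IEMNATIVELABELTYPE exit reasons is an assumption here.
 */
#if 0 /* usage sketch */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* jmp <RaiseGp0 exit routine>, resolved via the TB exit fixup list. */
    return iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0>(pReNative, off);
}
#endif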
8483
8484
8485/**
8486 * Emits a Jcc rel32 / B.cc imm19 to the TB exit with the given reason (ASSUMED requiring fixup).
8487 *
8488 * @note In case a delayed EFLAGS calculation is pending, this may emit up to
8489 *       IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS additional instructions.
8490 */
8491template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8492DECL_FORCE_INLINE_THROW(uint32_t)
8493iemNativeEmitTbExitJccEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVEINSTRCOND enmCond)
8494{
8495 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8496 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8497
8498#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8499 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8500 if (pReNative->PostponedEfl.fEFlags)
8501 {
8502 /* Jcc l_NonPrimaryCodeStreamTarget */
8503 uint32_t const offFixup1 = off;
8504 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, enmCond);
8505
8506 /* JMP l_PrimaryCodeStreamResume */
8507 uint32_t const offFixup2 = off;
8508 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8509
8510 /* l_NonPrimaryCodeStreamTarget: */
8511 iemNativeFixupFixedJump(pReNative, offFixup1, off);
8512 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8513
8514 /* l_PrimaryCodeStreamResume: */
8515 iemNativeFixupFixedJump(pReNative, offFixup2, off);
8516 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8517 return off;
8518 }
8519#endif
8520
8521#if defined(RT_ARCH_AMD64)
8522 /* jcc rel32 */
8523 pCodeBuf[off++] = 0x0f;
8524 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
8525 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8526 pCodeBuf[off++] = 0x00;
8527 pCodeBuf[off++] = 0x00;
8528 pCodeBuf[off++] = 0x00;
8529 pCodeBuf[off++] = 0x00;
8530
8531#else
8532 /* ARM64 doesn't have the necessary jump range, so we jump via a local label
8533 just like when we keep everything local. */
8534 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8535 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel, enmCond);
8536#endif
8537 return off;
8538}
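
/*
 * Shape of the code iemNativeEmitTbExitJccEx produces when a postponed EFLAGS
 * calculation is pending (informative summary of the code above):
 *
 *         jcc     l_NonPrimaryCodeStreamTarget    ; the requested condition
 *         jmp     l_PrimaryCodeStreamResume       ; skip the exit path
 *     l_NonPrimaryCodeStreamTarget:
 *         ... postponed EFLAGS materialization ...
 *         jmp     <TB exit routine>               ; resolved via fixup
 *     l_PrimaryCodeStreamResume:
 *
 * This keeps the EFLAGS fix-up off the hot path: it only runs when the TB is
 * actually being exited.
 */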
8539
8540
8541/**
8542 * Emits a Jcc rel32 / B.cc imm19 to the TB exit with the given reason.
8543 */
8544template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8545DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJcc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEINSTRCOND enmCond)
8546{
8547 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8548 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8549
8550#ifdef RT_ARCH_AMD64
8551 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS + 5);
8552#elif defined(RT_ARCH_ARM64)
8553 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS + 1);
8554#else
8555# error "Port me!"
8556#endif
8557 off = iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, enmCond);
8558 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8559 return off;
8560}
8561
8562
8563/**
8564 * Emits a JNZ/JNE rel32 / B.NE imm19 to the TB exit routine with the given reason.
8565 */
8566template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8567DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJnz(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8568{
8569#ifdef RT_ARCH_AMD64
8570 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_ne);
8571#elif defined(RT_ARCH_ARM64)
8572 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Ne);
8573#else
8574# error "Port me!"
8575#endif
8576}
8577
8578
8579/**
8580 * Emits a JZ/JE rel32 / B.EQ imm19 to the TB exit routine with the given reason.
8581 */
8582template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8583DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJz(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8584{
8585#ifdef RT_ARCH_AMD64
8586 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_e);
8587#elif defined(RT_ARCH_ARM64)
8588 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Eq);
8589#else
8590# error "Port me!"
8591#endif
8592}
8593
8594
8595/**
8596 * Emits a JA/JNBE rel32 / B.HI imm19 to the TB exit.
8597 */
8598template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8599DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJa(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8600{
8601#ifdef RT_ARCH_AMD64
8602 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_nbe);
8603#elif defined(RT_ARCH_ARM64)
8604 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Hi);
8605#else
8606# error "Port me!"
8607#endif
8608}
8609
8610
8611/**
8612 * Emits a JL/JNGE rel32 / B.LT imm19 to the TB exit with the given reason.
8613 */
8614template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8615DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJl(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8616{
8617#ifdef RT_ARCH_AMD64
8618 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_l);
8619#elif defined(RT_ARCH_ARM64)
8620 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Lt);
8621#else
8622# error "Port me!"
8623#endif
8624}
8625
8626
8627/**
8628 * Emits a jump to the TB exit with @a a_enmExitReason on the condition _any_ of
8629 * the bits in @a fBits are set in @a iGprSrc.
8630 */
8631template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8632DECL_INLINE_THROW(uint32_t)
8633iemNativeEmitTbExitIfAnyBitsSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
8634{
8635 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8636
8637 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8638 return iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8639}
8640
8641
8642#if 0 /* unused */
8643/**
8644 * Emits a jump to the TB exit with @a a_enmExitReason on the condition _none_
8645 * of the bits in @a fBits are set in @a iGprSrc.
8646 */
8647template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8648DECL_INLINE_THROW(uint32_t)
8649iemNativeEmitTbExitIfNoBitsSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
8650{
8651 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8652
8653 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8654 return iemNativeEmitTbExitJz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8655}
8656#endif
8657
8658
8659#if 0 /* unused */
8660/**
8661 * Emits code that exits the TB with the given reason if @a iGprLeft and @a iGprRight
8662 * differs.
8663 */
8664template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8665DECL_INLINE_THROW(uint32_t)
8666iemNativeEmitTbExitIfGprNotEqualGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
8667{
8668 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8669 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8670 return off;
8671}
8672#endif
8673
8674
8675/**
8676 * Emits code that exits the current TB if 32-bit @a iGprSrc differs from
8677 * @a uImm.
8678 */
8679template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8680DECL_INLINE_THROW(uint32_t)
8681iemNativeEmitTbExitIfGpr32NotEqualImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm)
8682{
8683 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8684 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8685 return off;
8686}
8687
8688
8689/**
8690 * Emits code that exits the current TB if @a iGprSrc differs from @a uImm.
8691 */
8692template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8693DECL_INLINE_THROW(uint32_t)
8694iemNativeEmitTbExitIfGprNotEqualImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm)
8695{
8696 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8697 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8698 return off;
8699}
8700
8701
8702/**
8703 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc equals @a uImm.
8704 */
8705template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8706DECL_INLINE_THROW(uint32_t)
8707iemNativeEmitTbExitIfGpr32EqualsImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm)
8708{
8709 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8710 off = iemNativeEmitTbExitJz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8711 return off;
8712}
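
/**
 * A minimal usage sketch (illustrative only): a guard that leaves the TB when
 * a 32-bit register doesn't hold the expected value.  The choice of
 * kIemNativeLabelType_ReturnBreak as the exit reason is an assumption.
 */
#if 0 /* usage sketch */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleGuard(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegMode, uint32_t uExpected)
{
    /* if (idxRegMode != uExpected) -> TB exit */
    return iemNativeEmitTbExitIfGpr32NotEqualImm<kIemNativeLabelType_ReturnBreak>(pReNative, off, idxRegMode, uExpected);
}
#endif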
8713
8714
8715/**
8716 * Emits code to exit the current TB with the reason @a a_enmExitReason on the
8717 * condition that bit @a iBitNo _is_ _set_ in @a iGprSrc.
8718 *
8719 * @note On ARM64 the range is only +/-8191 instructions.
8720 */
8721template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8722DECL_INLINE_THROW(uint32_t)
8723iemNativeEmitTbExitIfBitSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
8724{
8725 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8726
8727#if defined(RT_ARCH_AMD64)
8728 Assert(iBitNo < 64);
8729 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8730 if (iBitNo < 8)
8731 {
8732 /* test Eb, imm8 */
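        /* The byte form only reaches bits 0..7; a REX prefix is needed for
           iGprSrc >= 4 so the low byte of rsp/rbp/rsi/rdi is addressed
           (SPL/BPL/SIL/DIL) rather than AH/CH/DH/BH. */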
8733 if (iGprSrc >= 4)
8734 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
8735 pbCodeBuf[off++] = 0xf6;
8736 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
8737 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
8738 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8739 off = iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_ne);
8740 }
8741 else
8742 {
8743 /* bt Ev, imm8 */
8744 if (iBitNo >= 32)
8745 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8746 else if (iGprSrc >= 8)
8747 pbCodeBuf[off++] = X86_OP_REX_B;
8748 pbCodeBuf[off++] = 0x0f;
8749 pbCodeBuf[off++] = 0xba;
8750 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8751 pbCodeBuf[off++] = iBitNo;
8752 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8753 off = iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_c);
8754 }
8755 return off;
8756
8757#elif defined(RT_ARCH_ARM64)
8758 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8759 /** @todo Perhaps we should always apply the PostponedEfl code pattern here,
8760 * it's the same number of instructions as the TST + B.CC stuff? */
8761# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8762 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8763 if (pReNative->PostponedEfl.fEFlags)
8764 {
8765 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
8766 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8767 pCodeBuf[off++] = Armv8A64MkInstrTbnz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, iBitNo);
8768 uint32_t const offFixup = off;
8769 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8770 /* l_NonPrimaryCodeStreamTarget: */
8771 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8772 /* l_PrimaryCodeStreamResume: */
8773 iemNativeFixupFixedJump(pReNative, offFixup, off);
8774 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8775 return off;
8776 }
8777# endif
8778 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8779 we go via a local trampoline. */
8780 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8781 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
8782#else
8783# error "port me"
8784#endif
8785}
8786
8787
8788/**
8789 * Emits code that exits the current TB with @a a_enmExitReason if @a iGprSrc is
8790 * not zero.
8791 *
8792 * The operand size is given by @a f64Bit.
8793 */
8794template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8795DECL_FORCE_INLINE_THROW(uint32_t)
8796iemNativeEmitTbExitIfGprIsNotZeroEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8797 uint8_t iGprSrc, bool f64Bit)
8798{
8799 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8800
8801#if defined(RT_ARCH_AMD64)
8802 /* test reg32,reg32 / test reg64,reg64 */
8803 if (f64Bit)
8804 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8805 else if (iGprSrc >= 8)
8806 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8807 pCodeBuf[off++] = 0x85;
8808 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8809
8810 /* jnz idxLabel */
8811 return iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
8812
8813#elif defined(RT_ARCH_ARM64)
8814 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8815# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8816 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8817 if (pReNative->PostponedEfl.fEFlags)
8818 {
8819 pCodeBuf[off++] = Armv8A64MkInstrCbnz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, f64Bit);
8820 uint32_t const offFixup = off;
8821 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8822 /* l_NonPrimaryCodeStreamTarget: */
8823 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8824 /* l_PrimaryCodeStreamResume: */
8825 iemNativeFixupFixedJump(pReNative, offFixup, off);
8826 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8827 return off;
8828 }
8829# endif
8830 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8831 we go via a local trampoline. */
8832 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8833 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8834 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8835#else
8836# error "port me"
8837#endif
8838}
8839
8840
8841/**
8842 * Emits code to exit the current TB with the given reason @a a_enmExitReason if
8843 * @a iGprSrc is not zero.
8844 *
8845 * The operand size is given by @a f64Bit.
8846 */
8847template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8848DECL_INLINE_THROW(uint32_t)
8849iemNativeEmitTbExitIfGprIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit)
8850{
8851#if defined(RT_ARCH_AMD64)
8852 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8853
8854#else
8855 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8856#endif
8857 off = iemNativeEmitTbExitIfGprIsNotZeroEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, iGprSrc, f64Bit);
8858 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8859 return off;
8860}
8861
8862
8863/**
8864 * Emits code that exits the current TB with @a a_enmExitReason if @a iGprSrc is
8865 * zero.
8866 *
8867 * The operand size is given by @a f64Bit.
8868 */
8869template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8870DECL_FORCE_INLINE_THROW(uint32_t)
8871iemNativeEmitTbExitIfGprIsZeroEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8872 uint8_t iGprSrc, bool f64Bit)
8873{
8874 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8875
8876#if defined(RT_ARCH_AMD64)
8877 /* test reg32,reg32 / test reg64,reg64 */
8878 if (f64Bit)
8879 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8880 else if (iGprSrc >= 8)
8881 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8882 pCodeBuf[off++] = 0x85;
8883 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8884
8885 /* jz idxLabel */
8886 return iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, kIemNativeInstrCond_e);
8887
8888#elif defined(RT_ARCH_ARM64)
8889 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8890# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8891 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8892 if (pReNative->PostponedEfl.fEFlags)
8893 {
8894 pCodeBuf[off++] = Armv8A64MkInstrCbz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, f64Bit);
8895 uint32_t const offFixup = off;
8896 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8897 /* l_NonPrimaryCodeStreamTarget: */
8898 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8899 /* l_PrimaryCodeStreamResume: */
8900 iemNativeFixupFixedJump(pReNative, offFixup, off);
8901 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8902 return off;
8903 }
8904# endif
8905 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8906 we go via a local trampoline. */
8907 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8908 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8909 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
8910#else
8911# error "port me"
8912#endif
8913}
8914
8915
8916/**
8917 * Emits code to exit the current TB with the given reason @a a_enmExitReason if @a iGprSrc is zero.
8918 *
8919 * The operand size is given by @a f64Bit.
8920 */
8921template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8922DECL_INLINE_THROW(uint32_t)
8923iemNativeEmitTbExitIfGprIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit)
8924{
8925#if defined(RT_ARCH_AMD64)
8926 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8927
8928#else
8929 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8930#endif
8931 off = iemNativeEmitTbExitIfGprIsZeroEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, iGprSrc, f64Bit);
8932 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8933 return off;
8934}
8935
8936
8937
8938/*********************************************************************************************************************************
8939* SIMD helpers. *
8940*********************************************************************************************************************************/
8941
8942/**
8943 * Emits code to load the variable address into an argument GPR.
8944 *
8945 * This is a special variant intended for SIMD variables only. It is called
8946 * only by the TLB miss path in the memory fetch/store code, because there the
8947 * value is passed by reference and both the register and the stack copy are
8948 * needed, depending on which path is taken (TLB hit vs. miss).
8949 */
8950DECL_FORCE_INLINE_THROW(uint32_t)
8951iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8952 bool fSyncRegWithStack = true)
8953{
8954 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8955 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8956 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8957 || pVar->enmKind == kIemNativeVarKind_Stack,
8958 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8959 AssertStmt(pVar->fSimdReg,
8960 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8961 Assert( pVar->idxStackSlot != UINT8_MAX
8962 && pVar->idxReg != UINT8_MAX);
8963
8964 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8965 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8966
8967 uint8_t const idxRegVar = pVar->idxReg;
8968 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8969 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
8970
8971 if (fSyncRegWithStack)
8972 {
8973 if (pVar->cbVar == sizeof(RTUINT128U))
8974 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
8975 else
8976 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
8977 }
8978
8979 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8980}
8981
8982
8983/**
8984 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
8985 *
8986 * This is a special helper that is called only by the TLB miss path in the
8987 * memory fetch/store code: there the value is passed by reference and ends up
8988 * on the stack, so the assigned host register has to be re-synced from the
8989 * stack copy afterwards.
8990 */
8991DECL_FORCE_INLINE_THROW(uint32_t)
8992iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
8993{
8994 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8995 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8996 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8997 || pVar->enmKind == kIemNativeVarKind_Stack,
8998 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8999 AssertStmt(pVar->fSimdReg,
9000 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9001 Assert( pVar->idxStackSlot != UINT8_MAX
9002 && pVar->idxReg != UINT8_MAX);
9003
9004 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9005 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
9006
9007 uint8_t const idxRegVar = pVar->idxReg;
9008 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9009 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
9010
9011 if (pVar->cbVar == sizeof(RTUINT128U))
9012 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
9013 else
9014 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
9015
9016 return off;
9017}
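
/**
 * A minimal sketch of the TLB-miss flow the two SIMD helpers above are built
 * for (illustrative only; the actual helper call in the middle is elided).
 */
#if 0 /* usage sketch */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitExampleSimdTlbMissPath(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVarValue)
{
    /* Sync the SIMD register to its stack slot and pass the slot's address by reference. */
    off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArg, idxVarValue);
    /* ... emit the call to the fetch/store helper here (elided) ... */
    /* A fetch helper wrote its result to the stack slot; reload the host SIMD register. */
    return iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
}
#endif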
9018
9019
9020/**
9021 * Emits a gprdst = ~gprsrc load.
9022 */
9023DECL_FORCE_INLINE_THROW(uint32_t)
9024iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
9025{
9026#ifdef RT_ARCH_AMD64
9027 if (iGprDst != iGprSrc)
9028 {
9029 /* mov gprdst, gprsrc. */
9030 if (f64Bit)
9031 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
9032 else
9033 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
9034 }
9035
9036 /* not gprdst */
9037 if (f64Bit || iGprDst >= 8)
9038 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
9039 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
9040 pCodeBuf[off++] = 0xf7;
9041 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
9042#elif defined(RT_ARCH_ARM64)
9043 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
9044#else
9045# error "port me"
9046#endif
9047 return off;
9048}
9049
9050
9051/**
9052 * Emits a gprdst = ~gprsrc load.
9053 */
9054DECL_INLINE_THROW(uint32_t)
9055iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
9056{
9057#ifdef RT_ARCH_AMD64
9058 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
9059#elif defined(RT_ARCH_ARM64)
9060 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
9061#else
9062# error "port me"
9063#endif
9064 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9065 return off;
9066}
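
/*
 * Note: the single-instruction ARM64 variant above uses ORN with the zero
 * register, relying on the identity ~src == (XZR | ~src).  A hypothetical
 * use, inverting a mask before applying it (iemNativeEmitAndGprByGpr is
 * assumed to be the AND emitter defined earlier in this file):
 *
 *     off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegMask);
 *     off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegDst, idxRegTmp);
 */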
9067
9068
9069/**
9070 * Emits a 128-bit vector register store to a VCpu value.
9071 */
9072DECL_FORCE_INLINE_THROW(uint32_t)
9073iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9074{
9075#ifdef RT_ARCH_AMD64
9076 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
9077 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9078 if (iVecReg >= 8)
9079 pCodeBuf[off++] = X86_OP_REX_R;
9080 pCodeBuf[off++] = 0x0f;
9081 pCodeBuf[off++] = 0x7f;
9082 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9083#elif defined(RT_ARCH_ARM64)
9084 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
9085
9086#else
9087# error "port me"
9088#endif
9089 return off;
9090}
9091
9092
9093/**
9094 * Emits a 128-bit vector register store to a VCpu value.
9095 */
9096DECL_INLINE_THROW(uint32_t)
9097iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9098{
9099#ifdef RT_ARCH_AMD64
9100 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
9101#elif defined(RT_ARCH_ARM64)
9102 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
9103#else
9104# error "port me"
9105#endif
9106 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9107 return off;
9108}
9109
9110
9111/**
9112 * Emits a high 128-bit vector register store to a VCpu value.
9113 */
9114DECL_FORCE_INLINE_THROW(uint32_t)
9115iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9116{
9117#ifdef RT_ARCH_AMD64
9118 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
9119 pCodeBuf[off++] = X86_OP_VEX3;
9120 if (iVecReg >= 8)
9121 pCodeBuf[off++] = 0x63;
9122 else
9123 pCodeBuf[off++] = 0xe3;
9124 pCodeBuf[off++] = 0x7d;
9125 pCodeBuf[off++] = 0x39;
9126 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9127 pCodeBuf[off++] = 0x01; /* Immediate */
9128#elif defined(RT_ARCH_ARM64)
9129 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
9130#else
9131# error "port me"
9132#endif
9133 return off;
9134}
9135
9136
9137/**
9138 * Emits a high 128-bit vector register store to a VCpu value.
9139 */
9140DECL_INLINE_THROW(uint32_t)
9141iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9142{
9143#ifdef RT_ARCH_AMD64
9144 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9145#elif defined(RT_ARCH_ARM64)
9146 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9147 Assert(!(iVecReg & 0x1));
9148 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9149#else
9150# error "port me"
9151#endif
9152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9153 return off;
9154}
9155
9156
9157/**
9158 * Emits a 128-bit vector register load of a VCpu value.
9159 */
9160DECL_FORCE_INLINE_THROW(uint32_t)
9161iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9162{
9163#ifdef RT_ARCH_AMD64
9164 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
9165 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9166 if (iVecReg >= 8)
9167 pCodeBuf[off++] = X86_OP_REX_R;
9168 pCodeBuf[off++] = 0x0f;
9169 pCodeBuf[off++] = 0x6f;
9170 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9171#elif defined(RT_ARCH_ARM64)
9172 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9173
9174#else
9175# error "port me"
9176#endif
9177 return off;
9178}
9179
9180
9181/**
9182 * Emits a 128-bit vector register load of a VCpu value.
9183 */
9184DECL_INLINE_THROW(uint32_t)
9185iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9186{
9187#ifdef RT_ARCH_AMD64
9188 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
9189#elif defined(RT_ARCH_ARM64)
9190 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
9191#else
9192# error "port me"
9193#endif
9194 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9195 return off;
9196}
9197
9198
9199/**
9200 * Emits a high 128-bit vector register load of a VCpu value.
9201 */
9202DECL_FORCE_INLINE_THROW(uint32_t)
9203iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9204{
9205#ifdef RT_ARCH_AMD64
9206 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
9207 pCodeBuf[off++] = X86_OP_VEX3;
9208 if (iVecReg >= 8)
9209 pCodeBuf[off++] = 0x63;
9210 else
9211 pCodeBuf[off++] = 0xe3;
9212 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9213 pCodeBuf[off++] = 0x38;
9214 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9215 pCodeBuf[off++] = 0x01; /* Immediate */
9216#elif defined(RT_ARCH_ARM64)
9217 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9218#else
9219# error "port me"
9220#endif
9221 return off;
9222}
9223
9224
9225/**
9226 * Emits a high 128-bit vector register load of a VCpu value.
9227 */
9228DECL_INLINE_THROW(uint32_t)
9229iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9230{
9231#ifdef RT_ARCH_AMD64
9232 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9233#elif defined(RT_ARCH_ARM64)
9234 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9235 Assert(!(iVecReg & 0x1));
9236 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9237#else
9238# error "port me"
9239#endif
9240 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9241 return off;
9242}
9243
9244
9245/**
9246 * Emits a vecdst = vecsrc load.
9247 */
9248DECL_FORCE_INLINE(uint32_t)
9249iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9250{
9251#ifdef RT_ARCH_AMD64
9252 /* movdqu vecdst, vecsrc */
9253 pCodeBuf[off++] = 0xf3;
9254
9255 if ((iVecRegDst | iVecRegSrc) >= 8)
9256 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
9257 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
9258 : X86_OP_REX_R;
9259 pCodeBuf[off++] = 0x0f;
9260 pCodeBuf[off++] = 0x6f;
9261 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9262
9263#elif defined(RT_ARCH_ARM64)
9264 /* mov dst, src; alias for: orr dst, src, src */
9265 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9266
9267#else
9268# error "port me"
9269#endif
9270 return off;
9271}
9272
9273
9274/**
9275 * Emits a vecdst = vecsrc load, 128-bit.
9276 */
9277DECL_INLINE_THROW(uint32_t)
9278iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9279{
9280#ifdef RT_ARCH_AMD64
9281 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9282#elif defined(RT_ARCH_ARM64)
9283 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9284#else
9285# error "port me"
9286#endif
9287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9288 return off;
9289}
9290
9291
9292/**
9293 * Emits a vecdst[128:255] = vecsrc[128:255] load.
9294 */
9295DECL_FORCE_INLINE_THROW(uint32_t)
9296iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9297{
9298#ifdef RT_ARCH_AMD64
9299 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
9300 pCodeBuf[off++] = X86_OP_VEX3;
9301 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9302 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9303 pCodeBuf[off++] = 0x46;
9304 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9305 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
9306
9307#elif defined(RT_ARCH_ARM64)
9308 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9309
9310 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
9311# ifdef IEM_WITH_THROW_CATCH
9312 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9313# else
9314 AssertReleaseFailedStmt(off = UINT32_MAX);
9315# endif
9316#else
9317# error "port me"
9318#endif
9319 return off;
9320}
9321
9322
9323/**
9324 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
9325 */
9326DECL_INLINE_THROW(uint32_t)
9327iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9328{
9329#ifdef RT_ARCH_AMD64
9330 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9331#elif defined(RT_ARCH_ARM64)
9332 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9333 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
9334#else
9335# error "port me"
9336#endif
9337 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9338 return off;
9339}
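
/*
 * Worked example for the vperm2i128 immediate used above (informative):
 * imm8[1:0] picks the source of dst[127:0] from {src1.lo, src1.hi, src2.lo,
 * src2.hi} and imm8[5:4] does the same for dst[255:128].  With src1 = dst,
 * src2 = src and imm8 = 0x30: field [1:0] = 0 keeps dst's low lane and
 * field [5:4] = 3 copies src's high lane, which is exactly the high-to-high
 * move wanted here.
 */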
9340
9341
9342/**
9343 * Emits a vecdst[0:127] = vecsrc[128:255] load.
9344 */
9345DECL_FORCE_INLINE_THROW(uint32_t)
9346iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9347{
9348#ifdef RT_ARCH_AMD64
9349 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
9350 pCodeBuf[off++] = X86_OP_VEX3;
9351 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
9352 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9353 pCodeBuf[off++] = 0x39;
9354 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
9355 pCodeBuf[off++] = 0x1;
9356
9357#elif defined(RT_ARCH_ARM64)
9358 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9359
9360 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
9361# ifdef IEM_WITH_THROW_CATCH
9362 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9363# else
9364 AssertReleaseFailedStmt(off = UINT32_MAX);
9365# endif
9366#else
9367# error "port me"
9368#endif
9369 return off;
9370}
9371
9372
9373/**
9374 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
9375 */
9376DECL_INLINE_THROW(uint32_t)
9377iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9378{
9379#ifdef RT_ARCH_AMD64
9380 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
9381#elif defined(RT_ARCH_ARM64)
9382 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9383 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
9384#else
9385# error "port me"
9386#endif
9387 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9388 return off;
9389}
9390
9391
9392/**
9393 * Emits a vecdst = vecsrc load, 256-bit.
9394 */
9395DECL_INLINE_THROW(uint32_t)
9396iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9397{
9398#ifdef RT_ARCH_AMD64
9399 /* vmovdqa ymm, ymm */
9400 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9401 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
9402 {
9403 pbCodeBuf[off++] = X86_OP_VEX3;
9404 pbCodeBuf[off++] = 0x41;
9405 pbCodeBuf[off++] = 0x7d;
9406 pbCodeBuf[off++] = 0x6f;
9407 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9408 }
9409 else
9410 {
9411 pbCodeBuf[off++] = X86_OP_VEX2;
9412 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
9413 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
9414 pbCodeBuf[off++] = iVecRegSrc >= 8
9415 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
9416 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9417 }
9418#elif defined(RT_ARCH_ARM64)
9419 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9420 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
9421 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
9422 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
9423#else
9424# error "port me"
9425#endif
9426 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9427 return off;
9428}
9429
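/* Note: the two-byte VEX prefix only carries the inverted REX.R bit, so the
   emitter above can stay with VEX2 as long as at most one register needs
   extending: if that register is the source, it switches to the vmovdqa
   store form (0x7f) so the extended register lands in the ModRM reg field.
   Only when both registers are in the ymm8..ymm15 range is VEX3 required. */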
9430
9431/**
9432 * Emits a vecdst[128:255] = vecsrc[0:127] load.
9433 */
9434DECL_FORCE_INLINE(uint32_t)
9435iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9436{
9437#ifdef RT_ARCH_AMD64
9438 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
9439 pCodeBuf[off++] = X86_OP_VEX3;
9440 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9441 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9442 pCodeBuf[off++] = 0x38;
9443 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9444 pCodeBuf[off++] = 0x01; /* Immediate */
9445
9446#elif defined(RT_ARCH_ARM64)
9447 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9448 /* mov dst, src; alias for: orr dst, src, src */
9449 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9450
9451#else
9452# error "port me"
9453#endif
9454 return off;
9455}
9456
9457
9458/**
9459 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
9460 */
9461DECL_INLINE_THROW(uint32_t)
9462iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9463{
9464#ifdef RT_ARCH_AMD64
9465 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
9466#elif defined(RT_ARCH_ARM64)
9467 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9468#else
9469# error "port me"
9470#endif
9471 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9472 return off;
9473}
9474
9475
9476/**
9477 * Emits a gprdst = vecsrc[x] load, 64-bit.
9478 */
9479DECL_FORCE_INLINE(uint32_t)
9480iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9481{
9482#ifdef RT_ARCH_AMD64
9483 if (iQWord >= 2)
9484 {
9485 /*
9486 * vpextrq doesn't work on the upper 128-bits.
9487 * So we use the following sequence:
9488 * vextracti128 vectmp0, vecsrc, 1
9489 * pextrq gpr, vectmp0, #(iQWord - 2)
9490 */
9491 /* vextracti128 */
9492 pCodeBuf[off++] = X86_OP_VEX3;
9493 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9494 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9495 pCodeBuf[off++] = 0x39;
9496 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9497 pCodeBuf[off++] = 0x1;
9498
9499 /* pextrq */
9500 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9501 pCodeBuf[off++] = X86_OP_REX_W
9502 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9503 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9504 pCodeBuf[off++] = 0x0f;
9505 pCodeBuf[off++] = 0x3a;
9506 pCodeBuf[off++] = 0x16;
9507 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9508 pCodeBuf[off++] = iQWord - 2;
9509 }
9510 else
9511 {
9512 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
9513 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9514 pCodeBuf[off++] = X86_OP_REX_W
9515 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9516 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9517 pCodeBuf[off++] = 0x0f;
9518 pCodeBuf[off++] = 0x3a;
9519 pCodeBuf[off++] = 0x16;
9520 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9521 pCodeBuf[off++] = iQWord;
9522 }
9523#elif defined(RT_ARCH_ARM64)
9524 /* umov gprdst, vecsrc[iQWord] */
9525 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9526#else
9527# error "port me"
9528#endif
9529 return off;
9530}
9531
9532
9533/**
9534 * Emits a gprdst = vecsrc[x] load, 64-bit.
9535 */
9536DECL_INLINE_THROW(uint32_t)
9537iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9538{
9539 Assert(iQWord <= 3);
9540
9541#ifdef RT_ARCH_AMD64
9542 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
9543#elif defined(RT_ARCH_ARM64)
9544 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9545 Assert(!(iVecRegSrc & 0x1));
9546 /* Need to access the "high" 128-bit vector register. */
9547 if (iQWord >= 2)
9548 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
9549 else
9550 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
9551#else
9552# error "port me"
9553#endif
9554 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9555 return off;
9556}
9557
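/* A minimal usage sketch (illustrative only; idxGpr and idxVecReg are
   hypothetical values from the register allocator): fetching the topmost
   qword of a 256-bit value. Note that the AMD64 iQWord >= 2 path clobbers
   IEMNATIVE_SIMD_REG_FIXED_TMP0, so nothing live may be parked there.
   @code
       // idxGpr = vecreg.au64[3]
       off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGpr, idxVecReg, 3 /*iQWord*/);
   @endcode */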
9558
9559/**
9560 * Emits a gprdst = vecsrc[x] load, 32-bit.
9561 */
9562DECL_FORCE_INLINE(uint32_t)
9563iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9564{
9565#ifdef RT_ARCH_AMD64
9566 if (iDWord >= 4)
9567 {
9568 /*
9569 * vpextrd doesn't work on the upper 128-bits.
9570 * So we use the following sequence:
9571 * vextracti128 vectmp0, vecsrc, 1
9572 * pextrd gpr, vectmp0, #(iDWord - 4)
9573 */
9574 /* vextracti128 */
9575 pCodeBuf[off++] = X86_OP_VEX3;
9576 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9577 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9578 pCodeBuf[off++] = 0x39;
9579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9580 pCodeBuf[off++] = 0x1;
9581
9582 /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
9583 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9584 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
9585 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9586 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9587 pCodeBuf[off++] = 0x0f;
9588 pCodeBuf[off++] = 0x3a;
9589 pCodeBuf[off++] = 0x16;
9590 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9591 pCodeBuf[off++] = iDWord - 4;
9592 }
9593 else
9594 {
9595 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
9596 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9597 if (iGprDst >= 8 || iVecRegSrc >= 8)
9598 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9599 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9600 pCodeBuf[off++] = 0x0f;
9601 pCodeBuf[off++] = 0x3a;
9602 pCodeBuf[off++] = 0x16;
9603 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9604 pCodeBuf[off++] = iDWord;
9605 }
9606#elif defined(RT_ARCH_ARM64)
9607 Assert(iDWord < 4);
9608
9609 /* umov gprdst, vecsrc[iDWord] */
9610 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
9611#else
9612# error "port me"
9613#endif
9614 return off;
9615}
9616
9617
9618/**
9619 * Emits a gprdst = vecsrc[x] load, 32-bit.
9620 */
9621DECL_INLINE_THROW(uint32_t)
9622iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9623{
9624 Assert(iDWord <= 7);
9625
9626#ifdef RT_ARCH_AMD64
9627 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
9628#elif defined(RT_ARCH_ARM64)
9629 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9630 Assert(!(iVecRegSrc & 0x1));
9631 /* Need to access the "high" 128-bit vector register. */
9632 if (iDWord >= 4)
9633 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
9634 else
9635 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
9636#else
9637# error "port me"
9638#endif
9639 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9640 return off;
9641}
9642
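/* Note: both backends zero-extend the loaded element into the full 64-bit
   GPR: a 32-bit GPR write implicitly clears bits 63:32 on AMD64, and the
   ARM64 umov above is emitted with a 32-bit destination (fDst64Bit=false),
   which behaves the same way. */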
9643
9644/**
9645 * Emits a gprdst = vecsrc[x] load, 16-bit.
9646 */
9647DECL_FORCE_INLINE(uint32_t)
9648iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9649{
9650#ifdef RT_ARCH_AMD64
9651 if (iWord >= 8)
9652 {
9653 /** @todo Currently not used. */
9654 AssertReleaseFailed();
9655 }
9656 else
9657 {
9658 /* pextrw gpr, vecsrc, #iWord */
9659 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9660 if (iGprDst >= 8 || iVecRegSrc >= 8)
9661 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
9662 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
9663 pCodeBuf[off++] = 0x0f;
9664 pCodeBuf[off++] = 0xc5;
9665 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
9666 pCodeBuf[off++] = iWord;
9667 }
9668#elif defined(RT_ARCH_ARM64)
9669 /* umov gprdst, vecsrc[iWord] */
9670 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
9671#else
9672# error "port me"
9673#endif
9674 return off;
9675}
9676
9677
9678/**
9679 * Emits a gprdst = vecsrc[x] load, 16-bit.
9680 */
9681DECL_INLINE_THROW(uint32_t)
9682iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9683{
9684 Assert(iWord <= 15);
9685
9686#ifdef RT_ARCH_AMD64
9687 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
9688#elif defined(RT_ARCH_ARM64)
9689 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9690 Assert(!(iVecRegSrc & 0x1));
9691 /* Need to access the "high" 128-bit vector register. */
9692 if (iWord >= 8)
9693 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
9694 else
9695 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
9696#else
9697# error "port me"
9698#endif
9699 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9700 return off;
9701}
9702
9703
9704/**
9705 * Emits a gprdst = vecsrc[x] load, 8-bit.
9706 */
9707DECL_FORCE_INLINE(uint32_t)
9708iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9709{
9710#ifdef RT_ARCH_AMD64
9711 if (iByte >= 16)
9712 {
9713 /** @todo Currently not used. */
9714 AssertReleaseFailed();
9715 }
9716 else
9717 {
9718 /* pextrb gpr, vecsrc, #iByte */
9719 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9720 if (iGprDst >= 8 || iVecRegSrc >= 8)
9721 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9722 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9723 pCodeBuf[off++] = 0x0f;
9724 pCodeBuf[off++] = 0x3a;
9725 pCodeBuf[off++] = 0x14;
9726 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9727 pCodeBuf[off++] = iByte;
9728 }
9729#elif defined(RT_ARCH_ARM64)
9730 /* umov gprdst, vecsrc[iByte] */
9731 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
9732#else
9733# error "port me"
9734#endif
9735 return off;
9736}
9737
9738
9739/**
9740 * Emits a gprdst = vecsrc[x] load, 8-bit.
9741 */
9742DECL_INLINE_THROW(uint32_t)
9743iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9744{
9745 Assert(iByte <= 31);
9746
9747#ifdef RT_ARCH_AMD64
9748 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
9749#elif defined(RT_ARCH_ARM64)
9750 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9751 Assert(!(iVecRegSrc & 0x1));
9752 /* Need to access the "high" 128-bit vector register. */
9753 if (iByte >= 16)
9754 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
9755 else
9756 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
9757#else
9758# error "port me"
9759#endif
9760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9761 return off;
9762}
9763
9764
9765/**
9766 * Emits a vecdst[x] = gprsrc store, 64-bit.
9767 */
9768DECL_FORCE_INLINE(uint32_t)
9769iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9770{
9771#ifdef RT_ARCH_AMD64
9772 if (iQWord >= 2)
9773 {
9774 /*
9775 * vpinsrq doesn't work on the upper 128-bits.
9776 * So we use the following sequence:
9777 * vextracti128 vectmp0, vecdst, 1
9778 * pinsrq vectmp0, gpr, #(iQWord - 2)
9779 * vinserti128 vecdst, vectmp0, 1
9780 */
9781 /* vextracti128 */
9782 pCodeBuf[off++] = X86_OP_VEX3;
9783 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9784 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9785 pCodeBuf[off++] = 0x39;
9786 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9787 pCodeBuf[off++] = 0x1;
9788
9789 /* pinsrq */
9790 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9791 pCodeBuf[off++] = X86_OP_REX_W
9792 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9793 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9794 pCodeBuf[off++] = 0x0f;
9795 pCodeBuf[off++] = 0x3a;
9796 pCodeBuf[off++] = 0x22;
9797 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9798 pCodeBuf[off++] = iQWord - 2;
9799
9800 /* vinserti128 */
9801 pCodeBuf[off++] = X86_OP_VEX3;
9802 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9803 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9804 pCodeBuf[off++] = 0x38;
9805 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9806 pCodeBuf[off++] = 0x01; /* Immediate */
9807 }
9808 else
9809 {
9810 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
9811 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9812 pCodeBuf[off++] = X86_OP_REX_W
9813 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9814 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9815 pCodeBuf[off++] = 0x0f;
9816 pCodeBuf[off++] = 0x3a;
9817 pCodeBuf[off++] = 0x22;
9818 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9819 pCodeBuf[off++] = iQWord;
9820 }
9821#elif defined(RT_ARCH_ARM64)
9822 /* ins vecdst[iQWord], gpr */
9823 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9824#else
9825# error "port me"
9826#endif
9827 return off;
9828}
9829
9830
9831/**
9832 * Emits a vecdst[x] = gprsrc store, 64-bit.
9833 */
9834DECL_INLINE_THROW(uint32_t)
9835iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9836{
9837 Assert(iQWord <= 3);
9838
9839#ifdef RT_ARCH_AMD64
9840 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
9841#elif defined(RT_ARCH_ARM64)
9842 Assert(!(iVecRegDst & 0x1));
9843 if (iQWord >= 2)
9844 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
9845 else
9846 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
9847#else
9848# error "port me"
9849#endif
9850 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9851 return off;
9852}
9853
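/* The 19 byte ensure above is the AMD64 worst case spelled out: 6 bytes of
   vextracti128 + 7 bytes of REX.W pinsrq + 6 bytes of vinserti128, i.e. an
   upper-lane store is a read-modify-write of the whole 256-bit register via
   IEMNATIVE_SIMD_REG_FIXED_TMP0. Illustrative usage (hypothetical indices):
   @code
       // vecreg.au64[2] = idxGpr, taking the three instruction path on AMD64
       off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVecReg, idxGpr, 2 /*iQWord*/);
   @endcode */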
9854
9855/**
9856 * Emits a vecdst[x] = gprsrc store, 32-bit.
9857 */
9858DECL_FORCE_INLINE(uint32_t)
9859iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9860{
9861#ifdef RT_ARCH_AMD64
9862 if (iDWord >= 4)
9863 {
9864 /*
9865 * vpinsrd doesn't work on the upper 128-bits.
9866 * So we use the following sequence:
9867 * vextracti128 vectmp0, vecdst, 1
9868 * pinsrd vectmp0, gpr, #(iDWord - 4)
9869 * vinserti128 vecdst, vectmp0, 1
9870 */
9871 /* vextracti128 */
9872 pCodeBuf[off++] = X86_OP_VEX3;
9873 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9874 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9875 pCodeBuf[off++] = 0x39;
9876 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9877 pCodeBuf[off++] = 0x1;
9878
9879 /* pinsrd */
9880 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9881 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
9882 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9883 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9884 pCodeBuf[off++] = 0x0f;
9885 pCodeBuf[off++] = 0x3a;
9886 pCodeBuf[off++] = 0x22;
9887 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9888 pCodeBuf[off++] = iDWord - 4;
9889
9890 /* vinserti128 */
9891 pCodeBuf[off++] = X86_OP_VEX3;
9892 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9893 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9894 pCodeBuf[off++] = 0x38;
9895 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9896 pCodeBuf[off++] = 0x01; /* Immediate */
9897 }
9898 else
9899 {
9900 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
9901 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9902 if (iVecRegDst >= 8 || iGprSrc >= 8)
9903 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9904 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9905 pCodeBuf[off++] = 0x0f;
9906 pCodeBuf[off++] = 0x3a;
9907 pCodeBuf[off++] = 0x22;
9908 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9909 pCodeBuf[off++] = iDWord;
9910 }
9911#elif defined(RT_ARCH_ARM64)
9912 /* ins vecdst[iDWord], gpr */
9913 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
9914#else
9915# error "port me"
9916#endif
9917 return off;
9918}
9919
9920
9921/**
9922 * Emits a vecdst[x] = gprsrc store, 32-bit.
9923 */
9924DECL_INLINE_THROW(uint32_t)
9925iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9926{
9927 Assert(iDWord <= 7);
9928
9929#ifdef RT_ARCH_AMD64
9930 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
9931#elif defined(RT_ARCH_ARM64)
9932 Assert(!(iVecRegDst & 0x1));
9933 if (iDWord >= 4)
9934 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
9935 else
9936 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
9937#else
9938# error "port me"
9939#endif
9940 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9941 return off;
9942}
9943
9944
9945/**
9946 * Emits a vecdst[x] = gprsrc store, 16-bit.
9947 */
9948DECL_FORCE_INLINE(uint32_t)
9949iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9950{
9951#ifdef RT_ARCH_AMD64
9952 /* pinsrw vecdst, gpr, #iWord. */
9953 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9954 if (iVecRegDst >= 8 || iGprSrc >= 8)
9955 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9956 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9957 pCodeBuf[off++] = 0x0f;
9958 pCodeBuf[off++] = 0xc4;
9959 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9960 pCodeBuf[off++] = iWord;
9961#elif defined(RT_ARCH_ARM64)
9962 /* ins vecdst[iWord], gpr */
9963 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
9964#else
9965# error "port me"
9966#endif
9967 return off;
9968}
9969
9970
9971/**
9972 * Emits a vecdst[x] = gprsrc store, 16-bit.
9973 */
9974DECL_INLINE_THROW(uint32_t)
9975iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9976{
9977 Assert(iWord <= 7); /* Neither backend reaches the high 128 bits here. */
9978
9979#ifdef RT_ARCH_AMD64
9980 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
9981#elif defined(RT_ARCH_ARM64)
9982 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
9983#else
9984# error "port me"
9985#endif
9986 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9987 return off;
9988}
9989
9990
9991/**
9992 * Emits a vecdst[x] = gprsrc store, 8-bit.
9993 */
9994DECL_FORCE_INLINE(uint32_t)
9995iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
9996{
9997#ifdef RT_ARCH_AMD64
9998 /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
9999 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10000 if (iVecRegDst >= 8 || iGprSrc >= 8)
10001 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10002 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10003 pCodeBuf[off++] = 0x0f;
10004 pCodeBuf[off++] = 0x3a;
10005 pCodeBuf[off++] = 0x20;
10006 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10007 pCodeBuf[off++] = iByte;
10008#elif defined(RT_ARCH_ARM64)
10009 /* ins vecdst[iByte], gpr */
10010 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
10011#else
10012# error "port me"
10013#endif
10014 return off;
10015}
10016
10017
10018/**
10019 * Emits a vecdst[x] = gprsrc store, 8-bit.
10020 */
10021DECL_INLINE_THROW(uint32_t)
10022iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
10023{
10024 Assert(iByte <= 15);
10025
10026#ifdef RT_ARCH_AMD64
10027 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
10028#elif defined(RT_ARCH_ARM64)
10029 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
10030#else
10031# error "port me"
10032#endif
10033 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10034 return off;
10035}
10036
10037
10038/**
10039 * Emits a vecdst.au32[iDWord] = 0 store.
10040 */
10041DECL_FORCE_INLINE(uint32_t)
10042iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
10043{
10044 Assert(iDWord <= 7);
10045
10046#ifdef RT_ARCH_AMD64
10047 /*
10048 * xor tmp0, tmp0
10049 * pinsrd xmm, tmp0, iDWord
10050 */
10051 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
10052 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
10053 pCodeBuf[off++] = 0x33;
10054 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
10055 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
10056#elif defined(RT_ARCH_ARM64)
10057 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10058 Assert(!(iVecReg & 0x1));
10059 /* ins vecreg[iDWord], wzr */
10060 if (iDWord >= 4)
10061 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
10062 else
10063 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
10064#else
10065# error "port me"
10066#endif
10067 return off;
10068}
10069
10070
10071/**
10072 * Emits a vecdst.au32[iDWord] = 0 store.
10073 */
10074DECL_INLINE_THROW(uint32_t)
10075iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
10076{
10077
10078#ifdef RT_ARCH_AMD64
10079 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 22), off, iVecReg, iDWord); /* worst case: xor + vextracti128 + pinsrd + vinserti128 */
10080#elif defined(RT_ARCH_ARM64)
10081 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
10082#else
10083# error "port me"
10084#endif
10085 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10086 return off;
10087}
10088
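/* Illustrative usage (hypothetical register index): clearing one dword
   element while leaving the rest untouched. The AMD64 path needs the extra
   xor because pinsrd has no immediate source form, so the zero is staged in
   the fixed GPR IEMNATIVE_REG_FIXED_TMP0 first.
   @code
       off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxVecReg, 1 /*iDWord*/);
   @endcode */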
10089
10090/**
10091 * Emits a vecdst[0:127] = 0 store.
10092 */
10093DECL_FORCE_INLINE(uint32_t)
10094iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10095{
10096#ifdef RT_ARCH_AMD64
10097 /* pxor xmm, xmm */
10098 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10099 if (iVecReg >= 8)
10100 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
10101 pCodeBuf[off++] = 0x0f;
10102 pCodeBuf[off++] = 0xef;
10103 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10104#elif defined(RT_ARCH_ARM64)
10105 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10106 Assert(!(iVecReg & 0x1));
10107 /* eor vecreg, vecreg, vecreg */
10108 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
10109#else
10110# error "port me"
10111#endif
10112 return off;
10113}
10114
10115
10116/**
10117 * Emits a vecdst[0:127] = 0 store.
10118 */
10119DECL_INLINE_THROW(uint32_t)
10120iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10121{
10122#ifdef RT_ARCH_AMD64
10123 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
10124#elif defined(RT_ARCH_ARM64)
10125 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
10126#else
10127# error "port me"
10128#endif
10129 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10130 return off;
10131}
10132
10133
10134/**
10135 * Emits a vecdst[128:255] = 0 store.
10136 */
10137DECL_FORCE_INLINE(uint32_t)
10138iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10139{
10140#ifdef RT_ARCH_AMD64
10141 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
10142 if (iVecReg < 8)
10143 {
10144 pCodeBuf[off++] = X86_OP_VEX2;
10145 pCodeBuf[off++] = 0xf9;
10146 }
10147 else
10148 {
10149 pCodeBuf[off++] = X86_OP_VEX3;
10150 pCodeBuf[off++] = 0x41;
10151 pCodeBuf[off++] = 0x79;
10152 }
10153 pCodeBuf[off++] = 0x6f;
10154 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10155#elif defined(RT_ARCH_ARM64)
10156 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10157 Assert(!(iVecReg & 0x1));
10158 /* eor vecreg, vecreg, vecreg */
10159 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
10160#else
10161# error "port me"
10162#endif
10163 return off;
10164}
10165
10166
10167/**
10168 * Emits a vecdst[128:255] = 0 store.
10169 */
10170DECL_INLINE_THROW(uint32_t)
10171iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10172{
10173#ifdef RT_ARCH_AMD64
10174 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
10175#elif defined(RT_ARCH_ARM64)
10176 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
10177#else
10178# error "port me"
10179#endif
10180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10181 return off;
10182}
10183
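/* Note: the self-move vmovdqa xmm,xmm above leans on the AVX rule that any
   VEX.128 encoded instruction zeroes bits 255:128 of the destination, making
   it the cheapest way to drop the high lane without touching the low one. */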
10184
10185/**
10186 * Emits a vecdst[0:255] = 0 store.
10187 */
10188DECL_FORCE_INLINE(uint32_t)
10189iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10190{
10191#ifdef RT_ARCH_AMD64
10192 /* vpxor ymm, ymm, ymm */
10193 if (iVecReg < 8)
10194 {
10195 pCodeBuf[off++] = X86_OP_VEX2;
10196 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
10197 }
10198 else
10199 {
10200 pCodeBuf[off++] = X86_OP_VEX3;
10201 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
10202 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
10203 }
10204 pCodeBuf[off++] = 0xef;
10205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10206#elif defined(RT_ARCH_ARM64)
10207 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10208 Assert(!(iVecReg & 0x1));
10209 /* eor vecreg, vecreg, vecreg */
10210 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
10211 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
10212#else
10213# error "port me"
10214#endif
10215 return off;
10216}
10217
10218
10219/**
10220 * Emits a vecdst[0:255] = 0 store.
10221 */
10222DECL_INLINE_THROW(uint32_t)
10223iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10224{
10225#ifdef RT_ARCH_AMD64
10226 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
10227#elif defined(RT_ARCH_ARM64)
10228 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
10229#else
10230# error "port me"
10231#endif
10232 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10233 return off;
10234}
10235
10236
10237/**
10238 * Emits a vecdst = gprsrc broadcast, 8-bit.
10239 */
10240DECL_FORCE_INLINE(uint32_t)
10241iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10242{
10243#ifdef RT_ARCH_AMD64
10244 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE4.1) */
10245 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10246 if (iVecRegDst >= 8 || iGprSrc >= 8)
10247 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10248 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10249 pCodeBuf[off++] = 0x0f;
10250 pCodeBuf[off++] = 0x3a;
10251 pCodeBuf[off++] = 0x20;
10252 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10253 pCodeBuf[off++] = 0x00;
10254
10255 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
10256 pCodeBuf[off++] = X86_OP_VEX3;
10257 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10258 | 0x02 /* opcode map. */
10259 | ( iVecRegDst >= 8
10260 ? 0
10261 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10262 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10263 pCodeBuf[off++] = 0x78;
10264 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10265#elif defined(RT_ARCH_ARM64)
10266 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10267 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10268
10269 /* dup vecdst, gpr */
10270 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
10271 if (f256Bit)
10272 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
10273#else
10274# error "port me"
10275#endif
10276 return off;
10277}
10278
10279
10280/**
10281 * Emits a vecdst = gprsrc broadcast, 8-bit.
10282 */
10283DECL_INLINE_THROW(uint32_t)
10284iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10285{
10286#ifdef RT_ARCH_AMD64
10287 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10288#elif defined(RT_ARCH_ARM64)
10289 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10290#else
10291# error "port me"
10292#endif
10293 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10294 return off;
10295}
10296
10297
10298/**
10299 * Emits a vecdst = gprsrc broadcast, 16-bit.
10300 */
10301DECL_FORCE_INLINE(uint32_t)
10302iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10303{
10304#ifdef RT_ARCH_AMD64
10305 /* pinsrw vecdst, gpr, #0 */
10306 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10307 if (iVecRegDst >= 8 || iGprSrc >= 8)
10308 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10309 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10310 pCodeBuf[off++] = 0x0f;
10311 pCodeBuf[off++] = 0xc4;
10312 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10313 pCodeBuf[off++] = 0x00;
10314
10315 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
10316 pCodeBuf[off++] = X86_OP_VEX3;
10317 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10318 | 0x02 /* opcode map. */
10319 | ( iVecRegDst >= 8
10320 ? 0
10321 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10322 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10323 pCodeBuf[off++] = 0x79;
10324 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10325#elif defined(RT_ARCH_ARM64)
10326 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10327 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10328
10329 /* dup vecdst, gpr */
10330 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
10331 if (f256Bit)
10332 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
10333#else
10334# error "port me"
10335#endif
10336 return off;
10337}
10338
10339
10340/**
10341 * Emits a vecdst = gprsrc broadcast, 16-bit.
10342 */
10343DECL_INLINE_THROW(uint32_t)
10344iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10345{
10346#ifdef RT_ARCH_AMD64
10347 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10348#elif defined(RT_ARCH_ARM64)
10349 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10350#else
10351# error "port me"
10352#endif
10353 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10354 return off;
10355}
10356
10357
10358/**
10359 * Emits a vecdst = gprsrc broadcast, 32-bit.
10360 */
10361DECL_FORCE_INLINE(uint32_t)
10362iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10363{
10364#ifdef RT_ARCH_AMD64
10365 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
10366 * vbroadcast needs a memory operand or another xmm register to work... */
10367
10368 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
10369 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10370 if (iVecRegDst >= 8 || iGprSrc >= 8)
10371 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10372 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10373 pCodeBuf[off++] = 0x0f;
10374 pCodeBuf[off++] = 0x3a;
10375 pCodeBuf[off++] = 0x22;
10376 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10377 pCodeBuf[off++] = 0x00;
10378
10379 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
10380 pCodeBuf[off++] = X86_OP_VEX3;
10381 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10382 | 0x02 /* opcode map. */
10383 | ( iVecRegDst >= 8
10384 ? 0
10385 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10386 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10387 pCodeBuf[off++] = 0x58;
10388 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10389#elif defined(RT_ARCH_ARM64)
10390 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10391 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10392
10393 /* dup vecdst, gpr */
10394 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
10395 if (f256Bit)
10396 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
10397#else
10398# error "port me"
10399#endif
10400 return off;
10401}
10402
10403
10404/**
10405 * Emits a vecdst = gprsrc broadcast, 32-bit.
10406 */
10407DECL_INLINE_THROW(uint32_t)
10408iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10409{
10410#ifdef RT_ARCH_AMD64
10411 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10412#elif defined(RT_ARCH_ARM64)
10413 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10414#else
10415# error "port me"
10416#endif
10417 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10418 return off;
10419}
10420
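/* Illustrative usage (hypothetical indices): splatting a 32-bit guest value
   across a full 256-bit register. As the @todo above notes, vpbroadcastd has
   no GPR source form in AVX2, so the value is staged in lane 0 with pinsrd
   and then broadcast from there.
   @code
       off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxVecReg, idxGpr, true /*f256Bit*/);
   @endcode */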
10421
10422/**
10423 * Emits a vecdst = gprsrc broadcast, 64-bit.
10424 */
10425DECL_FORCE_INLINE(uint32_t)
10426iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10427{
10428#ifdef RT_ARCH_AMD64
10429 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
10430 * vbroadcast needs a memory operand or another xmm register to work... */
10431
10432 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
10433 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10434 pCodeBuf[off++] = X86_OP_REX_W
10435 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10436 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10437 pCodeBuf[off++] = 0x0f;
10438 pCodeBuf[off++] = 0x3a;
10439 pCodeBuf[off++] = 0x22;
10440 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10441 pCodeBuf[off++] = 0x00;
10442
10443 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
10444 pCodeBuf[off++] = X86_OP_VEX3;
10445 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10446 | 0x02 /* opcode map. */
10447 | ( iVecRegDst >= 8
10448 ? 0
10449 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10450 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10451 pCodeBuf[off++] = 0x59;
10452 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10453#elif defined(RT_ARCH_ARM64)
10454 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10455 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10456
10457 /* dup vecdst, gpr */
10458 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
10459 if (f256Bit)
10460 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
10461#else
10462# error "port me"
10463#endif
10464 return off;
10465}
10466
10467
10468/**
10469 * Emits a vecdst = gprsrc broadcast, 64-bit.
10470 */
10471DECL_INLINE_THROW(uint32_t)
10472iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10473{
10474#ifdef RT_ARCH_AMD64
10475 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
10476#elif defined(RT_ARCH_ARM64)
10477 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10478#else
10479# error "port me"
10480#endif
10481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10482 return off;
10483}
10484
10485
10486/**
10487 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10488 */
10489DECL_FORCE_INLINE(uint32_t)
10490iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10491{
10492#ifdef RT_ARCH_AMD64
10493 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
10494
10495 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
10496 pCodeBuf[off++] = X86_OP_VEX3;
10497 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
10498 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
10499 pCodeBuf[off++] = 0x38;
10500 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
10501 pCodeBuf[off++] = 0x01; /* Immediate */
10502#elif defined(RT_ARCH_ARM64)
10503 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10504 Assert(!(iVecRegDst & 0x1));
10505
10506 /* mov dst, src; alias for: orr dst, src, src */
10507 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
10508 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
10509#else
10510# error "port me"
10511#endif
10512 return off;
10513}
10514
10515
10516/**
10517 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10518 */
10519DECL_INLINE_THROW(uint32_t)
10520iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10521{
10522#ifdef RT_ARCH_AMD64
10523 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
10524#elif defined(RT_ARCH_ARM64)
10525 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
10526#else
10527# error "port me"
10528#endif
10529 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10530 return off;
10531}
10532
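/* Illustrative usage (hypothetical indices): duplicating a 128-bit source
   into both lanes of a 256-bit destination, the splat a vbroadcasti128-style
   operation needs when its source is already in a register.
   @code
       off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxVecRegDst, idxVecRegSrc);
   @endcode */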
10533
10534/** @} */
10535
10536#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
10537