VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h@104402

Last change on this file since 104402 was 104402, checked in by vboxsync, 11 months ago

VMM/IEM: Improved loading 32-bit constants via iemNativeEmitLoadGprImmEx. bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 328.2 KB
 
/* $Id: IEMN8veRecompilerEmit.h 104402 2024-04-23 09:49:16Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline    Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    if (uInfo == 0)
        pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
    else
        pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

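/*
 * Illustrative note (not from the original source): with uInfo=0x1234 the
 * AMD64 path above emits the long-nop form '0f 1f 05 34 12 00 00', i.e.
 * uInfo can be read back out of the 32-bit displacement when scanning the
 * disassembly, while uInfo=0 yields a plain one-byte 0x90 nop.
 */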

/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "error"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "error"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */
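
/*
 * Worked example (illustrative, not part of the original source): with
 * iGprReg=RAX, iGprBase=R12 and offDisp=0x40 the disp8 branch above emits
 * '44 24 40', that is the modrm byte (mod=1, rm=4), the mandatory SIB byte
 * for [R12], and the 8-bit displacement, following whatever opcode/REX
 * bytes the caller already wrote.
 */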

/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Variant of iemNativeEmitLoadGprImm32 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}

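/*
 * Illustrative expansions of the above (not from the original source):
 * uImm32=0x0000f00d is a single 'movz gpr, #0xf00d'; uImm32=0xffff1234
 * becomes 'movn gpr, #0xedcb' (movn loads the bitwise NOT of its operand);
 * and uImm32=0xdeadbeef needs the full pair 'movz gpr, #0xbeef' +
 * 'movk gpr, #0xdead, lsl #16'.
 */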

/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 10 instruction bytes.
 *      - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * Quick simplification: Do 32-bit load if top half is zero.
     */
    if (uImm64 <= UINT32_MAX)
        return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);

    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
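    /* Worked example (illustrative, not from the original source): for
       uImm64=0x0000feed0000f00d two of the four half-words are zero, so the
       code below emits just 'movz gpr, #0xf00d' followed by
       'movk gpr, #0xfeed, lsl #32'. */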
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            :  ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                             + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                             + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                             + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = (uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}
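
/*
 * Illustrative (not from the original source): assuming pVCpu lives in RBX
 * (see the 'lea gprdst, [rbx + offDisp]' comment further down), offVCpu=0x48
 * with iGprReg=RAX encodes as the short disp8 form '43 48', while offsets of
 * 128 bytes and up fall back to the 4-byte disp32 form.
 */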

#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2? if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}
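
/*
 * Illustrative decision tree (not from the original source): a 64-bit load
 * with offVCpu=0x100 uses the scaled form 'ldr Xt, [x<pVCpu>, #0x100]'; an
 * offset that only fits the CPUMCTX window is addressed off the fixed
 * CPUMCTX base register instead; anything larger costs a movz/movk sequence
 * for the offset plus a register-indexed 'ldr Xt, [x<pVCpu>, Xm]'.
 */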

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2? if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 64-bit VCpu field.
 *
 * @note Will allocate temporary registers on both ARM64 and AMD64.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 (the immediate is loaded into a temporary register) */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}

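/*
 * Illustrative (not from the original source): storing zero on ARM64 above
 * reuses the always-zero XZR register, so no temporary is allocated and,
 * for a small aligned offset, a single 'str xzr, [x<pVCpu>, #offVCpu]' is
 * emitted.
 */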

/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}



/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: idxTmp1 is always required! Whether idxTmp2 is needed depends
 *       on whether the offset can be encoded as an immediate or not. The
 *       @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same
 *       from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits loading the effective address of a VCpu field into a GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                         offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

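/*
 * Illustrative (not from the original source): for offVCpu below 4 KiB the
 * ARM64 path above is a single 'add iGprDst, x<pVCpu>, #offVCpu', while the
 * AMD64 path is always one 'lea' off the fixed pVCpu base register.
 */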

/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}

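/*
 * Usage sketch (illustrative; the statistics member name is hypothetical):
 *     off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2,
 *               iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomething));
 */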

/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}

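/*
 * Illustrative ARM64 expansion of the small-offset case above (not from the
 * original source):
 *     ldr  x<idxTmp1>, [x<pVCpu>, #offVCpu]
 *     add  x<idxTmp1>, x<idxTmp1>, #1
 *     str  x<idxTmp1>, [x<pVCpu>, #offVCpu]
 */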

/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64.  ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

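/*
 * Illustrative (not from the original source): fMask=0x40 takes the short
 * 'or dword [pVCpu + offVCpu], imm8' form (opcode 0x83 /1, sign-extended
 * imm8, which the fMask < 0x80 guard keeps non-negative); larger masks use
 * the 4-byte imm32 form (opcode 0x81 /1).
 */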

/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2' feature
           of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}

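/*
 * Illustrative (not from the original source): iGprDst=R8, iGprSrc=RCX emits
 * '4c 8b c1' (REX.W|REX.R, opcode 0x8b, modrm 0xc1), i.e. 'mov r8, rcx'; the
 * ARM64 path is a single 'mov' alias of 'orr dst, xzr, src'.
 */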

/**
 * Emits a gprdst = gprsrc load.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst32, src32;   alias for: orr dst32, wzr, src32 */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xffff */
# if 1
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
# else
    Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
# endif

#else
# error "port me"
#endif
    return off;
}
1519
1520
1521/**
1522 * Emits a gprdst = gprsrc[15:0] load.
1523 * @note Bits 63 thru 15 are cleared.
1524 */
1525DECL_INLINE_THROW(uint32_t)
1526iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1527{
1528#ifdef RT_ARCH_AMD64
1529 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1530#elif defined(RT_ARCH_ARM64)
1531 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1532#else
1533# error "port me"
1534#endif
1535 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1536 return off;
1537}
1538
1539
1540/**
1541 * Emits a gprdst = gprsrc[7:0] load.
1542 * @note Bits 63 thru 8 are cleared.
1543 */
1544DECL_FORCE_INLINE(uint32_t)
1545iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1546{
1547#ifdef RT_ARCH_AMD64
1548 /* movzx Gv,Eb */
1549 if (iGprDst >= 8 || iGprSrc >= 8)
1550 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1551 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1552 : X86_OP_REX_R;
1553 else if (iGprSrc >= 4)
1554 pCodeBuf[off++] = X86_OP_REX;
1555 pCodeBuf[off++] = 0x0f;
1556 pCodeBuf[off++] = 0xb6;
1557 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1558
1559#elif defined(RT_ARCH_ARM64)
1560 /* and gprdst, gprsrc, #0xff */
1561 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1562 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1563
1564#else
1565# error "port me"
1566#endif
1567 return off;
1568}
1569
1570
1571/**
1572 * Emits a gprdst = gprsrc[7:0] load.
1573 * @note Bits 63 thru 8 are cleared.
1574 */
1575DECL_INLINE_THROW(uint32_t)
1576iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1577{
1578#ifdef RT_ARCH_AMD64
1579 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1580#elif defined(RT_ARCH_ARM64)
1581 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1582#else
1583# error "port me"
1584#endif
1585 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1586 return off;
1587}
1588
1589
1590/**
1591 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1592 * @note Bits 63 thru 8 are cleared.
1593 */
1594DECL_INLINE_THROW(uint32_t)
1595iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1596{
1597#ifdef RT_ARCH_AMD64
1598 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1599
1600 /* movzx Gv,Ew */
1601 if ((iGprDst | iGprSrc) >= 8)
1602 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1603 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1604 : X86_OP_REX_R;
1605 pbCodeBuf[off++] = 0x0f;
1606 pbCodeBuf[off++] = 0xb7;
1607 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1608
1609 /* shr Ev,8 */
1610 if (iGprDst >= 8)
1611 pbCodeBuf[off++] = X86_OP_REX_B;
1612 pbCodeBuf[off++] = 0xc1;
1613 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1614 pbCodeBuf[off++] = 8;
1615
1616#elif defined(RT_ARCH_ARM64)
1617 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1618 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1619 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1620
1621#else
1622# error "port me"
1623#endif
1624 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1625 return off;
1626}
1627
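/*
 * Example (illustration only): extracting the high byte of the 16-bit value
 * in gpr0 into gpr2 costs two instructions on AMD64 ("movzx edx, ax" +
 * "shr edx, 8"), while ARM64 does it with a single bitfield extract:
 *
 *      pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(2, 0, 8, 8, false); // ubfx w2, w0, #8, #8
 */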
1628
1629/**
1630 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1631 */
1632DECL_INLINE_THROW(uint32_t)
1633iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1634{
1635#ifdef RT_ARCH_AMD64
1636 /* movsxd r64, r/m32 */
1637 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1638 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1639 pbCodeBuf[off++] = 0x63;
1640 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1641
1642#elif defined(RT_ARCH_ARM64)
1643 /* sxtw dst, src */
1644 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1645 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1646
1647#else
1648# error "port me"
1649#endif
1650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1651 return off;
1652}
1653
1654
1655/**
1656 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1657 */
1658DECL_INLINE_THROW(uint32_t)
1659iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1660{
1661#ifdef RT_ARCH_AMD64
1662 /* movsx r64, r/m16 */
1663 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1664 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1665 pbCodeBuf[off++] = 0x0f;
1666 pbCodeBuf[off++] = 0xbf;
1667 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1668
1669#elif defined(RT_ARCH_ARM64)
1670 /* sxth dst, src */
1671 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1672 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1673
1674#else
1675# error "port me"
1676#endif
1677 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1678 return off;
1679}
1680
1681
1682/**
1683 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1684 */
1685DECL_INLINE_THROW(uint32_t)
1686iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1687{
1688#ifdef RT_ARCH_AMD64
1689 /* movsx r32, r/m16 */
1690 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1691 if (iGprDst >= 8 || iGprSrc >= 8)
1692 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1693 pbCodeBuf[off++] = 0x0f;
1694 pbCodeBuf[off++] = 0xbf;
1695 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1696
1697#elif defined(RT_ARCH_ARM64)
1698 /* sxth dst32, src */
1699 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1700 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1701
1702#else
1703# error "port me"
1704#endif
1705 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1706 return off;
1707}
1708
1709
1710/**
1711 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1712 */
1713DECL_INLINE_THROW(uint32_t)
1714iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1715{
1716#ifdef RT_ARCH_AMD64
1717 /* movsx r64, r/m8 */
1718 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1719 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1720 pbCodeBuf[off++] = 0x0f;
1721 pbCodeBuf[off++] = 0xbe;
1722 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1723
1724#elif defined(RT_ARCH_ARM64)
1725 /* sxtb dst, src */
1726 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1727 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1728
1729#else
1730# error "port me"
1731#endif
1732 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1733 return off;
1734}
1735
1736
1737/**
1738 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1739 * @note Bits 63 thru 32 are cleared.
1740 */
1741DECL_INLINE_THROW(uint32_t)
1742iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1743{
1744#ifdef RT_ARCH_AMD64
1745 /* movsx r32, r/m8 */
1746 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1747 if (iGprDst >= 8 || iGprSrc >= 8)
1748 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1749 else if (iGprSrc >= 4)
1750 pbCodeBuf[off++] = X86_OP_REX;
1751 pbCodeBuf[off++] = 0x0f;
1752 pbCodeBuf[off++] = 0xbe;
1753 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1754
1755#elif defined(RT_ARCH_ARM64)
1756 /* sxtb dst32, src32 */
1757 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1758 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1759
1760#else
1761# error "port me"
1762#endif
1763 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1764 return off;
1765}
1766
1767
1768/**
1769 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1770 * @note Bits 63 thru 16 are cleared.
1771 */
1772DECL_INLINE_THROW(uint32_t)
1773iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1774{
1775#ifdef RT_ARCH_AMD64
1776 /* movsx r16, r/m8 */
1777 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1778 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1779 if (iGprDst >= 8 || iGprSrc >= 8)
1780 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1781 else if (iGprSrc >= 4)
1782 pbCodeBuf[off++] = X86_OP_REX;
1783 pbCodeBuf[off++] = 0x0f;
1784 pbCodeBuf[off++] = 0xbe;
1785 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1786
1787 /* movzx r32, r/m16 */
1788 if (iGprDst >= 8)
1789 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1790 pbCodeBuf[off++] = 0x0f;
1791 pbCodeBuf[off++] = 0xb7;
1792 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1793
1794#elif defined(RT_ARCH_ARM64)
1795 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1796 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1797 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1798 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1799 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1800
1801#else
1802# error "port me"
1803#endif
1804 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1805 return off;
1806}
1807
1808
1809/**
1810 * Emits a gprdst = gprsrc + addend load.
1811 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1812 */
1813#ifdef RT_ARCH_AMD64
1814DECL_INLINE_THROW(uint32_t)
1815iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1816 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1817{
1818 Assert(iAddend != 0);
1819
1820 /* lea gprdst, [gprsrc + iAddend] */
1821 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1822 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1823 pbCodeBuf[off++] = 0x8d;
1824 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1825 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1826 return off;
1827}
1828
1829#elif defined(RT_ARCH_ARM64)
1830DECL_INLINE_THROW(uint32_t)
1831iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1832 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1833{
1834 if ((uint32_t)iAddend < 4096)
1835 {
1836 /* add dst, src, uimm12 */
1837 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1838 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1839 }
1840 else if ((uint32_t)-iAddend < 4096)
1841 {
1842 /* sub dst, src, uimm12 */
1843 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1844 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1845 }
1846 else
1847 {
1848 Assert(iGprSrc != iGprDst);
1849 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1850 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1851 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1852 }
1853 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1854 return off;
1855}
1856#else
1857# error "port me"
1858#endif
1859
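/*
 * Addend classification on the ARM64 path above (illustration only):
 *
 *      iAddend = 100    -> add dst, src, #100               (uimm12)
 *      iAddend = -100   -> sub dst, src, #100               (negated uimm12)
 *      iAddend = 0x5000 -> mov dst, #0x5000 + add dst, src, dst  (requires dst != src)
 */
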
1860/**
1861 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1862 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1863 */
1864#ifdef RT_ARCH_AMD64
1865DECL_INLINE_THROW(uint32_t)
1866iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1867 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1868#else
1869DECL_INLINE_THROW(uint32_t)
1870iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1871 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1872#endif
1873{
1874 if (iAddend != 0)
1875 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1876 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1877}
1878
1879
1880/**
1881 * Emits a gprdst = gprsrc32 + addend load.
1882 * @note Bits 63 thru 32 are cleared.
1883 */
1884DECL_INLINE_THROW(uint32_t)
1885iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1886 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1887{
1888 Assert(iAddend != 0);
1889
1890#ifdef RT_ARCH_AMD64
1891 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1892 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1893 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1894 if ((iGprDst | iGprSrc) >= 8)
1895 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1896 pbCodeBuf[off++] = 0x8d;
1897 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1898
1899#elif defined(RT_ARCH_ARM64)
1900 if ((uint32_t)iAddend < 4096)
1901 {
1902 /* add dst, src, uimm12 */
1903 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1904 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1905 }
1906 else if ((uint32_t)-iAddend < 4096)
1907 {
1908 /* sub dst, src, uimm12 */
1909 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1910 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1911 }
1912 else
1913 {
1914 Assert(iGprSrc != iGprDst);
1915 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1916 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1917 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1918 }
1919
1920#else
1921# error "port me"
1922#endif
1923 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1924 return off;
1925}
1926
1927
1928/**
1929 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1930 */
1931DECL_INLINE_THROW(uint32_t)
1932iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1933 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1934{
1935 if (iAddend != 0)
1936 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1937 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1938}
1939
1940
1941/**
1942 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1943 * destination.
1944 */
1945DECL_FORCE_INLINE(uint32_t)
1946iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1947{
1948#ifdef RT_ARCH_AMD64
1949 /* mov reg16, r/m16 */
1950 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1951 if (idxDst >= 8 || idxSrc >= 8)
1952 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1953 pCodeBuf[off++] = 0x8b;
1954 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1955
1956#elif defined(RT_ARCH_ARM64)
1957 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1958 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1959
1960#else
1961# error "Port me!"
1962#endif
1963 return off;
1964}
1965
1966
1967/**
1968 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1969 * destination.
1970 */
1971DECL_INLINE_THROW(uint32_t)
1972iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1973{
1974#ifdef RT_ARCH_AMD64
1975 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1976#elif defined(RT_ARCH_ARM64)
1977 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1978#else
1979# error "Port me!"
1980#endif
1981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1982 return off;
1983}
1984
1985
1986#ifdef RT_ARCH_AMD64
1987/**
1988 * Common bit of iemNativeEmitLoadGprByBp and friends.
1989 */
1990DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1991 PIEMRECOMPILERSTATE pReNativeAssert)
1992{
1993 if (offDisp < 128 && offDisp >= -128)
1994 {
1995 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
1996 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
1997 }
1998 else
1999 {
2000 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2001 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2002 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2003 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2004 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2005 }
2006 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2007 return off;
2008}
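/* Worked example (illustration only): iGprReg=0 (RAX) and offDisp=-8 take the
   short form above and emit:

        pbCodeBuf[off++] = 0x45;    // ModRM: mod=01 (disp8), reg=000, rm=101 (rBP)
        pbCodeBuf[off++] = 0xf8;    // disp8 = -8

   which after the caller's REX.W + 0x8b bytes disassembles as "mov rax, [rbp-8]". */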
2009#elif defined(RT_ARCH_ARM64)
2010/**
2011 * Common bit of iemNativeEmitLoadGprByBp and friends.
2012 */
2013DECL_FORCE_INLINE_THROW(uint32_t)
2014iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2015 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2016{
2017 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2018 {
2019 /* str w/ unsigned imm12 (scaled) */
2020 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2021 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2022 }
2023 else if (offDisp >= -256 && offDisp <= 256)
2024 {
2025 /* stur w/ signed imm9 (unscaled) */
2026 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2027 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2028 }
2029 else
2030 {
2031 /* Use temporary indexing register. */
2032 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2033 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2034 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2035 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2036 }
2037 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2038 return off;
2039}
2040#endif
2041
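/*
 * Offset classification in the ARM64 helper above, shown for a 64-bit access
 * (cbData = 8) - an illustrative sketch of the three branches:
 *
 *      static unsigned sketchClassifyBpDisp(int32_t offDisp)
 *      {
 *          if ((uint32_t)offDisp < 4096U * 8 && !((uint32_t)offDisp & 7))
 *              return 0;   // scaled ldr/str w/ uimm12, e.g. +16
 *          if (offDisp >= -256 && offDisp <= 256)
 *              return 1;   // unscaled ldur/stur w/ imm9, e.g. -12
 *          return 2;       // via IEMNATIVE_REG_FIXED_TMP0, e.g. -4096
 *      }
 */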
2042
2043/**
2044 * Emits a 64-bit GPR load instruction with a BP relative source address.
2045 */
2046DECL_INLINE_THROW(uint32_t)
2047iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2048{
2049#ifdef RT_ARCH_AMD64
2050 /* mov gprdst, qword [rbp + offDisp] */
2051 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2052 if (iGprDst < 8)
2053 pbCodeBuf[off++] = X86_OP_REX_W;
2054 else
2055 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2056 pbCodeBuf[off++] = 0x8b;
2057 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2058
2059#elif defined(RT_ARCH_ARM64)
2060 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2061
2062#else
2063# error "port me"
2064#endif
2065}
2066
2067
2068/**
2069 * Emits a 32-bit GPR load instruction with a BP relative source address.
2070 * @note Bits 63 thru 32 of the GPR will be cleared.
2071 */
2072DECL_INLINE_THROW(uint32_t)
2073iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2074{
2075#ifdef RT_ARCH_AMD64
2076 /* mov gprdst, dword [rbp + offDisp] */
2077 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2078 if (iGprDst >= 8)
2079 pbCodeBuf[off++] = X86_OP_REX_R;
2080 pbCodeBuf[off++] = 0x8b;
2081 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2082
2083#elif defined(RT_ARCH_ARM64)
2084 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2085
2086#else
2087# error "port me"
2088#endif
2089}
2090
2091
2092/**
2093 * Emits a 16-bit GPR load instruction with a BP relative source address.
2094 * @note Bits 63 thru 16 of the GPR will be cleared.
2095 */
2096DECL_INLINE_THROW(uint32_t)
2097iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2098{
2099#ifdef RT_ARCH_AMD64
2100 /* movzx gprdst, word [rbp + offDisp] */
2101 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2102 if (iGprDst >= 8)
2103 pbCodeBuf[off++] = X86_OP_REX_R;
2104 pbCodeBuf[off++] = 0x0f;
2105 pbCodeBuf[off++] = 0xb7;
2106 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2107
2108#elif defined(RT_ARCH_ARM64)
2109 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2110
2111#else
2112# error "port me"
2113#endif
2114}
2115
2116
2117/**
2118 * Emits an 8-bit GPR load instruction with a BP relative source address.
2119 * @note Bits 63 thru 8 of the GPR will be cleared.
2120 */
2121DECL_INLINE_THROW(uint32_t)
2122iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2123{
2124#ifdef RT_ARCH_AMD64
2125 /* movzx gprdst, byte [rbp + offDisp] */
2126 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2127 if (iGprDst >= 8)
2128 pbCodeBuf[off++] = X86_OP_REX_R;
2129 pbCodeBuf[off++] = 0x0f;
2130 pbCodeBuf[off++] = 0xb6;
2131 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2132
2133#elif defined(RT_ARCH_ARM64)
2134 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2135
2136#else
2137# error "port me"
2138#endif
2139}
2140
2141
2142#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2143/**
2144 * Emits a 128-bit vector register load instruction with a BP relative source address.
2145 */
2146DECL_FORCE_INLINE_THROW(uint32_t)
2147iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2148{
2149#ifdef RT_ARCH_AMD64
2150 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2151
2152 /* movdqu reg128, mem128 */
2153 pbCodeBuf[off++] = 0xf3;
2154 if (iVecRegDst >= 8)
2155 pbCodeBuf[off++] = X86_OP_REX_R;
2156 pbCodeBuf[off++] = 0x0f;
2157 pbCodeBuf[off++] = 0x6f;
2158 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2159#elif defined(RT_ARCH_ARM64)
2160 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2161#else
2162# error "port me"
2163#endif
2164}
2165
2166
2167/**
2168 * Emits a 256-bit vector register load instruction with a BP relative source address.
2169 */
2170DECL_FORCE_INLINE_THROW(uint32_t)
2171iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2172{
2173#ifdef RT_ARCH_AMD64
2174 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2175
2176 /* vmovdqu reg256, mem256 */
2177 pbCodeBuf[off++] = X86_OP_VEX2;
2178 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2179 pbCodeBuf[off++] = 0x6f;
2180 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2181#elif defined(RT_ARCH_ARM64)
2182 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2183 Assert(!(iVecRegDst & 0x1));
2184 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2185 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2186#else
2187# error "port me"
2188#endif
2189}
2190
2191#endif
2192
2193
2194/**
2195 * Emits a load effective address to a GPR with a BP relative source address.
2196 */
2197DECL_INLINE_THROW(uint32_t)
2198iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2199{
2200#ifdef RT_ARCH_AMD64
2201 /* lea gprdst, [rbp + offDisp] */
2202 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2203 if (iGprDst < 8)
2204 pbCodeBuf[off++] = X86_OP_REX_W;
2205 else
2206 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2207 pbCodeBuf[off++] = 0x8d;
2208 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2209
2210#elif defined(RT_ARCH_ARM64)
2211 if ((uint32_t)offDisp < (unsigned)_4K)
2212 {
2213 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2214 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)offDisp);
2215 }
2216 else if ((uint32_t)-offDisp < (unsigned)_4K)
2217 {
2218 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2219 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2220 }
2221 else
2222 {
2223 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2224 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offDisp >= 0 ? (uint32_t)offDisp : (uint32_t)-offDisp);
2225 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2226 if (offDisp >= 0)
2227 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2228 else
2229 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2230 }
2231
2232#else
2233# error "port me"
2234#endif
2235
2236 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2237 return off;
2238}
2239
2240
2241/**
2242 * Emits a 64-bit GPR store with a BP relative destination address.
2243 *
2244 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2245 */
2246DECL_INLINE_THROW(uint32_t)
2247iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2248{
2249#ifdef RT_ARCH_AMD64
2250 /* mov qword [rbp + offDisp], gprdst */
2251 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2252 if (iGprSrc < 8)
2253 pbCodeBuf[off++] = X86_OP_REX_W;
2254 else
2255 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2256 pbCodeBuf[off++] = 0x89;
2257 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2258
2259#elif defined(RT_ARCH_ARM64)
2260 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2261 {
2262 /* str w/ unsigned imm12 (scaled) */
2263 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2264 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2265 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2266 }
2267 else if (offDisp >= -256 && offDisp <= 256)
2268 {
2269 /* stur w/ signed imm9 (unscaled) */
2270 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2271 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2272 }
2273 else if ((uint32_t)-offDisp < (unsigned)_4K)
2274 {
2275 /* Use temporary indexing register w/ sub uimm12. */
2276 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2277 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2278 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2279 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2280 }
2281 else
2282 {
2283 /* Use temporary indexing register. */
2284 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2285 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2286 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2287 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2288 }
2289 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2290 return off;
2291
2292#else
2293# error "Port me!"
2294#endif
2295}
2296
2297
2298/**
2299 * Emits a 64-bit immediate store with a BP relative destination address.
2300 *
2301 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2302 */
2303DECL_INLINE_THROW(uint32_t)
2304iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2305{
2306#ifdef RT_ARCH_AMD64
2307 if ((int64_t)uImm64 == (int32_t)uImm64)
2308 {
2309 /* mov qword [rbp + offDisp], imm32 - sign extended */
2310 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2311 pbCodeBuf[off++] = X86_OP_REX_W;
2312 pbCodeBuf[off++] = 0xc7;
2313 if (offDisp < 128 && offDisp >= -128)
2314 {
2315 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2316 pbCodeBuf[off++] = (uint8_t)offDisp;
2317 }
2318 else
2319 {
2320 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2321 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2322 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2323 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2324 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2325 }
2326 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2327 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2328 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2329 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2330 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2331 return off;
2332 }
2333#endif
2334
2335 /* Load tmp0, imm64; Store tmp to bp+disp. */
2336 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2337 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2338}
2339
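/*
 * Examples for the imm32 sign-extension test above (illustration only):
 *
 *      uImm64 = 0x000000007fffffff -> fits, stored as imm32 7fffffffh
 *      uImm64 = 0x0000000080000000 -> does not fit (imm32 sign-extends to
 *                                     ffffffff80000000h), takes the tmp0 path
 *      uImm64 = 0xffffffff80000000 -> fits, stored as imm32 80000000h
 */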
2340
2341#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2342/**
2343 * Emits a 128-bit vector register store with a BP relative destination address.
2344 *
2345 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2346 */
2347DECL_INLINE_THROW(uint32_t)
2348iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2349{
2350#ifdef RT_ARCH_AMD64
2351 /* movdqu [rbp + offDisp], vecsrc */
2352 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2353 pbCodeBuf[off++] = 0xf3;
2354 if (iVecRegSrc >= 8)
2355 pbCodeBuf[off++] = X86_OP_REX_R;
2356 pbCodeBuf[off++] = 0x0f;
2357 pbCodeBuf[off++] = 0x7f;
2358 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2359
2360#elif defined(RT_ARCH_ARM64)
2361 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2362 {
2363 /* str w/ unsigned imm12 (scaled) */
2364 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2365 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2366 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2367 }
2368 else if (offDisp >= -256 && offDisp <= 256)
2369 {
2370 /* stur w/ signed imm9 (unscaled) */
2371 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2372 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2373 }
2374 else if ((uint32_t)-offDisp < (unsigned)_4K)
2375 {
2376 /* Use temporary indexing register w/ sub uimm12. */
2377 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2378 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2379 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2380 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2381 }
2382 else
2383 {
2384 /* Use temporary indexing register. */
2385 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2386 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2387 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2388 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2389 }
2390 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2391 return off;
2392
2393#else
2394# error "Port me!"
2395#endif
2396}
2397
2398
2399/**
2400 * Emits a 256-bit vector register store with a BP relative destination address.
2401 *
2402 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2403 */
2404DECL_INLINE_THROW(uint32_t)
2405iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2406{
2407#ifdef RT_ARCH_AMD64
2408 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2409
2410 /* vmovdqu mem256, reg256 */
2411 pbCodeBuf[off++] = X86_OP_VEX2;
2412 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2413 pbCodeBuf[off++] = 0x7f;
2414 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2415#elif defined(RT_ARCH_ARM64)
2416 Assert(!(iVecRegSrc & 0x1));
2417 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2418 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2419#else
2420# error "Port me!"
2421#endif
2422}
2423#endif
2424
2425#if defined(RT_ARCH_ARM64)
2426
2427/**
2428 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2429 *
2430 * @note Odd and large @a offDisp values require a temporary, unless it's a
2431 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2432 * caller does not heed this.
2433 *
2434 * @note DON'T try this with prefetch.
2435 */
2436DECL_FORCE_INLINE_THROW(uint32_t)
2437iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2438 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2439{
2440 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2441 {
2442 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2443 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2444 }
2445 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2446 && iGprReg != iGprBase)
2447 || iGprTmp != UINT8_MAX)
2448 {
2449 /* The offset is too large, so we must load it into a register and use
2450 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2451 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2452 if (iGprTmp == UINT8_MAX)
2453 iGprTmp = iGprReg;
2454 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2455 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2456 }
2457 else
2458# ifdef IEM_WITH_THROW_CATCH
2459 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2460# else
2461 AssertReleaseFailedStmt(off = UINT32_MAX);
2462# endif
2463 return off;
2464}
2465
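/*
 * iGprTmp contract sketch (illustration only): a load with a too-large offset
 * may clobber @a iGprReg as scratch when it differs from @a iGprBase, so no
 * temporary is needed; a store (or a load with iGprReg == iGprBase) must be
 * handed one.  Hypothetical call:
 *
 *      // 64-bit load of [base + 0x12345]; 0x12345 exceeds the scaled uimm12
 *      // range (4096 * 8), so the destination doubles as the offset scratch:
 *      off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, 0x12345,
 *                                        kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
 */
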
2466/**
2467 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
2468 */
2469DECL_FORCE_INLINE_THROW(uint32_t)
2470iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2471 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2472{
2473 /*
2474 * There are a couple of ldr variants that take an immediate offset, so
2475 * try to use those if we can; otherwise we have to use a temporary register
2476 * to help with the addressing.
2477 */
2478 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2479 {
2480 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2481 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2482 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2483 }
2484 else
2485 {
2486 /* The offset is too large, so we must load it into a register and use
2487 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2488 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2489 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2490
2491 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2492 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2493
2494 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2495 }
2496 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2497 return off;
2498}
2499
2500# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2501/**
2502 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2503 *
2504 * @note Odd and large @a offDisp values require a temporary (@a iGprTmp);
2505 * unlike the GPR variant, a vector register cannot double as the offset
2506 * scratch register. Will assert / throw if caller does not heed this.
2507 *
2508 * @note DON'T try this with prefetch.
2509 */
2510DECL_FORCE_INLINE_THROW(uint32_t)
2511iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2512 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2513{
2514 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2515 {
2516 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2517 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2518 }
2519 else if (iGprTmp != UINT8_MAX) /* Unlike the GPR variant, a load cannot reuse
2520 the destination vector register as offset scratch. */
2521 {
2522 /* The offset is too large, so we must load it into a register and use
2523 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2524 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2525 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2526 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2527 }
2528 else
2529# ifdef IEM_WITH_THROW_CATCH
2530 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2531# else
2532 AssertReleaseFailedStmt(off = UINT32_MAX);
2533# endif
2534 return off;
2535}
2536# endif
2537
2538
2539/**
2540 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2541 */
2542DECL_FORCE_INLINE_THROW(uint32_t)
2543iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2544 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2545{
2546 /*
2547 * There are a couple of ldr variants that take an immediate offset, so
2548 * try to use those if we can; otherwise we have to use a temporary register
2549 * to help with the addressing.
2550 */
2551 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2552 {
2553 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2554 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2555 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2556 }
2557 else
2558 {
2559 /* The offset is too large, so we must load it into a register and use
2560 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2561 /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2562 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2563
2564 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2565 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2566
2567 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2568 }
2569 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2570 return off;
2571}
2572#endif /* RT_ARCH_ARM64 */
2573
2574/**
2575 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2576 *
2577 * @note ARM64: Misaligned @a offDisp values and values not in the
2578 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2579 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2580 * does not heed this.
2581 */
2582DECL_FORCE_INLINE_THROW(uint32_t)
2583iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2584 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2585{
2586#ifdef RT_ARCH_AMD64
2587 /* mov reg64, mem64 */
2588 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2589 pCodeBuf[off++] = 0x8b;
2590 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2591 RT_NOREF(iGprTmp);
2592
2593#elif defined(RT_ARCH_ARM64)
2594 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2595 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2596
2597#else
2598# error "port me"
2599#endif
2600 return off;
2601}
2602
2603
2604/**
2605 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2606 */
2607DECL_INLINE_THROW(uint32_t)
2608iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2609{
2610#ifdef RT_ARCH_AMD64
2611 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2612 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2613
2614#elif defined(RT_ARCH_ARM64)
2615 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2616
2617#else
2618# error "port me"
2619#endif
2620 return off;
2621}
2622
2623
2624/**
2625 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2626 *
2627 * @note ARM64: Misaligned @a offDisp values and values not in the
2628 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2629 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2630 * caller does not heed this.
2631 *
2632 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2633 */
2634DECL_FORCE_INLINE_THROW(uint32_t)
2635iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2636 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2637{
2638#ifdef RT_ARCH_AMD64
2639 /* mov reg32, mem32 */
2640 if (iGprDst >= 8 || iGprBase >= 8)
2641 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2642 pCodeBuf[off++] = 0x8b;
2643 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2644 RT_NOREF(iGprTmp);
2645
2646#elif defined(RT_ARCH_ARM64)
2647 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2648 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2649
2650#else
2651# error "port me"
2652#endif
2653 return off;
2654}
2655
2656
2657/**
2658 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2659 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2660 */
2661DECL_INLINE_THROW(uint32_t)
2662iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2663{
2664#ifdef RT_ARCH_AMD64
2665 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2666 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2667
2668#elif defined(RT_ARCH_ARM64)
2669 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2670
2671#else
2672# error "port me"
2673#endif
2674 return off;
2675}
2676
2677
2678/**
2679 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2680 * sign-extending the value to 64 bits.
2681 *
2682 * @note ARM64: Misaligned @a offDisp values and values not in the
2683 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2684 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2685 * caller does not heed this.
2686 */
2687DECL_FORCE_INLINE_THROW(uint32_t)
2688iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2689 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2690{
2691#ifdef RT_ARCH_AMD64
2692 /* movsxd reg64, mem32 */
2693 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2694 pCodeBuf[off++] = 0x63;
2695 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2696 RT_NOREF(iGprTmp);
2697
2698#elif defined(RT_ARCH_ARM64)
2699 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2700 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2701
2702#else
2703# error "port me"
2704#endif
2705 return off;
2706}
2707
2708
2709/**
2710 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2711 *
2712 * @note ARM64: Misaligned @a offDisp values and values not in the
2713 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2714 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2715 * caller does not heed this.
2716 *
2717 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2718 */
2719DECL_FORCE_INLINE_THROW(uint32_t)
2720iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2721 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2722{
2723#ifdef RT_ARCH_AMD64
2724 /* movzx reg32, mem16 */
2725 if (iGprDst >= 8 || iGprBase >= 8)
2726 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2727 pCodeBuf[off++] = 0x0f;
2728 pCodeBuf[off++] = 0xb7;
2729 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2730 RT_NOREF(iGprTmp);
2731
2732#elif defined(RT_ARCH_ARM64)
2733 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2734 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2735
2736#else
2737# error "port me"
2738#endif
2739 return off;
2740}
2741
2742
2743/**
2744 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2745 * sign-extending the value to 64 bits.
2746 *
2747 * @note ARM64: Misaligned @a offDisp values and values not in the
2748 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2749 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2750 * caller does not heed this.
2751 */
2752DECL_FORCE_INLINE_THROW(uint32_t)
2753iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2754 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2755{
2756#ifdef RT_ARCH_AMD64
2757 /* movsx reg64, mem16 */
2758 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2759 pCodeBuf[off++] = 0x0f;
2760 pCodeBuf[off++] = 0xbf;
2761 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2762 RT_NOREF(iGprTmp);
2763
2764#elif defined(RT_ARCH_ARM64)
2765 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2766 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2767
2768#else
2769# error "port me"
2770#endif
2771 return off;
2772}
2773
2774
2775/**
2776 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2777 * sign-extending the value to 32 bits.
2778 *
2779 * @note ARM64: Misaligned @a offDisp values and values not in the
2780 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2781 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2782 * caller does not heed this.
2783 *
2784 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2785 */
2786DECL_FORCE_INLINE_THROW(uint32_t)
2787iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2788 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2789{
2790#ifdef RT_ARCH_AMD64
2791 /* movsx reg32, mem16 */
2792 if (iGprDst >= 8 || iGprBase >= 8)
2793 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2794 pCodeBuf[off++] = 0x0f;
2795 pCodeBuf[off++] = 0xbf;
2796 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2797 RT_NOREF(iGprTmp);
2798
2799#elif defined(RT_ARCH_ARM64)
2800 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2801 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2802
2803#else
2804# error "port me"
2805#endif
2806 return off;
2807}
2808
2809
2810/**
2811 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2812 *
2813 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2814 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2815 * same. Will assert / throw if caller does not heed this.
2816 *
2817 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2818 */
2819DECL_FORCE_INLINE_THROW(uint32_t)
2820iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2821 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2822{
2823#ifdef RT_ARCH_AMD64
2824 /* movzx reg32, mem8 */
2825 if (iGprDst >= 8 || iGprBase >= 8)
2826 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2827 pCodeBuf[off++] = 0x0f;
2828 pCodeBuf[off++] = 0xb6;
2829 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2830 RT_NOREF(iGprTmp);
2831
2832#elif defined(RT_ARCH_ARM64)
2833 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2834 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2835
2836#else
2837# error "port me"
2838#endif
2839 return off;
2840}
2841
2842
2843/**
2844 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2845 * sign-extending the value to 64 bits.
2846 *
2847 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2848 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2849 * same. Will assert / throw if caller does not heed this.
2850 */
2851DECL_FORCE_INLINE_THROW(uint32_t)
2852iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2853 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2854{
2855#ifdef RT_ARCH_AMD64
2856 /* movsx reg64, mem8 */
2857 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2858 pCodeBuf[off++] = 0x0f;
2859 pCodeBuf[off++] = 0xbe;
2860 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2861 RT_NOREF(iGprTmp);
2862
2863#elif defined(RT_ARCH_ARM64)
2864 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2865 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2866
2867#else
2868# error "port me"
2869#endif
2870 return off;
2871}
2872
2873
2874/**
2875 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2876 * sign-extending the value to 32 bits.
2877 *
2878 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2879 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2880 * same. Will assert / throw if caller does not heed this.
2881 *
2882 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2883 */
2884DECL_FORCE_INLINE_THROW(uint32_t)
2885iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2886 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2887{
2888#ifdef RT_ARCH_AMD64
2889 /* movsx reg32, mem8 */
2890 if (iGprDst >= 8 || iGprBase >= 8)
2891 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2892 pCodeBuf[off++] = 0x0f;
2893 pCodeBuf[off++] = 0xbe;
2894 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2895 RT_NOREF(iGprTmp);
2896
2897#elif defined(RT_ARCH_ARM64)
2898 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2899 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2900
2901#else
2902# error "port me"
2903#endif
2904 return off;
2905}
2906
2907
2908/**
2909 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2910 * sign-extending the value to 16 bits.
2911 *
2912 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2913 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2914 * same. Will assert / throw if caller does not heed this.
2915 *
2916 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2917 */
2918DECL_FORCE_INLINE_THROW(uint32_t)
2919iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2920 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2921{
2922#ifdef RT_ARCH_AMD64
2923 /* movsx reg32, mem8 */
2924 if (iGprDst >= 8 || iGprBase >= 8)
2925 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2926 pCodeBuf[off++] = 0x0f;
2927 pCodeBuf[off++] = 0xbe;
2928 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2929# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2930 /* and reg32, 0xffff */
2931 if (iGprDst >= 8)
2932 pCodeBuf[off++] = X86_OP_REX_B;
2933 pCodeBuf[off++] = 0x81;
2934 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2935 pCodeBuf[off++] = 0xff;
2936 pCodeBuf[off++] = 0xff;
2937 pCodeBuf[off++] = 0;
2938 pCodeBuf[off++] = 0;
2939# else
2940 /* movzx reg32, reg16 */
2941 if (iGprDst >= 8)
2942 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2943 pCodeBuf[off++] = 0x0f;
2944 pCodeBuf[off++] = 0xb7;
2945 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2946# endif
2947 RT_NOREF(iGprTmp);
2948
2949#elif defined(RT_ARCH_ARM64)
2950 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2951 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2952 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2953 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2954
2955#else
2956# error "port me"
2957#endif
2958 return off;
2959}
2960
2961
2962#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2963/**
2964 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2965 *
2966 * @note ARM64: Misaligned @a offDisp values and values not in the
2967 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2968 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2969 * does not heed this.
2970 */
2971DECL_FORCE_INLINE_THROW(uint32_t)
2972iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2973 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2974{
2975#ifdef RT_ARCH_AMD64
2976 /* movdqu reg128, mem128 */
2977 pCodeBuf[off++] = 0xf3;
2978 if (iVecRegDst >= 8 || iGprBase >= 8)
2979 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2980 pCodeBuf[off++] = 0x0f;
2981 pCodeBuf[off++] = 0x6f;
2982 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2983 RT_NOREF(iGprTmp);
2984
2985#elif defined(RT_ARCH_ARM64)
2986 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2987 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2988
2989#else
2990# error "port me"
2991#endif
2992 return off;
2993}
2994
2995
2996/**
2997 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2998 */
2999DECL_INLINE_THROW(uint32_t)
3000iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3001{
3002#ifdef RT_ARCH_AMD64
3003 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3004 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3005
3006#elif defined(RT_ARCH_ARM64)
3007 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3008
3009#else
3010# error "port me"
3011#endif
3012 return off;
3013}
3014
3015
3016/**
3017 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3018 *
3019 * @note ARM64: Misaligned @a offDisp values and values not in the
3020 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3021 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3022 * does not heed this.
3023 */
3024DECL_FORCE_INLINE_THROW(uint32_t)
3025iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3026 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3027{
3028#ifdef RT_ARCH_AMD64
3029 /* vmovdqu reg256, mem256 */
3030 pCodeBuf[off++] = X86_OP_VEX3;
3031 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3032 | X86_OP_VEX3_BYTE1_X
3033 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3034 | UINT8_C(0x01);
3035 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3036 pCodeBuf[off++] = 0x6f;
3037 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3038 RT_NOREF(iGprTmp);
3039
3040#elif defined(RT_ARCH_ARM64)
3041 Assert(!(iVecRegDst & 0x1));
3042 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3043 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3044 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3045 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3046#else
3047# error "port me"
3048#endif
3049 return off;
3050}
3051
3052
3053/**
3054 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3055 */
3056DECL_INLINE_THROW(uint32_t)
3057iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3058{
3059#ifdef RT_ARCH_AMD64
3060 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3061 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3062
3063#elif defined(RT_ARCH_ARM64)
3064 Assert(!(iVecRegDst & 0x1));
3065 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3066 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3067 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3068 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3069
3070#else
3071# error "port me"
3072#endif
3073 return off;
3074}
3075#endif
3076
3077
3078/**
3079 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3080 *
3081 * @note ARM64: Misaligned @a offDisp values and values not in the
3082 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3083 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3084 * does not heed this.
3085 */
3086DECL_FORCE_INLINE_THROW(uint32_t)
3087iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3088 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3089{
3090#ifdef RT_ARCH_AMD64
3091 /* mov mem64, reg64 */
3092 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3093 pCodeBuf[off++] = 0x89;
3094 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3095 RT_NOREF(iGprTmp);
3096
3097#elif defined(RT_ARCH_ARM64)
3098 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3099 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3100
3101#else
3102# error "port me"
3103#endif
3104 return off;
3105}
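
/*
 * Worked example (illustrative; register numbers assumed): with iGprSrc=0
 * (RAX/x0), iGprBase=3 (RBX/x3) and offDisp=0, the AMD64 path above emits
 *      48 89 03            mov [rbx], rax
 * while the ARM64 path emits a single scaled-offset store, roughly
 *      str x0, [x3]
 * via iemNativeEmitGprByGprLdStEx.
 */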
3106
3107
3108/**
3109 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3110 *
3111 * @note ARM64: Misaligned @a offDisp values and values not in the
3112 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3113 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3114 * does not heed this.
3115 */
3116DECL_FORCE_INLINE_THROW(uint32_t)
3117iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3118 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3119{
3120#ifdef RT_ARCH_AMD64
3121 /* mov mem32, reg32 */
3122 if (iGprSrc >= 8 || iGprBase >= 8)
3123 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3124 pCodeBuf[off++] = 0x89;
3125 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3126 RT_NOREF(iGprTmp);
3127
3128#elif defined(RT_ARCH_ARM64)
3129 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3130 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3131
3132#else
3133# error "port me"
3134#endif
3135 return off;
3136}
3137
3138
3139/**
3140 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3141 *
3142 * @note ARM64: Misaligned @a offDisp values and values not in the
3143 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3144 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3145 * does not heed this.
3146 */
3147DECL_FORCE_INLINE_THROW(uint32_t)
3148iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3149 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3150{
3151#ifdef RT_ARCH_AMD64
3152 /* mov mem16, reg16 */
3153 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3154 if (iGprSrc >= 8 || iGprBase >= 8)
3155 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3156 pCodeBuf[off++] = 0x89;
3157 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3158 RT_NOREF(iGprTmp);
3159
3160#elif defined(RT_ARCH_ARM64)
3161 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3162 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3163
3164#else
3165# error "port me"
3166#endif
3167 return off;
3168}
3169
3170
3171/**
3172 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3173 *
3174 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3175 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3176 * same. Will assert / throw if caller does not heed this.
3177 */
3178DECL_FORCE_INLINE_THROW(uint32_t)
3179iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3180 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3181{
3182#ifdef RT_ARCH_AMD64
3183 /* mov mem8, reg8 */
3184 if (iGprSrc >= 8 || iGprBase >= 8)
3185 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3186 else if (iGprSrc >= 4)
3187 pCodeBuf[off++] = X86_OP_REX;
3188 pCodeBuf[off++] = 0x88;
3189 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3190 RT_NOREF(iGprTmp);
3191
3192#elif defined(RT_ARCH_ARM64)
3193 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3194 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3195
3196#else
3197# error "port me"
3198#endif
3199 return off;
3200}
3201
3202
3203/**
3204 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3205 *
3206 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0);
3207 * on AMD64 it depends on the immediate value.
3208 *
3209 * @note ARM64: Misaligned @a offDisp values and values not in the
3210 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3211 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3212 * does not heed this.
3213 */
3214DECL_FORCE_INLINE_THROW(uint32_t)
3215iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3216 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3217{
3218#ifdef RT_ARCH_AMD64
3219 if ((int32_t)uImm == (int64_t)uImm)
3220 {
3221 /* mov mem64, imm32 (sign-extended) */
3222 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3223 pCodeBuf[off++] = 0xc7;
3224 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3225 pCodeBuf[off++] = RT_BYTE1(uImm);
3226 pCodeBuf[off++] = RT_BYTE2(uImm);
3227 pCodeBuf[off++] = RT_BYTE3(uImm);
3228 pCodeBuf[off++] = RT_BYTE4(uImm);
3229 }
3230 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3231 {
3232 /* require temporary register. */
3233 if (iGprImmTmp == UINT8_MAX)
3234 iGprImmTmp = iGprTmp;
3235 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3236 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3237 }
3238 else
3239# ifdef IEM_WITH_THROW_CATCH
3240 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3241# else
3242 AssertReleaseFailedStmt(off = UINT32_MAX);
3243# endif
3244
3245#elif defined(RT_ARCH_ARM64)
3246 if (uImm == 0)
3247 iGprImmTmp = ARMV8_A64_REG_XZR;
3248 else
3249 {
3250 Assert(iGprImmTmp < 31);
3251 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3252 }
3253 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3254
3255#else
3256# error "port me"
3257#endif
3258 return off;
3259}
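
/*
 * Usage sketch (illustrative; idxRegBase and idxRegTmp are assumed
 * caller-allocated host registers): a constant that does not fit a
 * sign-extended imm32 needs the temporary on AMD64 too, while zero never
 * needs one on ARM64 since XZR is used:
 *      off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, 0, idxRegBase);
 *      off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, UINT64_C(0x1122334455667788),
 *                                           idxRegBase, idxRegTmp);
 */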
3260
3261
3262/**
3263 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3264 *
3265 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3266 *
3267 * @note ARM64: Misaligned @a offDisp values and values not in the
3268 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3269 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3270 * does not heed this.
3271 */
3272DECL_FORCE_INLINE_THROW(uint32_t)
3273iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3274 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3275{
3276#ifdef RT_ARCH_AMD64
3277 /* mov mem32, imm32 */
3278 if (iGprBase >= 8)
3279 pCodeBuf[off++] = X86_OP_REX_B;
3280 pCodeBuf[off++] = 0xc7;
3281 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3282 pCodeBuf[off++] = RT_BYTE1(uImm);
3283 pCodeBuf[off++] = RT_BYTE2(uImm);
3284 pCodeBuf[off++] = RT_BYTE3(uImm);
3285 pCodeBuf[off++] = RT_BYTE4(uImm);
3286 RT_NOREF(iGprImmTmp, iGprTmp);
3287
3288#elif defined(RT_ARCH_ARM64)
3290 if (uImm == 0)
3291 iGprImmTmp = ARMV8_A64_REG_XZR;
3292 else
3293 {
3294 Assert(iGprImmTmp < 31);
3295 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3296 }
3297 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3298 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3299
3300#else
3301# error "port me"
3302#endif
3303 return off;
3304}
3305
3306
3307/**
3308 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3309 *
3310 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3311 *
3312 * @note ARM64: Misaligned @a offDisp values and values not in the
3313 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3314 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3315 * does not heed this.
3316 */
3317DECL_FORCE_INLINE_THROW(uint32_t)
3318iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3319 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3320{
3321#ifdef RT_ARCH_AMD64
3322 /* mov mem16, imm16 */
3323 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3324 if (iGprBase >= 8)
3325 pCodeBuf[off++] = X86_OP_REX_B;
3326 pCodeBuf[off++] = 0xc7;
3327 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3328 pCodeBuf[off++] = RT_BYTE1(uImm);
3329 pCodeBuf[off++] = RT_BYTE2(uImm);
3330 RT_NOREF(iGprImmTmp, iGprTmp);
3331
3332#elif defined(RT_ARCH_ARM64)
3333 if (uImm == 0)
3334 iGprImmTmp = ARMV8_A64_REG_XZR;
3335 else
3336 {
3337 Assert(iGprImmTmp < 31);
3338 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3339 }
3340 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3341 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3342
3343#else
3344# error "port me"
3345#endif
3346 return off;
3347}
3348
3349
3350/**
3351 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3352 *
3353 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3354 *
3355 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3356 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3357 * same. Will assert / throw if caller does not heed this.
3358 */
3359DECL_FORCE_INLINE_THROW(uint32_t)
3360iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3361 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3362{
3363#ifdef RT_ARCH_AMD64
3364 /* mov mem8, imm8 */
3366 if (iGprBase >= 8)
3367 pCodeBuf[off++] = X86_OP_REX_B;
3368 pCodeBuf[off++] = 0xc6;
3369 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3370 pCodeBuf[off++] = uImm;
3371 RT_NOREF(iGprImmTmp, iGprTmp);
3372
3373#elif defined(RT_ARCH_ARM64)
3374 if (uImm == 0)
3375 iGprImmTmp = ARMV8_A64_REG_XZR;
3376 else
3377 {
3378 Assert(iGprImmTmp < 31);
3379 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3380 }
3381 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3382 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3383
3384#else
3385# error "port me"
3386#endif
3387 return off;
3388}
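
/*
 * Usage sketch (illustrative; idxRegBase is an assumed caller-allocated host
 * register): clearing a byte flag at [idxRegBase + 4] needs no temporary
 * register on either host, as AMD64 encodes the zero as imm8 and ARM64
 * stores from XZR:
 *      off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, 0, idxRegBase, UINT8_MAX, 4);
 */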
3389
3390
3391#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3392/**
3393 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3394 *
3395 * @note ARM64: Misaligned @a offDisp values and values not in the
3396 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3397 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3398 * does not heed this.
3399 */
3400DECL_FORCE_INLINE_THROW(uint32_t)
3401iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3402 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3403{
3404#ifdef RT_ARCH_AMD64
3405 /* movdqu mem128, reg128 */
3406 pCodeBuf[off++] = 0xf3;
3407 if (iVecRegDst >= 8 || iGprBase >= 8)
3408 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3409 pCodeBuf[off++] = 0x0f;
3410 pCodeBuf[off++] = 0x7f;
3411 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3412 RT_NOREF(iGprTmp);
3413
3414#elif defined(RT_ARCH_ARM64)
3415 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3416 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3417
3418#else
3419# error "port me"
3420#endif
3421 return off;
3422}
3423
3424
3425/**
3426 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3427 */
3428DECL_INLINE_THROW(uint32_t)
3429iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3430{
3431#ifdef RT_ARCH_AMD64
3432 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3433 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3434
3435#elif defined(RT_ARCH_ARM64)
3436 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3437
3438#else
3439# error "port me"
3440#endif
3441 return off;
3442}
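
/*
 * Usage sketch (illustrative only; the helper name and register numbers are
 * assumptions, not part of the recompiler API): copies a 128-bit value from
 * [iGprSrcBase + offSrc] to [iGprDstBase + offDst] through the temporary
 * vector register iVecRegTmp, which the caller is assumed to have allocated.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCopyU128ByGprSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegTmp,
                                 uint8_t iGprDstBase, int32_t offDst, uint8_t iGprSrcBase, int32_t offSrc)
{
    /* Load the source 128 bits into the temporary vector register... */
    off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, iVecRegTmp, iGprSrcBase, offSrc);
    /* ... and store them at the destination address. */
    off = iemNativeEmitStoreVecRegByGprU128(pReNative, off, iVecRegTmp, iGprDstBase, offDst);
    return off;
}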
3443
3444
3445/**
3446 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3447 *
3448 * @note ARM64: Misaligned @a offDisp values and values not in the
3449 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3450 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3451 * does not heed this.
3452 */
3453DECL_FORCE_INLINE_THROW(uint32_t)
3454iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3455 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3456{
3457#ifdef RT_ARCH_AMD64
3458 /* vmovdqu mem256, reg256 */
3459 pCodeBuf[off++] = X86_OP_VEX3;
3460 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3461 | X86_OP_VEX3_BYTE1_X
3462 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3463 | UINT8_C(0x01);
3464 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3465 pCodeBuf[off++] = 0x7f;
3466 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3467 RT_NOREF(iGprTmp);
3468
3469#elif defined(RT_ARCH_ARM64)
3470 Assert(!(iVecRegDst & 0x1));
3471 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3472 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3473 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3474 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3475#else
3476# error "port me"
3477#endif
3478 return off;
3479}
3480
3481
3482/**
3483 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3484 */
3485DECL_INLINE_THROW(uint32_t)
3486iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3487{
3488#ifdef RT_ARCH_AMD64
3489 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3490 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3491
3492#elif defined(RT_ARCH_ARM64)
3493 Assert(!(iVecRegDst & 0x1));
3494 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3495 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3496 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3497 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3498
3499#else
3500# error "port me"
3501#endif
3502 return off;
3503}
3504#endif
3505
3506
3507
3508/*********************************************************************************************************************************
3509* Subtraction and Additions *
3510*********************************************************************************************************************************/
3511
3512/**
3513 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3514 * @note The AMD64 version sets flags.
3515 */
3516DECL_INLINE_THROW(uint32_t)
3517iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3518{
3519#if defined(RT_ARCH_AMD64)
3520 /* sub Gv,Ev */
3521 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3522 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3523 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3524 pbCodeBuf[off++] = 0x2b;
3525 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3526
3527#elif defined(RT_ARCH_ARM64)
3528 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3529 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3530
3531#else
3532# error "Port me"
3533#endif
3534 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3535 return off;
3536}
3537
3538
3539/**
3540 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3541 * @note The AMD64 version sets flags.
3542 */
3543DECL_FORCE_INLINE(uint32_t)
3544iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3545{
3546#if defined(RT_ARCH_AMD64)
3547 /* sub Gv,Ev */
3548 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3549 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3550 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3551 pCodeBuf[off++] = 0x2b;
3552 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3553
3554#elif defined(RT_ARCH_ARM64)
3555 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3556
3557#else
3558# error "Port me"
3559#endif
3560 return off;
3561}
3562
3563
3564/**
3565 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3566 * @note The AMD64 version sets flags.
3567 */
3568DECL_INLINE_THROW(uint32_t)
3569iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3570{
3571#if defined(RT_ARCH_AMD64)
3572 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3573#elif defined(RT_ARCH_ARM64)
3574 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3575#else
3576# error "Port me"
3577#endif
3578 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3579 return off;
3580}
3581
3582
3583/**
3584 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3585 *
3586 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3587 *
3588 * @note Larger constants will require a temporary register. Failing to specify
3589 * one when needed will trigger fatal assertion / throw.
3590 */
3591DECL_FORCE_INLINE_THROW(uint32_t)
3592iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3593 uint8_t iGprTmp = UINT8_MAX)
3594{
3595#ifdef RT_ARCH_AMD64
3596 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3597 if (iSubtrahend == 1)
3598 {
3599 /* dec r/m64 */
3600 pCodeBuf[off++] = 0xff;
3601 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3602 }
3603 else if (iSubtrahend == -1)
3604 {
3605 /* inc r/m64 */
3606 pCodeBuf[off++] = 0xff;
3607 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3608 }
3609 else if ((int8_t)iSubtrahend == iSubtrahend)
3610 {
3611 /* sub r/m64, imm8 */
3612 pCodeBuf[off++] = 0x83;
3613 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3614 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3615 }
3616 else if ((int32_t)iSubtrahend == iSubtrahend)
3617 {
3618 /* sub r/m64, imm32 */
3619 pCodeBuf[off++] = 0x81;
3620 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3621 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3622 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3623 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3624 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3625 }
3626 else if (iGprTmp != UINT8_MAX)
3627 {
3628 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3629 /* sub r/m64, r64 */
3630 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3631 pCodeBuf[off++] = 0x29;
3632 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3633 }
3634 else
3635# ifdef IEM_WITH_THROW_CATCH
3636 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3637# else
3638 AssertReleaseFailedStmt(off = UINT32_MAX);
3639# endif
3640
3641#elif defined(RT_ARCH_ARM64)
3642 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3643 if (uAbsSubtrahend < 4096)
3644 {
3645 if (iSubtrahend >= 0)
3646 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3647 else
3648 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3649 }
3650 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3651 {
3652 if (iSubtrahend >= 0)
3653 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3654 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3655 else
3656 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3657 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3658 }
3659 else if (iGprTmp != UINT8_MAX)
3660 {
3661 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3662 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3663 }
3664 else
3665# ifdef IEM_WITH_THROW_CATCH
3666 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3667# else
3668 AssertReleaseFailedStmt(off = UINT32_MAX);
3669# endif
3670
3671#else
3672# error "Port me"
3673#endif
3674 return off;
3675}
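
/*
 * Worked encodings (illustrative, iGprDst=0/RAX): the immediate selects the
 * shortest AMD64 form:
 *      iSubtrahend = 1   ->  48 ff c8        dec rax
 *      iSubtrahend = -1  ->  48 ff c0        inc rax
 *      iSubtrahend = 16  ->  48 83 e8 10     sub rax, 16
 * The ARM64 path folds the same cases into a single sub/add-immediate
 * instruction as long as the constant fits the imm12 (optionally shifted) form.
 */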
3676
3677
3678/**
3679 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3680 *
3681 * @note Larger constants will require a temporary register. Failing to specify
3682 * one when needed will trigger fatal assertion / throw.
3683 */
3684DECL_INLINE_THROW(uint32_t)
3685iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3686 uint8_t iGprTmp = UINT8_MAX)
3687
3688{
3689#ifdef RT_ARCH_AMD64
3690 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3691#elif defined(RT_ARCH_ARM64)
3692 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3693#else
3694# error "Port me"
3695#endif
3696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3697 return off;
3698}
3699
3700
3701/**
3702 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3703 *
3704 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3705 *
3706 * @note ARM64: Larger constants will require a temporary register. Failing to
3707 * specify one when needed will trigger fatal assertion / throw.
3708 */
3709DECL_FORCE_INLINE_THROW(uint32_t)
3710iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3711 uint8_t iGprTmp = UINT8_MAX)
3712{
3713#ifdef RT_ARCH_AMD64
3714 if (iGprDst >= 8)
3715 pCodeBuf[off++] = X86_OP_REX_B;
3716 if (iSubtrahend == 1)
3717 {
3718 /* dec r/m32 */
3719 pCodeBuf[off++] = 0xff;
3720 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3721 }
3722 else if (iSubtrahend == -1)
3723 {
3724 /* inc r/m32 */
3725 pCodeBuf[off++] = 0xff;
3726 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3727 }
3728 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3729 {
3730 /* sub r/m32, imm8 */
3731 pCodeBuf[off++] = 0x83;
3732 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3733 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3734 }
3735 else
3736 {
3737 /* sub r/m32, imm32 */
3738 pCodeBuf[off++] = 0x81;
3739 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3740 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3741 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3742 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3743 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3744 }
3745 RT_NOREF(iGprTmp);
3746
3747#elif defined(RT_ARCH_ARM64)
3748 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3749 if (uAbsSubtrahend < 4096)
3750 {
3751 if (iSubtrahend >= 0)
3752 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3753 else
3754 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3755 }
3756 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3757 {
3758 if (iSubtrahend >= 0)
3759 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3760 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3761 else
3762 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3763 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3764 }
3765 else if (iGprTmp != UINT8_MAX)
3766 {
3767 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3768 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3769 }
3770 else
3771# ifdef IEM_WITH_THROW_CATCH
3772 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3773# else
3774 AssertReleaseFailedStmt(off = UINT32_MAX);
3775# endif
3776
3777#else
3778# error "Port me"
3779#endif
3780 return off;
3781}
3782
3783
3784/**
3785 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3786 *
3787 * @note ARM64: Larger constants will require a temporary register. Failing to
3788 * specify one when needed will trigger fatal assertion / throw.
3789 */
3790DECL_INLINE_THROW(uint32_t)
3791iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3792 uint8_t iGprTmp = UINT8_MAX)
3793
3794{
3795#ifdef RT_ARCH_AMD64
3796 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3797#elif defined(RT_ARCH_ARM64)
3798 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3799#else
3800# error "Port me"
3801#endif
3802 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3803 return off;
3804}
3805
3806
3807/**
3808 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3809 *
3810 * This will optimize using DEC/INC/whatever, and the ARM64 version will not
3811 * set flags, so it is not suitable as a base for conditional jumps.
3812 *
3813 * @note AMD64: Will only update the lower 16 bits of the register.
3814 * @note ARM64: Will update the entire register.
3815 * @note ARM64: Larger constants will require a temporary register. Failing to
3816 * specify one when needed will trigger fatal assertion / throw.
3817 */
3818DECL_FORCE_INLINE_THROW(uint32_t)
3819iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3820 uint8_t iGprTmp = UINT8_MAX)
3821{
3822#ifdef RT_ARCH_AMD64
3823 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3824 if (iGprDst >= 8)
3825 pCodeBuf[off++] = X86_OP_REX_B;
3826 if (iSubtrahend == 1)
3827 {
3828 /* dec r/m16 */
3829 pCodeBuf[off++] = 0xff;
3830 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3831 }
3832 else if (iSubtrahend == -1)
3833 {
3834 /* inc r/m16 */
3835 pCodeBuf[off++] = 0xff;
3836 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3837 }
3838 else if ((int8_t)iSubtrahend == iSubtrahend)
3839 {
3840 /* sub r/m16, imm8 */
3841 pCodeBuf[off++] = 0x83;
3842 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3843 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3844 }
3845 else
3846 {
3847 /* sub r/m16, imm16 */
3848 pCodeBuf[off++] = 0x81;
3849 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3850 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3851 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3852 }
3853 RT_NOREF(iGprTmp);
3854
3855#elif defined(RT_ARCH_ARM64)
3856 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3857 if (uAbsSubtrahend < 4096)
3858 {
3859 if (iSubtrahend >= 0)
3860 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3861 else
3862 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3863 }
3864 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3865 {
3866 if (iSubtrahend >= 0)
3867 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3868 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3869 else
3870 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3871 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3872 }
3873 else if (iGprTmp != UINT8_MAX)
3874 {
3875 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3876 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3877 }
3878 else
3879# ifdef IEM_WITH_THROW_CATCH
3880 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3881# else
3882 AssertReleaseFailedStmt(off = UINT32_MAX);
3883# endif
3884 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3885
3886#else
3887# error "Port me"
3888#endif
3889 return off;
3890}
3891
3892
3893/**
3894 * Emits adding a 64-bit GPR to another, storing the result in the first.
3895 * @note The AMD64 version sets flags.
3896 */
3897DECL_FORCE_INLINE(uint32_t)
3898iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3899{
3900#if defined(RT_ARCH_AMD64)
3901 /* add Gv,Ev */
3902 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3903 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3904 pCodeBuf[off++] = 0x03;
3905 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3906
3907#elif defined(RT_ARCH_ARM64)
3908 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3909
3910#else
3911# error "Port me"
3912#endif
3913 return off;
3914}
3915
3916
3917/**
3918 * Emits adding a 64-bit GPR to another, storing the result in the first.
3919 * @note The AMD64 version sets flags.
3920 */
3921DECL_INLINE_THROW(uint32_t)
3922iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3923{
3924#if defined(RT_ARCH_AMD64)
3925 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3926#elif defined(RT_ARCH_ARM64)
3927 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3928#else
3929# error "Port me"
3930#endif
3931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3932 return off;
3933}
3934
3935
3936/**
3937 * Emits adding a 32-bit GPR to another, storing the result in the first.
3938 * @note The AMD64 version sets flags.
3939 */
3940DECL_FORCE_INLINE(uint32_t)
3941iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3942{
3943#if defined(RT_ARCH_AMD64)
3944 /* add Gv,Ev */
3945 if (iGprDst >= 8 || iGprAddend >= 8)
3946 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3947 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3948 pCodeBuf[off++] = 0x03;
3949 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3950
3951#elif defined(RT_ARCH_ARM64)
3952 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3953
3954#else
3955# error "Port me"
3956#endif
3957 return off;
3958}
3959
3960
3961/**
3962 * Emits adding a 32-bit GPR to another, storing the result in the first.
3963 * @note The AMD64 version sets flags.
3964 */
3965DECL_INLINE_THROW(uint32_t)
3966iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3967{
3968#if defined(RT_ARCH_AMD64)
3969 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3970#elif defined(RT_ARCH_ARM64)
3971 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3972#else
3973# error "Port me"
3974#endif
3975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3976 return off;
3977}
3978
3979
3980/**
3981 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3982 */
3983DECL_INLINE_THROW(uint32_t)
3984iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3985{
3986#if defined(RT_ARCH_AMD64)
3987 /* add or inc */
3988 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3989 if (iImm8 != 1)
3990 {
3991 pCodeBuf[off++] = 0x83;
3992 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3993 pCodeBuf[off++] = (uint8_t)iImm8;
3994 }
3995 else
3996 {
3997 pCodeBuf[off++] = 0xff;
3998 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3999 }
4000
4001#elif defined(RT_ARCH_ARM64)
4002 if (iImm8 >= 0)
4003 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4004 else
4005 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4006
4007#else
4008# error "Port me"
4009#endif
4010 return off;
4011}
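
/*
 * Worked encodings (illustrative, iGprDst=0/RAX):
 *      iImm8 = 1   ->  48 ff c0        inc rax
 *      iImm8 = 7   ->  48 83 c0 07     add rax, 7
 * On ARM64 a negative iImm8 is emitted as the corresponding sub-immediate,
 * e.g. iImm8 = -7 becomes "sub x0, x0, #7".
 */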
4012
4013
4014/**
4015 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4016 */
4017DECL_INLINE_THROW(uint32_t)
4018iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4019{
4020#if defined(RT_ARCH_AMD64)
4021 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4022#elif defined(RT_ARCH_ARM64)
4023 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4024#else
4025# error "Port me"
4026#endif
4027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4028 return off;
4029}
4030
4031
4032/**
4033 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4034 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4035 */
4036DECL_FORCE_INLINE(uint32_t)
4037iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4038{
4039#if defined(RT_ARCH_AMD64)
4040 /* add or inc */
4041 if (iGprDst >= 8)
4042 pCodeBuf[off++] = X86_OP_REX_B;
4043 if (iImm8 != 1)
4044 {
4045 pCodeBuf[off++] = 0x83;
4046 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4047 pCodeBuf[off++] = (uint8_t)iImm8;
4048 }
4049 else
4050 {
4051 pCodeBuf[off++] = 0xff;
4052 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4053 }
4054
4055#elif defined(RT_ARCH_ARM64)
4056 if (iImm8 >= 0)
4057 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4058 else
4059 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4060
4061#else
4062# error "Port me"
4063#endif
4064 return off;
4065}
4066
4067
4068/**
4069 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4070 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4071 */
4072DECL_INLINE_THROW(uint32_t)
4073iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4074{
4075#if defined(RT_ARCH_AMD64)
4076 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4077#elif defined(RT_ARCH_ARM64)
4078 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4079#else
4080# error "Port me"
4081#endif
4082 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4083 return off;
4084}
4085
4086
4087/**
4088 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4089 *
4090 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4091 */
4092DECL_FORCE_INLINE_THROW(uint32_t)
4093iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4094{
4095#if defined(RT_ARCH_AMD64)
4096 if ((int8_t)iAddend == iAddend)
4097 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4098
4099 if ((int32_t)iAddend == iAddend)
4100 {
4101 /* add grp, imm32 */
4102 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4103 pCodeBuf[off++] = 0x81;
4104 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4105 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4106 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4107 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4108 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4109 }
4110 else if (iGprTmp != UINT8_MAX)
4111 {
4112 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4113
4114 /* add dst, tmpreg */
4115 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4116 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4117 pCodeBuf[off++] = 0x03;
4118 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4119 }
4120 else
4121# ifdef IEM_WITH_THROW_CATCH
4122 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4123# else
4124 AssertReleaseFailedStmt(off = UINT32_MAX);
4125# endif
4126
4127#elif defined(RT_ARCH_ARM64)
4128 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4129 if (uAbsAddend < 4096)
4130 {
4131 if (iAddend >= 0)
4132 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4133 else
4134 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend);
4135 }
4136 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4137 {
4138 if (iAddend >= 0)
4139 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4140 true /*f64Bit*/, true /*fShift12*/);
4141 else
4142 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint32_t)uAbsAddend >> 12,
4143 true /*f64Bit*/, true /*fShift12*/);
4144 }
4145 else if (iGprTmp != UINT8_MAX)
4146 {
4147 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4148 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4149 }
4150 else
4151# ifdef IEM_WITH_THROW_CATCH
4152 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4153# else
4154 AssertReleaseFailedStmt(off = UINT32_MAX);
4155# endif
4156
4157#else
4158# error "Port me"
4159#endif
4160 return off;
4161}
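
/*
 * ARM64 immediate ranges handled above (illustrative summary):
 *      |iAddend| <  0x1000                       -> single add/sub #imm12
 *      |iAddend| <= 0xfff000, low 12 bits zero   -> single add/sub #imm12, lsl #12
 *      anything else                             -> requires iGprTmp, else assert/throw
 */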
4162
4163
4164/**
4165 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4166 */
4167DECL_INLINE_THROW(uint32_t)
4168iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4169{
4170#if defined(RT_ARCH_AMD64)
4171 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4172 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4173
4174 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4175 {
4176 /* add grp, imm32 */
4177 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4178 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4179 pbCodeBuf[off++] = 0x81;
4180 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4181 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4182 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4183 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4184 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4185 }
4186 else
4187 {
4188 /* Best to use a temporary register to deal with this in the simplest way: */
4189 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4190
4191 /* add dst, tmpreg */
4192 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4193 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4194 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4195 pbCodeBuf[off++] = 0x03;
4196 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4197
4198 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4199 }
4200
4201#elif defined(RT_ARCH_ARM64)
4202 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4203 {
4204 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4205 if (iAddend >= 0)
4206 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend);
4207 else
4208 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend);
4209 }
4210 else
4211 {
4212 /* Use temporary register for the immediate. */
4213 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4214
4215 /* add gprdst, gprdst, tmpreg */
4216 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4217 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg);
4218
4219 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4220 }
4221
4222#else
4223# error "Port me"
4224#endif
4225 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4226 return off;
4227}
4228
4229
4230/**
4231 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4232 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4233 * @note For ARM64 the iAddend value must be in the range 0x000..0xfff,
4234 * or that range shifted 12 bits to the left (e.g. 0x1000..0xfff000 with
4235 * the lower 12 bits always zero). The negative ranges are also allowed,
4236 * making it behave like a subtraction. If the constant does not conform,
4237 * the emitter will assert / throw.
4238 */
4239DECL_FORCE_INLINE_THROW(uint32_t)
4240iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4241{
4242#if defined(RT_ARCH_AMD64)
4243 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4244 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4245
4246 /* add grp, imm32 */
4247 if (iGprDst >= 8)
4248 pCodeBuf[off++] = X86_OP_REX_B;
4249 pCodeBuf[off++] = 0x81;
4250 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4251 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4252 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4253 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4254 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4255
4256#elif defined(RT_ARCH_ARM64)
4257 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4258 if (uAbsAddend <= 0xfff)
4259 {
4260 if (iAddend >= 0)
4261 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4262 else
4263 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4264 }
4265 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4266 {
4267 if (iAddend >= 0)
4268 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4269 false /*f64Bit*/, true /*fShift12*/);
4270 else
4271 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, uAbsAddend >> 12,
4272 false /*f64Bit*/, true /*fShift12*/);
4273 }
4274 else
4275# ifdef IEM_WITH_THROW_CATCH
4276 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4277# else
4278 AssertReleaseFailedStmt(off = UINT32_MAX);
4279# endif
4280
4281#else
4282# error "Port me"
4283#endif
4284 return off;
4285}
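
/*
 * Examples of the ARM64 constant constraint documented above (illustrative):
 *      iAddend = 0xfff     -> ok, single add #0xfff
 *      iAddend = 0x1000    -> ok, single add #1, lsl #12
 *      iAddend = 0x1001    -> does not conform; asserts / throws
 */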
4286
4287
4288/**
4289 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4290 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4291 */
4292DECL_INLINE_THROW(uint32_t)
4293iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4294{
4295#if defined(RT_ARCH_AMD64)
4296 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4297
4298#elif defined(RT_ARCH_ARM64)
4299 if ((uint64_t)RT_ABS(iAddend) < RT_BIT_32(12))
4300 {
4301 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4302 if (iAddend >= 0)
4303 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint32_t)iAddend, false /*f64Bit*/);
4304 else
4305 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint32_t)-iAddend, false /*f64Bit*/);
4306 }
4307 else
4308 {
4309 /* Use temporary register for the immediate. */
4310 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint32_t)iAddend);
4311
4312 /* add gprdst, gprdst, tmpreg */
4313 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4314 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4315
4316 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4317 }
4318
4319#else
4320# error "Port me"
4321#endif
4322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4323 return off;
4324}
4325
4326
4327/**
4328 * Emits a 16-bit GPR add with a signed immediate addend.
4329 *
4330 * This will optimize using INC/DEC/whatever, and the ARM64 version will not
4331 * set flags, so it is not suitable as a base for conditional jumps.
4332 *
4333 * @note AMD64: Will only update the lower 16 bits of the register.
4334 * @note ARM64: Will update the entire register.
4335 * @note ARM64: Larger constants will require a temporary register. Failing to
4336 * specify one when needed will trigger fatal assertion / throw.
4337 * @sa iemNativeEmitSubGpr16ImmEx
4338 */
4339DECL_FORCE_INLINE_THROW(uint32_t)
4340iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend,
4341 uint8_t iGprTmp = UINT8_MAX)
4342{
4343#ifdef RT_ARCH_AMD64
4344 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4345 if (iGprDst >= 8)
4346 pCodeBuf[off++] = X86_OP_REX_B;
4347 if (iAddend == 1)
4348 {
4349 /* inc r/m16 */
4350 pCodeBuf[off++] = 0xff;
4351 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4352 }
4353 else if (iAddend == -1)
4354 {
4355 /* dec r/m16 */
4356 pCodeBuf[off++] = 0xff;
4357 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4358 }
4359 else if ((int8_t)iAddend == iAddend)
4360 {
4361 /* add r/m16, imm8 */
4362 pCodeBuf[off++] = 0x83;
4363 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4364 pCodeBuf[off++] = (uint8_t)iAddend;
4365 }
4366 else
4367 {
4368 /* add r/m16, imm16 */
4369 pCodeBuf[off++] = 0x81;
4370 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4371 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4372 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4373 }
4374 RT_NOREF(iGprTmp);
4375
4376#elif defined(RT_ARCH_ARM64)
4377 uint32_t uAbsAddend = RT_ABS(iAddend);
4378 if (uAbsAddend < 4096)
4379 {
4380 if (iAddend >= 0)
4381 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4382 else
4383 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend, false /*f64Bit*/);
4384 }
4385 else if (uAbsAddend <= 0xfff000 && !(uAbsAddend & 0xfff))
4386 {
4387 if (iAddend >= 0)
4388 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4389 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4390 else
4391 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsAddend >> 12,
4392 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4393 }
4394 else if (iGprTmp != UINT8_MAX)
4395 {
4396 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iAddend);
4397 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4398 }
4399 else
4400# ifdef IEM_WITH_THROW_CATCH
4401 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4402# else
4403 AssertReleaseFailedStmt(off = UINT32_MAX);
4404# endif
4405 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4406
4407#else
4408# error "Port me"
4409#endif
4410 return off;
4411}
4412
4413
4414
4415/**
4416 * Adds two 64-bit GPRs together, storing the result in a third register.
4417 */
4418DECL_FORCE_INLINE(uint32_t)
4419iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4420{
4421#ifdef RT_ARCH_AMD64
4422 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4423 {
4424 /** @todo consider LEA */
4425 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4426 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4427 }
4428 else
4429 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4430
4431#elif defined(RT_ARCH_ARM64)
4432 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4433
4434#else
4435# error "Port me!"
4436#endif
4437 return off;
4438}
4439
4440
4441
4442/**
4443 * Adds two 32-bit GPRs together, storing the result in a third register.
4444 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4445 */
4446DECL_FORCE_INLINE(uint32_t)
4447iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4448{
4449#ifdef RT_ARCH_AMD64
4450 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4451 {
4452 /** @todo consider LEA */
4453 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4454 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4455 }
4456 else
4457 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4458
4459#elif defined(RT_ARCH_ARM64)
4460 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4461
4462#else
4463# error "Port me!"
4464#endif
4465 return off;
4466}
4467
4468
4469/**
4470 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4471 * third register.
4472 *
4473 * @note The ARM64 version does not work for non-trivial constants if the
4474 * two registers are the same. Will assert / throw exception.
4475 */
4476DECL_FORCE_INLINE_THROW(uint32_t)
4477iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4478{
4479#ifdef RT_ARCH_AMD64
4480 /** @todo consider LEA */
4481 if ((int8_t)iImmAddend == iImmAddend)
4482 {
4483 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4484 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4485 }
4486 else
4487 {
4488 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4489 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4490 }
4491
4492#elif defined(RT_ARCH_ARM64)
4493 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4494 if (uAbsImmAddend < 4096)
4495 {
4496 if (iImmAddend >= 0)
4497 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4498 else
4499 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend);
4500 }
4501 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4502 {
4503 if (iImmAddend >= 0)
4504 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4505 else
4506 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, true /*f64Bit*/, true /*fShift12*/);
4507 }
4508 else if (iGprDst != iGprAddend)
4509 {
4510 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4511 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4512 }
4513 else
4514# ifdef IEM_WITH_THROW_CATCH
4515 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4516# else
4517 AssertReleaseFailedStmt(off = UINT32_MAX);
4518# endif
4519
4520#else
4521# error "Port me!"
4522#endif
4523 return off;
4524}
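
/*
 * Usage sketch (illustrative; idxRegEffAddr and idxRegBase are assumed
 * caller-allocated host registers): materializing an effective address
 * without clobbering the base register:
 *      off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegEffAddr, idxRegBase, 32);
 * On ARM64 this is a single "add x<dst>, x<base>, #32"; keep dst != base for
 * constants outside the add/sub immediate ranges (see the note above).
 */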
4525
4526
4527/**
4528 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4529 * third register.
4530 *
4531 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4532 *
4533 * @note The ARM64 version does not work for non-trivial constants if the
4534 * two registers are the same. Will assert / throw exception.
4535 */
4536DECL_FORCE_INLINE_THROW(uint32_t)
4537iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4538{
4539#ifdef RT_ARCH_AMD64
4540 /** @todo consider LEA */
4541 if ((int8_t)iImmAddend == iImmAddend)
4542 {
4543 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4544 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4545 }
4546 else
4547 {
4548 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4549 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4550 }
4551
4552#elif defined(RT_ARCH_ARM64)
4553 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4554 if (uAbsImmAddend < 4096)
4555 {
4556 if (iImmAddend >= 0)
4557 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4558 else
4559 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4560 }
4561 else if (uAbsImmAddend <= 0xfff000 && !(uAbsImmAddend & 0xfff))
4562 {
4563 if (iImmAddend >= 0)
4564 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4565 else
4566 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprAddend, uAbsImmAddend >> 12, false /*f64Bit*/, true /*fShift12*/);
4567 }
4568 else if (iGprDst != iGprAddend)
4569 {
4570 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4571 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4572 }
4573 else
4574# ifdef IEM_WITH_THROW_CATCH
4575 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4576# else
4577 AssertReleaseFailedStmt(off = UINT32_MAX);
4578# endif
4579
4580#else
4581# error "Port me!"
4582#endif
4583 return off;
4584}
4585
4586
4587/*********************************************************************************************************************************
4588* Unary Operations *
4589*********************************************************************************************************************************/
4590
4591/**
4592 * Emits code for two's complement negation of a 64-bit GPR.
4593 */
4594DECL_FORCE_INLINE_THROW(uint32_t)
4595iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4596{
4597#if defined(RT_ARCH_AMD64)
4598 /* neg Ev */
4599 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4600 pCodeBuf[off++] = 0xf7;
4601 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4602
4603#elif defined(RT_ARCH_ARM64)
4604 /* sub dst, xzr, dst */
4605 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4606
4607#else
4608# error "Port me"
4609#endif
4610 return off;
4611}
4612
4613
4614/**
4615 * Emits code for two's complement negation of a 64-bit GPR.
4616 */
4617DECL_INLINE_THROW(uint32_t)
4618iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4619{
4620#if defined(RT_ARCH_AMD64)
4621 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4622#elif defined(RT_ARCH_ARM64)
4623 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4624#else
4625# error "Port me"
4626#endif
4627 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4628 return off;
4629}
4630
4631
4632/**
4633 * Emits code for two's complement negation of a 32-bit GPR.
4634 * @note Bits 32 thru 63 are set to zero.
4635 */
4636DECL_FORCE_INLINE_THROW(uint32_t)
4637iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4638{
4639#if defined(RT_ARCH_AMD64)
4640 /* neg Ev */
4641 if (iGprDst >= 8)
4642 pCodeBuf[off++] = X86_OP_REX_B;
4643 pCodeBuf[off++] = 0xf7;
4644 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4645
4646#elif defined(RT_ARCH_ARM64)
4647 /* sub dst, xzr, dst */
4648 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4649
4650#else
4651# error "Port me"
4652#endif
4653 return off;
4654}
4655
4656
4657/**
4658 * Emits code for two's complement negation of a 32-bit GPR.
4659 * @note Bits 32 thru 63 are set to zero.
4660 */
4661DECL_INLINE_THROW(uint32_t)
4662iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4663{
4664#if defined(RT_ARCH_AMD64)
4665 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4666#elif defined(RT_ARCH_ARM64)
4667 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4668#else
4669# error "Port me"
4670#endif
4671 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4672 return off;
4673}
4674
4675
4676
4677/*********************************************************************************************************************************
4678* Bit Operations *
4679*********************************************************************************************************************************/
4680
4681/**
4682 * Emits code for clearing bits 16 thru 63 in the GPR.
4683 */
4684DECL_INLINE_THROW(uint32_t)
4685iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4686{
4687#if defined(RT_ARCH_AMD64)
4688 /* movzx Gv,Ew */
4689 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4690 if (iGprDst >= 8)
4691 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4692 pbCodeBuf[off++] = 0x0f;
4693 pbCodeBuf[off++] = 0xb7;
4694 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4695
4696#elif defined(RT_ARCH_ARM64)
4697 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4698# if 1
4699 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4700# else
4701 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4702 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4703# endif
4704#else
4705# error "Port me"
4706#endif
4707 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4708 return off;
4709}
4710
4711
4712/**
4713 * Emits code for AND'ing two 64-bit GPRs.
4714 *
4715 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4716 * and ARM64 hosts.
4717 */
4718DECL_FORCE_INLINE(uint32_t)
4719iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4720{
4721#if defined(RT_ARCH_AMD64)
4722 /* and Gv, Ev */
4723 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4724 pCodeBuf[off++] = 0x23;
4725 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4726 RT_NOREF(fSetFlags);
4727
4728#elif defined(RT_ARCH_ARM64)
4729 if (!fSetFlags)
4730 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4731 else
4732 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4733
4734#else
4735# error "Port me"
4736#endif
4737 return off;
4738}
4739
4740
4741/**
4742 * Emits code for AND'ing two 64-bit GPRs.
4743 *
4744 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4745 * and ARM64 hosts.
4746 */
4747DECL_INLINE_THROW(uint32_t)
4748iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4749{
4750#if defined(RT_ARCH_AMD64)
4751 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4752#elif defined(RT_ARCH_ARM64)
4753 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4754#else
4755# error "Port me"
4756#endif
4757 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4758 return off;
4759}
4760
4761
4762/**
4763 * Emits code for AND'ing two 32-bit GPRs.
4764 */
4765DECL_FORCE_INLINE(uint32_t)
4766iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4767{
4768#if defined(RT_ARCH_AMD64)
4769 /* and Gv, Ev */
4770 if (iGprDst >= 8 || iGprSrc >= 8)
4771 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4772 pCodeBuf[off++] = 0x23;
4773 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4774 RT_NOREF(fSetFlags);
4775
4776#elif defined(RT_ARCH_ARM64)
4777 if (!fSetFlags)
4778 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4779 else
4780 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4781
4782#else
4783# error "Port me"
4784#endif
4785 return off;
4786}
4787
4788
4789/**
4790 * Emits code for AND'ing two 32-bit GPRs.
4791 */
4792DECL_INLINE_THROW(uint32_t)
4793iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4794{
4795#if defined(RT_ARCH_AMD64)
4796 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4797#elif defined(RT_ARCH_ARM64)
4798 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4799#else
4800# error "Port me"
4801#endif
4802 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4803 return off;
4804}
4805
4806
4807/**
4808 * Emits code for AND'ing a 64-bit GPR with a constant.
4809 *
4810 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4811 * and ARM64 hosts.
4812 */
4813DECL_INLINE_THROW(uint32_t)
4814iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
4815{
4816#if defined(RT_ARCH_AMD64)
4817 if ((int64_t)uImm == (int8_t)uImm)
4818 {
4819 /* and Ev, imm8 */
4820 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4821 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4822 pbCodeBuf[off++] = 0x83;
4823 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4824 pbCodeBuf[off++] = (uint8_t)uImm;
4825 }
4826 else if ((int64_t)uImm == (int32_t)uImm)
4827 {
4828 /* and Ev, imm32 */
4829 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4830 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
4831 pbCodeBuf[off++] = 0x81;
4832 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4833 pbCodeBuf[off++] = RT_BYTE1(uImm);
4834 pbCodeBuf[off++] = RT_BYTE2(uImm);
4835 pbCodeBuf[off++] = RT_BYTE3(uImm);
4836 pbCodeBuf[off++] = RT_BYTE4(uImm);
4837 }
4838 else
4839 {
4840 /* Use temporary register for the 64-bit immediate. */
4841 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4842 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
4843 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4844 }
4845 RT_NOREF(fSetFlags);
4846
4847#elif defined(RT_ARCH_ARM64)
4848 uint32_t uImmR = 0;
4849 uint32_t uImmNandS = 0;
4850 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4851 {
4852 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4853 if (!fSetFlags)
4854 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
4855 else
4856 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
4857 }
4858 else
4859 {
4860 /* Use temporary register for the 64-bit immediate. */
4861 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4862 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4863 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4864 }
4865
4866#else
4867# error "Port me"
4868#endif
4869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4870 return off;
4871}
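/* Hedged example (values are illustrative, not from the original source): whether
 * the single-instruction ARM64 path above is taken depends on
 * Armv8A64ConvertMask64ToImmRImmS, i.e. on whether uImm is a (possibly rotated)
 * run of ones repeated across the register -- anything else costs a temporary
 * register load.
 */
#if 0 /* example only */
off = iemNativeEmitAndGprByImm(pReNative, off, 0, UINT64_C(0x00000000ffffffff)); /* encodable: single AND */
off = iemNativeEmitAndGprByImm(pReNative, off, 0, UINT64_C(0xffffffffffffff00)); /* encodable: single AND */
off = iemNativeEmitAndGprByImm(pReNative, off, 0, UINT64_C(0x0123456789abcdef)); /* not encodable: tmp reg + AND */
#endif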
4872
4873
4874/**
4875 * Emits code for AND'ing a 32-bit GPR with a constant.
4876 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4877 * @note For ARM64 this only supports @a uImm values that can be expressed using
4878 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
4879 * make sure this is possible!
4880 */
4881DECL_FORCE_INLINE_THROW(uint32_t)
4882iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4883{
4884#if defined(RT_ARCH_AMD64)
4885 /* and Ev, imm */
4886 if (iGprDst >= 8)
4887 pCodeBuf[off++] = X86_OP_REX_B;
4888 if ((int32_t)uImm == (int8_t)uImm)
4889 {
4890 pCodeBuf[off++] = 0x83;
4891 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4892 pCodeBuf[off++] = (uint8_t)uImm;
4893 }
4894 else
4895 {
4896 pCodeBuf[off++] = 0x81;
4897 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
4898 pCodeBuf[off++] = RT_BYTE1(uImm);
4899 pCodeBuf[off++] = RT_BYTE2(uImm);
4900 pCodeBuf[off++] = RT_BYTE3(uImm);
4901 pCodeBuf[off++] = RT_BYTE4(uImm);
4902 }
4903 RT_NOREF(fSetFlags);
4904
4905#elif defined(RT_ARCH_ARM64)
4906 uint32_t uImmR = 0;
4907 uint32_t uImmNandS = 0;
4908 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4909 {
4910 if (!fSetFlags)
4911 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4912 else
4913 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4914 }
4915 else
4916# ifdef IEM_WITH_THROW_CATCH
4917 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4918# else
4919 AssertReleaseFailedStmt(off = UINT32_MAX);
4920# endif
4921
4922#else
4923# error "Port me"
4924#endif
4925 return off;
4926}
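/* A minimal sketch (an assumption-labelled example, not original text): a caller of
 * the Ex variant above that cannot guarantee an encodable mask on ARM64 would
 * typically pre-check with the same conversion helper (on AMD64 any imm32 works):
 */
#if 0 /* example only */
uint32_t uImmNandS = 0;
uint32_t uImmR     = 0;
if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
    off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, iGprDst, uImm);
else
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, iGprDst, uImm); /* falls back to a temp register */
#endif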
4927
4928
4929/**
4930 * Emits code for AND'ing a 32-bit GPR with a constant.
4931 *
4932 * @note Bits 32 thru 63 in the destination will be zero after the operation.
4933 */
4934DECL_INLINE_THROW(uint32_t)
4935iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
4936{
4937#if defined(RT_ARCH_AMD64)
4938 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
4939
4940#elif defined(RT_ARCH_ARM64)
4941 uint32_t uImmR = 0;
4942 uint32_t uImmNandS = 0;
4943 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
4944 {
4945 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4946 if (!fSetFlags)
4947 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4948 else
4949 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
4950 }
4951 else
4952 {
4953 /* Use temporary register for the 64-bit immediate. */
4954 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
4955 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
4956 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4957 }
4958
4959#else
4960# error "Port me"
4961#endif
4962 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4963 return off;
4964}
4965
4966
4967/**
4968 * Emits code for AND'ing a 64-bit GPR with a constant.
4969 *
4970 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
4971 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
4972 * the same.
4973 */
4974DECL_FORCE_INLINE_THROW(uint32_t)
4975iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
4976 bool fSetFlags = false)
4977{
4978#if defined(RT_ARCH_AMD64)
4979 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4980 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
4981 RT_NOREF(fSetFlags);
4982
4983#elif defined(RT_ARCH_ARM64)
4984 uint32_t uImmR = 0;
4985 uint32_t uImmNandS = 0;
4986 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
4987 {
4988 if (!fSetFlags)
4989 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4990 else
4991 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
4992 }
4993 else if (iGprDst != iGprSrc)
4994 {
4995 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
4996 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
4997 }
4998 else
4999# ifdef IEM_WITH_THROW_CATCH
5000 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5001# else
5002 AssertReleaseFailedStmt(off = UINT32_MAX);
5003# endif
5004
5005#else
5006# error "Port me"
5007#endif
5008 return off;
5009}
5010
5011/**
5012 * Emits code for AND'ing a 32-bit GPR with a constant.
5013 *
5014 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
5015 * encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
5016 * the same.
5017 *
5018 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5019 */
5020DECL_FORCE_INLINE_THROW(uint32_t)
5021iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5022 bool fSetFlags = false)
5023{
5024#if defined(RT_ARCH_AMD64)
5025 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5026 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5027 RT_NOREF(fSetFlags);
5028
5029#elif defined(RT_ARCH_ARM64)
5030 uint32_t uImmR = 0;
5031 uint32_t uImmNandS = 0;
5032 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5033 {
5034 if (!fSetFlags)
5035 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5036 else
5037 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5038 }
5039 else if (iGprDst != iGprSrc)
5040 {
5041 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5042 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5043 }
5044 else
5045# ifdef IEM_WITH_THROW_CATCH
5046 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5047# else
5048 AssertReleaseFailedStmt(off = UINT32_MAX);
5049# endif
5050
5051#else
5052# error "Port me"
5053#endif
5054 return off;
5055}
5056
5057
5058/**
5059 * Emits code for OR'ing two 64-bit GPRs.
5060 */
5061DECL_FORCE_INLINE(uint32_t)
5062iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5063{
5064#if defined(RT_ARCH_AMD64)
5065 /* or Gv, Ev */
5066 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5067 pCodeBuf[off++] = 0x0b;
5068 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5069
5070#elif defined(RT_ARCH_ARM64)
5071 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5072
5073#else
5074# error "Port me"
5075#endif
5076 return off;
5077}
5078
5079
5080/**
5081 * Emits code for OR'ing two 64-bit GPRs.
5082 */
5083DECL_INLINE_THROW(uint32_t)
5084iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5085{
5086#if defined(RT_ARCH_AMD64)
5087 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5088#elif defined(RT_ARCH_ARM64)
5089 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5090#else
5091# error "Port me"
5092#endif
5093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5094 return off;
5095}
5096
5097
5098/**
5099 * Emits code for OR'ing two 32-bit GPRs.
5100 * @note Bits 63:32 of the destination GPR will be cleared.
5101 */
5102DECL_FORCE_INLINE(uint32_t)
5103iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5104{
5105#if defined(RT_ARCH_AMD64)
5106 /* or Gv, Ev */
5107 if (iGprDst >= 8 || iGprSrc >= 8)
5108 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5109 pCodeBuf[off++] = 0x0b;
5110 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5111
5112#elif defined(RT_ARCH_ARM64)
5113 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5114
5115#else
5116# error "Port me"
5117#endif
5118 return off;
5119}
5120
5121
5122/**
5123 * Emits code for OR'ing two 32-bit GPRs.
5124 * @note Bits 63:32 of the destination GPR will be cleared.
5125 */
5126DECL_INLINE_THROW(uint32_t)
5127iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5128{
5129#if defined(RT_ARCH_AMD64)
5130 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5131#elif defined(RT_ARCH_ARM64)
5132 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5133#else
5134# error "Port me"
5135#endif
5136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5137 return off;
5138}
5139
5140
5141/**
5142 * Emits code for OR'ing a 64-bit GPR with a constant.
5143 */
5144DECL_INLINE_THROW(uint32_t)
5145iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5146{
5147#if defined(RT_ARCH_AMD64)
5148 if ((int64_t)uImm == (int8_t)uImm)
5149 {
5150 /* or Ev, imm8 */
5151 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5152 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5153 pbCodeBuf[off++] = 0x83;
5154 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5155 pbCodeBuf[off++] = (uint8_t)uImm;
5156 }
5157 else if ((int64_t)uImm == (int32_t)uImm)
5158 {
5159 /* or Ev, imm32 */
5160 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5161 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5162 pbCodeBuf[off++] = 0x81;
5163 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5164 pbCodeBuf[off++] = RT_BYTE1(uImm);
5165 pbCodeBuf[off++] = RT_BYTE2(uImm);
5166 pbCodeBuf[off++] = RT_BYTE3(uImm);
5167 pbCodeBuf[off++] = RT_BYTE4(uImm);
5168 }
5169 else
5170 {
5171 /* Use temporary register for the 64-bit immediate. */
5172 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5173 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5174 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5175 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5176 }
5177
5178#elif defined(RT_ARCH_ARM64)
5179 uint32_t uImmR = 0;
5180 uint32_t uImmNandS = 0;
5181 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5182 {
5183 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5184 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5185 }
5186 else
5187 {
5188 /* Use temporary register for the 64-bit immediate. */
5189 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5190 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5192 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5193 }
5194
5195#else
5196# error "Port me"
5197#endif
5198 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5199 return off;
5200}
5201
5202
5203/**
5204 * Emits code for OR'ing a 32-bit GPR with a constant.
5205 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5206 * @note For ARM64 this only supports @a uImm values that can be expressed using
5207 * the two 6-bit immediates of the OR instructions. The caller must make
5208 * sure this is possible!
5209 */
5210DECL_FORCE_INLINE_THROW(uint32_t)
5211iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5212{
5213#if defined(RT_ARCH_AMD64)
5214 /* or Ev, imm */
5215 if (iGprDst >= 8)
5216 pCodeBuf[off++] = X86_OP_REX_B;
5217 if ((int32_t)uImm == (int8_t)uImm)
5218 {
5219 pCodeBuf[off++] = 0x83;
5220 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5221 pCodeBuf[off++] = (uint8_t)uImm;
5222 }
5223 else
5224 {
5225 pCodeBuf[off++] = 0x81;
5226 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5227 pCodeBuf[off++] = RT_BYTE1(uImm);
5228 pCodeBuf[off++] = RT_BYTE2(uImm);
5229 pCodeBuf[off++] = RT_BYTE3(uImm);
5230 pCodeBuf[off++] = RT_BYTE4(uImm);
5231 }
5232
5233#elif defined(RT_ARCH_ARM64)
5234 uint32_t uImmR = 0;
5235 uint32_t uImmNandS = 0;
5236 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5237 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5238 else
5239# ifdef IEM_WITH_THROW_CATCH
5240 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5241# else
5242 AssertReleaseFailedStmt(off = UINT32_MAX);
5243# endif
5244
5245#else
5246# error "Port me"
5247#endif
5248 return off;
5249}
5250
5251
5252/**
5253 * Emits code for OR'ing a 32-bit GPR with a constant.
5254 *
5255 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5256 */
5257DECL_INLINE_THROW(uint32_t)
5258iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5259{
5260#if defined(RT_ARCH_AMD64)
5261 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5262
5263#elif defined(RT_ARCH_ARM64)
5264 uint32_t uImmR = 0;
5265 uint32_t uImmNandS = 0;
5266 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5267 {
5268 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5269 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5270 }
5271 else
5272 {
5273 /* Use temporary register for the 64-bit immediate. */
5274 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5275 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5276 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5277 }
5278
5279#else
5280# error "Port me"
5281#endif
5282 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5283 return off;
5284}
5285
5286
5287
5288/**
5289 * ORs two 64-bit GPRs together, storing the result in a third register.
5290 */
5291DECL_FORCE_INLINE(uint32_t)
5292iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5293{
5294#ifdef RT_ARCH_AMD64
5295 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5296 {
5297 /** @todo consider LEA */
5298 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5299 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5300 }
5301 else
5302 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5303
5304#elif defined(RT_ARCH_ARM64)
5305 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5306
5307#else
5308# error "Port me!"
5309#endif
5310 return off;
5311}
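/* Sketch of the AMD64 aliasing logic above (example register numbers): there is no
 * three-operand OR on AMD64, so the destination doubles as the first source and a
 * mov is only needed when it aliases neither source.
 */
#if 0 /* example only */
off = iemNativeEmitGprEqGprOrGprEx(pCodeBuf, off, 0, 1, 2); /* AMD64: mov r0,r1 + or r0,r2; ARM64: orr x0,x1,x2 */
off = iemNativeEmitGprEqGprOrGprEx(pCodeBuf, off, 0, 0, 2); /* AMD64: just or r0,r2 */
#endif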
5312
5313
5314
5315/**
5316 * ORs two 32-bit GPRs together, storing the result in a third register.
5317 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5318 */
5319DECL_FORCE_INLINE(uint32_t)
5320iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5321{
5322#ifdef RT_ARCH_AMD64
5323 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5324 {
5325 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5326 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5327 }
5328 else
5329 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5330
5331#elif defined(RT_ARCH_ARM64)
5332 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5333
5334#else
5335# error "Port me!"
5336#endif
5337 return off;
5338}
5339
5340
5341/**
5342 * Emits code for XOR'ing two 64-bit GPRs.
5343 */
5344DECL_INLINE_THROW(uint32_t)
5345iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5346{
5347#if defined(RT_ARCH_AMD64)
5348 /* xor Gv, Ev */
5349 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5350 pCodeBuf[off++] = 0x33;
5351 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5352
5353#elif defined(RT_ARCH_ARM64)
5354 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5355
5356#else
5357# error "Port me"
5358#endif
5359 return off;
5360}
5361
5362
5363/**
5364 * Emits code for XOR'ing two 64-bit GPRs.
5365 */
5366DECL_INLINE_THROW(uint32_t)
5367iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5368{
5369#if defined(RT_ARCH_AMD64)
5370 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5371#elif defined(RT_ARCH_ARM64)
5372 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5373#else
5374# error "Port me"
5375#endif
5376 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5377 return off;
5378}
5379
5380
5381/**
5382 * Emits code for XOR'ing two 32-bit GPRs.
5383 */
5384DECL_INLINE_THROW(uint32_t)
5385iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5386{
5387#if defined(RT_ARCH_AMD64)
5388 /* xor Gv, Ev */
5389 if (iGprDst >= 8 || iGprSrc >= 8)
5390 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5391 pCodeBuf[off++] = 0x33;
5392 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5393
5394#elif defined(RT_ARCH_ARM64)
5395 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5396
5397#else
5398# error "Port me"
5399#endif
5400 return off;
5401}
5402
5403
5404/**
5405 * Emits code for XOR'ing two 32-bit GPRs.
5406 */
5407DECL_INLINE_THROW(uint32_t)
5408iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5409{
5410#if defined(RT_ARCH_AMD64)
5411 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5412#elif defined(RT_ARCH_ARM64)
5413 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5414#else
5415# error "Port me"
5416#endif
5417 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5418 return off;
5419}
5420
5421
5422/**
5423 * Emits code for XOR'ing a 32-bit GPR with a constant.
5424 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5425 * @note For ARM64 this only supports @a uImm values that can be expressed using
5426 * the two 6-bit immediates of the EOR instructions. The caller must make
5427 * sure this is possible!
5428 */
5429DECL_FORCE_INLINE_THROW(uint32_t)
5430iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5431{
5432#if defined(RT_ARCH_AMD64)
5433 /* xor Ev, imm */
5434 if (iGprDst >= 8)
5435 pCodeBuf[off++] = X86_OP_REX_B;
5436 if ((int32_t)uImm == (int8_t)uImm)
5437 {
5438 pCodeBuf[off++] = 0x83;
5439 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5440 pCodeBuf[off++] = (uint8_t)uImm;
5441 }
5442 else
5443 {
5444 pCodeBuf[off++] = 0x81;
5445 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5446 pCodeBuf[off++] = RT_BYTE1(uImm);
5447 pCodeBuf[off++] = RT_BYTE2(uImm);
5448 pCodeBuf[off++] = RT_BYTE3(uImm);
5449 pCodeBuf[off++] = RT_BYTE4(uImm);
5450 }
5451
5452#elif defined(RT_ARCH_ARM64)
5453 uint32_t uImmR = 0;
5454 uint32_t uImmNandS = 0;
5455 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5456 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5457 else
5458# ifdef IEM_WITH_THROW_CATCH
5459 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5460# else
5461 AssertReleaseFailedStmt(off = UINT32_MAX);
5462# endif
5463
5464#else
5465# error "Port me"
5466#endif
5467 return off;
5468}
5469
5470
5471/**
5472 * Emits code for XOR'ing a 32-bit GPR with a constant.
5473 */
5474DECL_INLINE_THROW(uint32_t)
5475iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5476{
5477#if defined(RT_ARCH_AMD64)
5478 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5479#elif defined(RT_ARCH_ARM64)
5480 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5481#else
5482# error "Port me"
5483#endif
5484 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5485 return off;
5486}
5487
5488
5489/*********************************************************************************************************************************
5490* Shifting *
5491*********************************************************************************************************************************/
5492
5493/**
5494 * Emits code for shifting a GPR a fixed number of bits to the left.
5495 */
5496DECL_FORCE_INLINE(uint32_t)
5497iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5498{
5499 Assert(cShift > 0 && cShift < 64);
5500
5501#if defined(RT_ARCH_AMD64)
5502 /* shl dst, cShift */
5503 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5504 if (cShift != 1)
5505 {
5506 pCodeBuf[off++] = 0xc1;
5507 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5508 pCodeBuf[off++] = cShift;
5509 }
5510 else
5511 {
5512 pCodeBuf[off++] = 0xd1;
5513 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5514 }
5515
5516#elif defined(RT_ARCH_ARM64)
5517 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5518
5519#else
5520# error "Port me"
5521#endif
5522 return off;
5523}
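/* A hedged sketch of the encoding choice above (example register/shift values): the
 * cShift == 1 special case saves the immediate byte by using opcode 0xd1
 * (shift r/m64, 1) instead of 0xc1 (shift r/m64, imm8).
 */
#if 0 /* example only */
off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, 0, 1); /* 48 d1 e0    = shl rax, 1 */
off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, 0, 4); /* 48 c1 e0 04 = shl rax, 4 */
#endif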
5524
5525
5526/**
5527 * Emits code for shifting a GPR a fixed number of bits to the left.
5528 */
5529DECL_INLINE_THROW(uint32_t)
5530iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5531{
5532#if defined(RT_ARCH_AMD64)
5533 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5534#elif defined(RT_ARCH_ARM64)
5535 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5536#else
5537# error "Port me"
5538#endif
5539 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5540 return off;
5541}
5542
5543
5544/**
5545 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5546 */
5547DECL_FORCE_INLINE(uint32_t)
5548iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5549{
5550 Assert(cShift > 0 && cShift < 32);
5551
5552#if defined(RT_ARCH_AMD64)
5553 /* shl dst, cShift */
5554 if (iGprDst >= 8)
5555 pCodeBuf[off++] = X86_OP_REX_B;
5556 if (cShift != 1)
5557 {
5558 pCodeBuf[off++] = 0xc1;
5559 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5560 pCodeBuf[off++] = cShift;
5561 }
5562 else
5563 {
5564 pCodeBuf[off++] = 0xd1;
5565 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5566 }
5567
5568#elif defined(RT_ARCH_ARM64)
5569 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5570
5571#else
5572# error "Port me"
5573#endif
5574 return off;
5575}
5576
5577
5578/**
5579 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5580 */
5581DECL_INLINE_THROW(uint32_t)
5582iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5583{
5584#if defined(RT_ARCH_AMD64)
5585 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5586#elif defined(RT_ARCH_ARM64)
5587 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5588#else
5589# error "Port me"
5590#endif
5591 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5592 return off;
5593}
5594
5595
5596/**
5597 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5598 */
5599DECL_FORCE_INLINE(uint32_t)
5600iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5601{
5602 Assert(cShift > 0 && cShift < 64);
5603
5604#if defined(RT_ARCH_AMD64)
5605 /* shr dst, cShift */
5606 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5607 if (cShift != 1)
5608 {
5609 pCodeBuf[off++] = 0xc1;
5610 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5611 pCodeBuf[off++] = cShift;
5612 }
5613 else
5614 {
5615 pCodeBuf[off++] = 0xd1;
5616 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5617 }
5618
5619#elif defined(RT_ARCH_ARM64)
5620 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5621
5622#else
5623# error "Port me"
5624#endif
5625 return off;
5626}
5627
5628
5629/**
5630 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5631 */
5632DECL_INLINE_THROW(uint32_t)
5633iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5634{
5635#if defined(RT_ARCH_AMD64)
5636 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5637#elif defined(RT_ARCH_ARM64)
5638 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5639#else
5640# error "Port me"
5641#endif
5642 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5643 return off;
5644}
5645
5646
5647/**
5648 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5649 * right.
5650 */
5651DECL_FORCE_INLINE(uint32_t)
5652iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5653{
5654 Assert(cShift > 0 && cShift < 32);
5655
5656#if defined(RT_ARCH_AMD64)
5657 /* shr dst, cShift */
5658 if (iGprDst >= 8)
5659 pCodeBuf[off++] = X86_OP_REX_B;
5660 if (cShift != 1)
5661 {
5662 pCodeBuf[off++] = 0xc1;
5663 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5664 pCodeBuf[off++] = cShift;
5665 }
5666 else
5667 {
5668 pCodeBuf[off++] = 0xd1;
5669 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5670 }
5671
5672#elif defined(RT_ARCH_ARM64)
5673 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5674
5675#else
5676# error "Port me"
5677#endif
5678 return off;
5679}
5680
5681
5682/**
5683 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5684 * right.
5685 */
5686DECL_INLINE_THROW(uint32_t)
5687iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5688{
5689#if defined(RT_ARCH_AMD64)
5690 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5691#elif defined(RT_ARCH_ARM64)
5692 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5693#else
5694# error "Port me"
5695#endif
5696 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5697 return off;
5698}
5699
5700
5701/**
5702 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5703 * right and assigning it to a different GPR.
5704 */
5705DECL_INLINE_THROW(uint32_t)
5706iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5707{
5708 Assert(cShift > 0); Assert(cShift < 32);
5709#if defined(RT_ARCH_AMD64)
5710 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5711 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5712
5713#elif defined(RT_ARCH_ARM64)
5714 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5715
5716#else
5717# error "Port me"
5718#endif
5719 return off;
5720}
5721
5722
5723/**
5724 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5725 */
5726DECL_FORCE_INLINE(uint32_t)
5727iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5728{
5729 Assert(cShift > 0 && cShift < 64);
5730
5731#if defined(RT_ARCH_AMD64)
5732 /* sar dst, cShift */
5733 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5734 if (cShift != 1)
5735 {
5736 pCodeBuf[off++] = 0xc1;
5737 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5738 pCodeBuf[off++] = cShift;
5739 }
5740 else
5741 {
5742 pCodeBuf[off++] = 0xd1;
5743 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5744 }
5745
5746#elif defined(RT_ARCH_ARM64)
5747 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5748
5749#else
5750# error "Port me"
5751#endif
5752 return off;
5753}
5754
5755
5756/**
5757 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5758 */
5759DECL_INLINE_THROW(uint32_t)
5760iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5761{
5762#if defined(RT_ARCH_AMD64)
5763 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5764#elif defined(RT_ARCH_ARM64)
5765 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5766#else
5767# error "Port me"
5768#endif
5769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5770 return off;
5771}
5772
5773
5774/**
5775 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5776 */
5777DECL_FORCE_INLINE(uint32_t)
5778iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5779{
5780 Assert(cShift > 0 && cShift < 32);
5781
5782#if defined(RT_ARCH_AMD64)
5783 /* sar dst, cShift */
5784 if (iGprDst >= 8)
5785 pCodeBuf[off++] = X86_OP_REX_B;
5786 if (cShift != 1)
5787 {
5788 pCodeBuf[off++] = 0xc1;
5789 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5790 pCodeBuf[off++] = cShift;
5791 }
5792 else
5793 {
5794 pCodeBuf[off++] = 0xd1;
5795 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5796 }
5797
5798#elif defined(RT_ARCH_ARM64)
5799 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
5800
5801#else
5802# error "Port me"
5803#endif
5804 return off;
5805}
5806
5807
5808/**
5809 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5810 */
5811DECL_INLINE_THROW(uint32_t)
5812iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5813{
5814#if defined(RT_ARCH_AMD64)
5815 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5816#elif defined(RT_ARCH_ARM64)
5817 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5818#else
5819# error "Port me"
5820#endif
5821 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5822 return off;
5823}
5824
5825
5826/**
5827 * Emits code for rotating a GPR a fixed number of bits to the left.
5828 */
5829DECL_FORCE_INLINE(uint32_t)
5830iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5831{
5832 Assert(cShift > 0 && cShift < 64);
5833
5834#if defined(RT_ARCH_AMD64)
5835 /* rol dst, cShift */
5836 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5837 if (cShift != 1)
5838 {
5839 pCodeBuf[off++] = 0xc1;
5840 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5841 pCodeBuf[off++] = cShift;
5842 }
5843 else
5844 {
5845 pCodeBuf[off++] = 0xd1;
5846 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
5847 }
5848
5849#elif defined(RT_ARCH_ARM64)
5850 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
5851
5852#else
5853# error "Port me"
5854#endif
5855 return off;
5856}
5857
5858
5859#if defined(RT_ARCH_AMD64)
5860/**
5861 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
5862 */
5863DECL_FORCE_INLINE(uint32_t)
5864iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5865{
5866 Assert(cShift > 0 && cShift < 32);
5867
5868 /* rcl dst, cShift */
5869 if (iGprDst >= 8)
5870 pCodeBuf[off++] = X86_OP_REX_B;
5871 if (cShift != 1)
5872 {
5873 pCodeBuf[off++] = 0xc1;
5874 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5875 pCodeBuf[off++] = cShift;
5876 }
5877 else
5878 {
5879 pCodeBuf[off++] = 0xd1;
5880 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
5881 }
5882
5883 return off;
5884}
5885#endif /* RT_ARCH_AMD64 */
5886
5887
5888
5889/**
5890 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
5891 * @note Bits 63:32 of the destination GPR will be cleared.
5892 */
5893DECL_FORCE_INLINE(uint32_t)
5894iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5895{
5896#if defined(RT_ARCH_AMD64)
5897 /*
5898 * There is no bswap r16 on x86 (the encoding exists but does not work).
5899 * So just use a rol instead (gcc -O2 does the same).
5900 *
5901 * rol r16, 0x8
5902 */
5903 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5904 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5905 if (iGpr >= 8)
5906 pbCodeBuf[off++] = X86_OP_REX_B;
5907 pbCodeBuf[off++] = 0xc1;
5908 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
5909 pbCodeBuf[off++] = 0x08;
5910#elif defined(RT_ARCH_ARM64)
5911 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5912
5913 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
5914#else
5915# error "Port me"
5916#endif
5917
5918 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5919 return off;
5920}
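/* A worked example for the rol-by-8 trick above (illustrative value): what the
 * emitted "rol r16, 8" / REV16 computes on the low half-word.
 */
#if 0 /* example only */
uint16_t u16 = 0x1234;
u16 = (uint16_t)((u16 << 8) | (u16 >> 8)); /* u16 == 0x3412 afterwards */
#endif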
5921
5922
5923/**
5924 * Emits code for reversing the byte order in a 32-bit GPR.
5925 * @note Bits 63:32 of the destination GPR will be cleared.
5926 */
5927DECL_FORCE_INLINE(uint32_t)
5928iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5929{
5930#if defined(RT_ARCH_AMD64)
5931 /* bswap r32 */
5932 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5933
5934 if (iGpr >= 8)
5935 pbCodeBuf[off++] = X86_OP_REX_B;
5936 pbCodeBuf[off++] = 0x0f;
5937 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5938#elif defined(RT_ARCH_ARM64)
5939 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5940
5941 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
5942#else
5943# error "Port me"
5944#endif
5945
5946 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5947 return off;
5948}
5949
5950
5951/**
5952 * Emits code for reversing the byte order in a 64-bit GPR.
5953 */
5954DECL_FORCE_INLINE(uint32_t)
5955iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
5956{
5957#if defined(RT_ARCH_AMD64)
5958 /* bswap r64 */
5959 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5960
5961 if (iGpr >= 8)
5962 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
5963 else
5964 pbCodeBuf[off++] = X86_OP_REX_W;
5965 pbCodeBuf[off++] = 0x0f;
5966 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
5967#elif defined(RT_ARCH_ARM64)
5968 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5969
5970 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
5971#else
5972# error "Port me"
5973#endif
5974
5975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5976 return off;
5977}
5978
5979
5980/*********************************************************************************************************************************
5981* Compare and Testing *
5982*********************************************************************************************************************************/
5983
5984
5985#ifdef RT_ARCH_ARM64
5986/**
5987 * Emits an ARM64 compare instruction.
5988 */
5989DECL_INLINE_THROW(uint32_t)
5990iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
5991 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsl)
5992{
5993 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5994 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
5995 f64Bit, true /*fSetFlags*/, cShift, enmShift);
5996 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5997 return off;
5998}
5999#endif
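/* Note with a hedged sketch (example registers): CMP is an alias for SUBS with the
 * zero register as destination, which is exactly what the helper above emits.
 */
#if 0 /* example only */
off = iemNativeEmitCmpArm64(pReNative, off, 0, 1); /* subs xzr, x0, x1 == cmp x0, x1 */
#endif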
6000
6001
6002/**
6003 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6004 * with conditional instruction.
6005 */
6006DECL_FORCE_INLINE(uint32_t)
6007iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6008{
6009#ifdef RT_ARCH_AMD64
6010 /* cmp Gv, Ev */
6011 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6012 pCodeBuf[off++] = 0x3b;
6013 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6014
6015#elif defined(RT_ARCH_ARM64)
6016 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6017
6018#else
6019# error "Port me!"
6020#endif
6021 return off;
6022}
6023
6024
6025/**
6026 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6027 * with conditional instruction.
6028 */
6029DECL_INLINE_THROW(uint32_t)
6030iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6031{
6032#ifdef RT_ARCH_AMD64
6033 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6034#elif defined(RT_ARCH_ARM64)
6035 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6036#else
6037# error "Port me!"
6038#endif
6039 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6040 return off;
6041}
6042
6043
6044/**
6045 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6046 * with conditional instruction.
6047 */
6048DECL_FORCE_INLINE(uint32_t)
6049iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6050{
6051#ifdef RT_ARCH_AMD64
6052 /* cmp Gv, Ev */
6053 if (iGprLeft >= 8 || iGprRight >= 8)
6054 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6055 pCodeBuf[off++] = 0x3b;
6056 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6057
6058#elif defined(RT_ARCH_ARM64)
6059 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6060
6061#else
6062# error "Port me!"
6063#endif
6064 return off;
6065}
6066
6067
6068/**
6069 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6070 * with conditional instruction.
6071 */
6072DECL_INLINE_THROW(uint32_t)
6073iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6074{
6075#ifdef RT_ARCH_AMD64
6076 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6077#elif defined(RT_ARCH_ARM64)
6078 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6079#else
6080# error "Port me!"
6081#endif
6082 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6083 return off;
6084}
6085
6086
6087/**
6088 * Emits a compare of a 64-bit GPR with a constant value, setting status
6089 * flags/whatever for use with conditional instruction.
6090 */
6091DECL_INLINE_THROW(uint32_t)
6092iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6093{
6094#ifdef RT_ARCH_AMD64
6095 if (uImm <= UINT32_C(0x7f))
6096 {
6097 /* cmp Ev, Ib */
6098 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6099 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6100 pbCodeBuf[off++] = 0x83;
6101 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6102 pbCodeBuf[off++] = (uint8_t)uImm;
6103 }
6104 else if ((int64_t)uImm == (int32_t)uImm)
6105 {
6106 /* cmp Ev, imm */
6107 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6108 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6109 pbCodeBuf[off++] = 0x81;
6110 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6112 pbCodeBuf[off++] = RT_BYTE1(uImm);
6113 pbCodeBuf[off++] = RT_BYTE2(uImm);
6114 pbCodeBuf[off++] = RT_BYTE3(uImm);
6115 pbCodeBuf[off++] = RT_BYTE4(uImm);
6116 }
6117 else
6118 {
6119 /* Use temporary register for the immediate. */
6120 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6121 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6122 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6123 }
6124
6125#elif defined(RT_ARCH_ARM64)
6126 /** @todo guess there are cleverer things we can do here... */
6127 if (uImm < _4K)
6128 {
6129 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6130 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6131 true /*64Bit*/, true /*fSetFlags*/);
6132 }
6133 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6134 {
6135 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6136 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6137 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6138 }
6139 else
6140 {
6141 /* Use temporary register for the immediate. */
6142 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6143 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6144 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6145 }
6146
6147#else
6148# error "Port me!"
6149#endif
6150
6151 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6152 return off;
6153}
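/* A sketch of the ARM64 immediate selection above (example values, not a
 * prescription):
 *     uImm < 4096                   -> subs xzr, xLeft, #imm12
 *     uImm < 16M and 4 KiB aligned  -> subs xzr, xLeft, #(imm >> 12), lsl #12
 *     otherwise                     -> temp register + register compare
 */
#if 0 /* example only */
off = iemNativeEmitCmpGprWithImm(pReNative, off, 0, 0xfff);                   /* subs xzr, x0, #0xfff */
off = iemNativeEmitCmpGprWithImm(pReNative, off, 0, UINT64_C(0x123000));      /* subs xzr, x0, #0x123, lsl #12 */
off = iemNativeEmitCmpGprWithImm(pReNative, off, 0, UINT64_C(0x123456789));   /* mov tmp, imm + cmp x0, tmp */
#endif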
6154
6155
6156/**
6157 * Emits a compare of a 32-bit GPR with a constant value, setting status
6158 * flags/whatever for use with conditional instruction.
6159 *
6160 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6161 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6162 * bits all zero). Will release assert or throw exception if the caller
6163 * violates this restriction.
6164 */
6165DECL_FORCE_INLINE_THROW(uint32_t)
6166iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6167{
6168#ifdef RT_ARCH_AMD64
6169 if (iGprLeft >= 8)
6170 pCodeBuf[off++] = X86_OP_REX_B;
6171 if (uImm <= UINT32_C(0x7f))
6172 {
6173 /* cmp Ev, Ib */
6174 pCodeBuf[off++] = 0x83;
6175 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6176 pCodeBuf[off++] = (uint8_t)uImm;
6177 }
6178 else
6179 {
6180 /* cmp Ev, imm */
6181 pCodeBuf[off++] = 0x81;
6182 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6183 pCodeBuf[off++] = RT_BYTE1(uImm);
6184 pCodeBuf[off++] = RT_BYTE2(uImm);
6185 pCodeBuf[off++] = RT_BYTE3(uImm);
6186 pCodeBuf[off++] = RT_BYTE4(uImm);
6187 }
6188
6189#elif defined(RT_ARCH_ARM64)
6190 /** @todo guess there are cleverer things we can do here... */
6191 if (uImm < _4K)
6192 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6193 false /*64Bit*/, true /*fSetFlags*/);
6194 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6195 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6196 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6197 else
6198# ifdef IEM_WITH_THROW_CATCH
6199 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6200# else
6201 AssertReleaseFailedStmt(off = UINT32_MAX);
6202# endif
6203
6204#else
6205# error "Port me!"
6206#endif
6207 return off;
6208}
6209
6210
6211/**
6212 * Emits a compare of a 32-bit GPR with a constant value, setting status
6213 * flags/whatever for use with conditional instruction.
6214 */
6215DECL_INLINE_THROW(uint32_t)
6216iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6217{
6218#ifdef RT_ARCH_AMD64
6219 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6220
6221#elif defined(RT_ARCH_ARM64)
6222 /** @todo guess there are cleverer things we can do here... */
6223 if (uImm < _4K)
6224 {
6225 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6226 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6227 false /*64Bit*/, true /*fSetFlags*/);
6228 }
6229 else if (uImm < RT_BIT_32(12+12) && (uImm & (_4K - 1)) == 0)
6230 {
6231 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6232 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6233 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6234 }
6235 else
6236 {
6237 /* Use temporary register for the immediate. */
6238 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6239 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6240 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6241 }
6242
6243#else
6244# error "Port me!"
6245#endif
6246
6247 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6248 return off;
6249}
6250
6251
6252/**
6253 * Emits a compare of a 16-bit GPR with a constant value, setting status
6254 * flags/whatever for use with conditional instruction.
6255 *
6256 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6257 * 16-bit value from @a iGprLeft.
6258 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6259 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6260 * bits all zero). Will release assert or throw exception if the caller
6261 * violates this restriction.
6262 */
6263DECL_FORCE_INLINE_THROW(uint32_t)
6264iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6265 uint8_t idxTmpReg = UINT8_MAX)
6266{
6267#ifdef RT_ARCH_AMD64
6268 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6269 if (iGprLeft >= 8)
6270 pCodeBuf[off++] = X86_OP_REX_B;
6271 if (uImm <= UINT32_C(0x7f))
6272 {
6273 /* cmp Ev, Ib */
6274 pCodeBuf[off++] = 0x83;
6275 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6276 pCodeBuf[off++] = (uint8_t)uImm;
6277 }
6278 else
6279 {
6280 /* cmp Ev, imm */
6281 pCodeBuf[off++] = 0x81;
6282 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6283 pCodeBuf[off++] = RT_BYTE1(uImm);
6284 pCodeBuf[off++] = RT_BYTE2(uImm);
6285 }
6286 RT_NOREF(idxTmpReg);
6287
6288#elif defined(RT_ARCH_ARM64)
6289# ifdef IEM_WITH_THROW_CATCH
6290 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6291# else
6292 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6293# endif
6294 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6295 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6296 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6297
6298#else
6299# error "Port me!"
6300#endif
6301 return off;
6302}
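/* Note with a hedged usage sketch: the ARM64 path above widens the 16-bit compare by
 * masking with the 0xffff bitmask immediate (imms=15, immr=0) into the helper
 * register and then doing a normal 32-bit compare.  The allocator pair below
 * (iemNativeRegAllocTmp / iemNativeRegFreeTmp) is assumed from the surrounding
 * recompiler code; register and immediate values are examples.
 */
#if 0 /* example only */
uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
off = iemNativeEmitCmpGpr16WithImm(pReNative, off, 0 /*iGprLeft*/, 0x1234, idxTmpReg);
iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif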
6303
6304
6305/**
6306 * Emits a compare of a 16-bit GPR with a constant value, setting status
6307 * flags/whatever for use with conditional instruction.
6308 *
6309 * @note ARM64: Helper register is required (idxTmpReg).
6310 */
6311DECL_INLINE_THROW(uint32_t)
6312iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6313 uint8_t idxTmpReg = UINT8_MAX)
6314{
6315#ifdef RT_ARCH_AMD64
6316 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6317#elif defined(RT_ARCH_ARM64)
6318 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6319#else
6320# error "Port me!"
6321#endif
6322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6323 return off;
6324}
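
/*
 * Usage sketch, assuming the caller supplies idxGprValue, idxTmpReg and
 * idxLabel: compare the low 16 bits of a register against a constant and
 * branch on equality.  The helper register is only consumed on ARM64, where
 * it isolates the 16-bit value.
 *
 * @code
 *      off = iemNativeEmitCmpGpr16WithImm(pReNative, off, idxGprValue, UINT16_C(0x1234), idxTmpReg);
 *      off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
 * @endcode
 */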
6325
6326
6327
6328/*********************************************************************************************************************************
6329* Branching *
6330*********************************************************************************************************************************/
6331
6332/**
6333 * Emits a JMP rel32 / B imm26 to the given label.
6334 */
6335DECL_FORCE_INLINE_THROW(uint32_t)
6336iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6337{
6338 Assert(idxLabel < pReNative->cLabels);
6339
6340#ifdef RT_ARCH_AMD64
6341 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6342 {
6343 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6344 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6345 {
6346 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6347 pCodeBuf[off++] = (uint8_t)offRel;
6348 }
6349 else
6350 {
6351 offRel -= 3;
6352 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6353 pCodeBuf[off++] = RT_BYTE1(offRel);
6354 pCodeBuf[off++] = RT_BYTE2(offRel);
6355 pCodeBuf[off++] = RT_BYTE3(offRel);
6356 pCodeBuf[off++] = RT_BYTE4(offRel);
6357 }
6358 }
6359 else
6360 {
6361 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6362 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6363 pCodeBuf[off++] = 0xfe;
6364 pCodeBuf[off++] = 0xff;
6365 pCodeBuf[off++] = 0xff;
6366 pCodeBuf[off++] = 0xff;
6367 }
6368 pCodeBuf[off++] = 0xcc; /* int3 poison */
6369
6370#elif defined(RT_ARCH_ARM64)
6371 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6372 {
6373 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6374 off++;
6375 }
6376 else
6377 {
6378 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6379 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6380 }
6381
6382#else
6383# error "Port me!"
6384#endif
6385 return off;
6386}
6387
6388
6389/**
6390 * Emits a JMP rel32 / B imm26 to the given label.
6391 */
6392DECL_INLINE_THROW(uint32_t)
6393iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6394{
6395#ifdef RT_ARCH_AMD64
6396 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6397#elif defined(RT_ARCH_ARM64)
6398 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6399#else
6400# error "Port me!"
6401#endif
6402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6403 return off;
6404}
6405
6406
6407/**
6408 * Emits a JMP rel32 / B imm26 to a new undefined label.
6409 */
6410DECL_INLINE_THROW(uint32_t)
6411iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6412{
6413 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6414 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6415}
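
/*
 * Usage sketch for the label plumbing, assuming a caller-chosen enmLabelType
 * and the iemNativeLabelDefine() helper from the recompiler core: emit a
 * forward jump to a label that is only defined later; the emitter records a
 * fixup that is applied once the label has its final offset.
 *
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0);
 *      off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel); // records a fixup
 *      ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);          // branch target
 * @endcode
 */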
6416
6417/** Condition type. */
6418#ifdef RT_ARCH_AMD64
6419typedef enum IEMNATIVEINSTRCOND : uint8_t
6420{
6421 kIemNativeInstrCond_o = 0,
6422 kIemNativeInstrCond_no,
6423 kIemNativeInstrCond_c,
6424 kIemNativeInstrCond_nc,
6425 kIemNativeInstrCond_e,
6426 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6427 kIemNativeInstrCond_ne,
6428 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6429 kIemNativeInstrCond_be,
6430 kIemNativeInstrCond_nbe,
6431 kIemNativeInstrCond_s,
6432 kIemNativeInstrCond_ns,
6433 kIemNativeInstrCond_p,
6434 kIemNativeInstrCond_np,
6435 kIemNativeInstrCond_l,
6436 kIemNativeInstrCond_nl,
6437 kIemNativeInstrCond_le,
6438 kIemNativeInstrCond_nle
6439} IEMNATIVEINSTRCOND;
6440#elif defined(RT_ARCH_ARM64)
6441typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6442# define kIemNativeInstrCond_o todo_conditional_codes
6443# define kIemNativeInstrCond_no todo_conditional_codes
6444# define kIemNativeInstrCond_c todo_conditional_codes
6445# define kIemNativeInstrCond_nc todo_conditional_codes
6446# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6447# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6448# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6449# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6450# define kIemNativeInstrCond_s todo_conditional_codes
6451# define kIemNativeInstrCond_ns todo_conditional_codes
6452# define kIemNativeInstrCond_p todo_conditional_codes
6453# define kIemNativeInstrCond_np todo_conditional_codes
6454# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6455# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6456# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6457# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6458#else
6459# error "Port me!"
6460#endif
6461
6462
6463/**
6464 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6465 */
6466DECL_FORCE_INLINE_THROW(uint32_t)
6467iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6468 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6469{
6470 Assert(idxLabel < pReNative->cLabels);
6471
6472 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6473#ifdef RT_ARCH_AMD64
6474 if (offLabel >= off)
6475 {
6476 /* jcc rel32 */
6477 pCodeBuf[off++] = 0x0f;
6478 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6479 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6480 pCodeBuf[off++] = 0x00;
6481 pCodeBuf[off++] = 0x00;
6482 pCodeBuf[off++] = 0x00;
6483 pCodeBuf[off++] = 0x00;
6484 }
6485 else
6486 {
6487 int32_t offDisp = offLabel - (off + 2);
6488 if ((int8_t)offDisp == offDisp)
6489 {
6490 /* jcc rel8 */
6491 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6492 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6493 }
6494 else
6495 {
6496 /* jcc rel32 */
6497 offDisp -= 4;
6498 pCodeBuf[off++] = 0x0f;
6499 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6500 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6501 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6502 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6503 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6504 }
6505 }
6506
6507#elif defined(RT_ARCH_ARM64)
6508 if (offLabel >= off)
6509 {
6510 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6511 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6512 }
6513 else
6514 {
6515 Assert(off - offLabel <= 0x3ffffU);
6516 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6517 off++;
6518 }
6519
6520#else
6521# error "Port me!"
6522#endif
6523 return off;
6524}
6525
6526
6527/**
6528 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6529 */
6530DECL_INLINE_THROW(uint32_t)
6531iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6532{
6533#ifdef RT_ARCH_AMD64
6534 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6535#elif defined(RT_ARCH_ARM64)
6536 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6537#else
6538# error "Port me!"
6539#endif
6540 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6541 return off;
6542}
6543
6544
6545/**
6546 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6547 */
6548DECL_INLINE_THROW(uint32_t)
6549iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6550 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6551{
6552 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6553 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6554}
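
/*
 * Usage sketch: a compare paired with a conditional branch, assuming
 * idxGprValue and idxLabel come from the caller.  The kIemNativeInstrCond_*
 * names work on both targets; on ARM64 they map to kArmv8InstrCond_* values
 * per the table above.
 *
 * @code
 *      off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxGprValue, 42);
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
 * @endcode
 */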
6555
6556
6557/**
6558 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6559 */
6560DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6561{
6562#ifdef RT_ARCH_AMD64
6563 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6564#elif defined(RT_ARCH_ARM64)
6565 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6566#else
6567# error "Port me!"
6568#endif
6569}
6570
6571/**
6572 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6573 */
6574DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6575 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6576{
6577#ifdef RT_ARCH_AMD64
6578 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6579#elif defined(RT_ARCH_ARM64)
6580 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6581#else
6582# error "Port me!"
6583#endif
6584}
6585
6586
6587/**
6588 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6589 */
6590DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6591{
6592#ifdef RT_ARCH_AMD64
6593 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6594#elif defined(RT_ARCH_ARM64)
6595 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6596#else
6597# error "Port me!"
6598#endif
6599}
6600
6601/**
6602 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6603 */
6604DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6605 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6606{
6607#ifdef RT_ARCH_AMD64
6608 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6609#elif defined(RT_ARCH_ARM64)
6610 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6611#else
6612# error "Port me!"
6613#endif
6614}
6615
6616
6617/**
6618 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6619 */
6620DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6621{
6622#ifdef RT_ARCH_AMD64
6623 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6624#elif defined(RT_ARCH_ARM64)
6625 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6626#else
6627# error "Port me!"
6628#endif
6629}
6630
6631/**
6632 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6633 */
6634DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6635 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6636{
6637#ifdef RT_ARCH_AMD64
6638 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6639#elif defined(RT_ARCH_ARM64)
6640 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6641#else
6642# error "Port me!"
6643#endif
6644}
6645
6646
6647/**
6648 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6649 */
6650DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6651{
6652#ifdef RT_ARCH_AMD64
6653 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6654#elif defined(RT_ARCH_ARM64)
6655 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6656#else
6657# error "Port me!"
6658#endif
6659}
6660
6661/**
6662 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6663 */
6664DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6665 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6666{
6667#ifdef RT_ARCH_AMD64
6668 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6669#elif defined(RT_ARCH_ARM64)
6670 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6671#else
6672# error "Port me!"
6673#endif
6674}
6675
6676
6677/**
6678 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6679 */
6680DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6681{
6682#ifdef RT_ARCH_AMD64
6683 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6684#elif defined(RT_ARCH_ARM64)
6685 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6686#else
6687# error "Port me!"
6688#endif
6689}
6690
6691/**
6692 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6693 */
6694DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6695 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6696{
6697#ifdef RT_ARCH_AMD64
6698 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6699#elif defined(RT_ARCH_ARM64)
6700 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6701#else
6702# error "Port me!"
6703#endif
6704}
6705
6706
6707/**
6708 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6709 *
6710 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6711 *
6712 * Only use hardcoded jumps forward when emitting for exactly one
6713 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6714 * the right target address on all platforms!
6715 *
6716 *       Please also note that on x86 it is necessary to pass off + 256 or
6717 *       higher for @a offTarget if one believes the intervening code is
6718 *       more than 127 bytes long.
6719 */
6720DECL_FORCE_INLINE(uint32_t)
6721iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6722{
6723#ifdef RT_ARCH_AMD64
6724 /* jcc rel8 / rel32 */
6725 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6726 if (offDisp < 128 && offDisp >= -128)
6727 {
6728 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6729 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6730 }
6731 else
6732 {
6733 offDisp -= 4;
6734 pCodeBuf[off++] = 0x0f;
6735 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6736 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6737 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6738 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6739 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6740 }
6741
6742#elif defined(RT_ARCH_ARM64)
6743 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6744 off++;
6745#else
6746# error "Port me!"
6747#endif
6748 return off;
6749}
6750
6751
6752/**
6753 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6754 *
6755 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6756 *
6757 * Only use hardcoded jumps forward when emitting for exactly one
6758 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6759 * the right target address on all platforms!
6760 *
6761 *       Please also note that on x86 it is necessary to pass off + 256 or
6762 *       higher for @a offTarget if one believes the intervening code is
6763 *       more than 127 bytes long.
6764 */
6765DECL_INLINE_THROW(uint32_t)
6766iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6767{
6768#ifdef RT_ARCH_AMD64
6769 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6770#elif defined(RT_ARCH_ARM64)
6771 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6772#else
6773# error "Port me!"
6774#endif
6775 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6776 return off;
6777}
6778
6779
6780/**
6781 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6782 *
6783 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6784 */
6785DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6786{
6787#ifdef RT_ARCH_AMD64
6788 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6789#elif defined(RT_ARCH_ARM64)
6790 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6791#else
6792# error "Port me!"
6793#endif
6794}
6795
6796
6797/**
6798 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6799 *
6800 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6801 */
6802DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6803{
6804#ifdef RT_ARCH_AMD64
6805 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6806#elif defined(RT_ARCH_ARM64)
6807 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6808#else
6809# error "Port me!"
6810#endif
6811}
6812
6813
6814/**
6815 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6816 *
6817 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6818 */
6819DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6820{
6821#ifdef RT_ARCH_AMD64
6822 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6823#elif defined(RT_ARCH_ARM64)
6824 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6825#else
6826# error "Port me!"
6827#endif
6828}
6829
6830
6831/**
6832 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6833 *
6834 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6835 */
6836DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6837{
6838#ifdef RT_ARCH_AMD64
6839 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6840#elif defined(RT_ARCH_ARM64)
6841 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6842#else
6843# error "Port me!"
6844#endif
6845}
6846
6847
6848/**
6849 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6850 *
6851 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6852 */
6853DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6854{
6855#ifdef RT_ARCH_AMD64
6856 /* jmp rel8 or rel32 */
6857 int32_t offDisp = offTarget - (off + 2);
6858 if (offDisp < 128 && offDisp >= -128)
6859 {
6860 pCodeBuf[off++] = 0xeb;
6861 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6862 }
6863 else
6864 {
6865 offDisp -= 3;
6866 pCodeBuf[off++] = 0xe9;
6867 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6868 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6869 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6870 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6871 }
6872
6873#elif defined(RT_ARCH_ARM64)
6874 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6875 off++;
6876
6877#else
6878# error "Port me!"
6879#endif
6880 return off;
6881}
6882
6883
6884/**
6885 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6886 *
6887 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6888 */
6889DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6890{
6891#ifdef RT_ARCH_AMD64
6892 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6893#elif defined(RT_ARCH_ARM64)
6894 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6895#else
6896# error "Port me!"
6897#endif
6898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6899 return off;
6900}
6901
6902
6903/**
6904 * Fixes up a conditional jump to a fixed label.
6905 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6906 * iemNativeEmitJzToFixed, ...
6907 */
6908DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6909{
6910#ifdef RT_ARCH_AMD64
6911 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6912 uint8_t const bOpcode = pbCodeBuf[offFixup];
6913 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6914 {
6915 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6916 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6917 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6918 }
6919 else
6920 {
6921 if (bOpcode != 0x0f)
6922 Assert(bOpcode == 0xe9);
6923 else
6924 {
6925 offFixup += 1;
6926            Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < 0x10);
6927 }
6928 uint32_t const offRel32 = offTarget - (offFixup + 5);
6929 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6930 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6931 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6932 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6933 }
6934
6935#elif defined(RT_ARCH_ARM64)
6936 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6937 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6938 {
6939 /* B.COND + BC.COND */
6940 int32_t const offDisp = offTarget - offFixup;
6941 Assert(offDisp >= -262144 && offDisp < 262144);
6942 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6943 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6944 }
6945 else
6946 {
6947 /* B imm26 */
6948 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6949 int32_t const offDisp = offTarget - offFixup;
6950 Assert(offDisp >= -33554432 && offDisp < 33554432);
6951 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6952 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6953 }
6954
6955#else
6956# error "Port me!"
6957#endif
6958}
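
/*
 * The portable pattern for a short forward skip when the target is not known
 * at emit time (a sketch, assuming a JNZ guard): request a target beyond the
 * rel8 range so the AMD64 emitter picks the rel32 form, then patch the
 * instruction once the target offset is known.
 *
 * @code
 *      uint32_t const offFixup = off;
 *      off = iemNativeEmitJnzToFixed(pReNative, off, off + 256); // forces rel32 on AMD64
 *      ... // code executed only when the last result was zero
 *      iemNativeFixupFixedJump(pReNative, offFixup, off);
 * @endcode
 */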
6959
6960
6961#ifdef RT_ARCH_AMD64
6962/**
6963 * For doing bt on a register.
6964 */
6965DECL_INLINE_THROW(uint32_t)
6966iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6967{
6968 Assert(iBitNo < 64);
6969 /* bt Ev, imm8 */
6970 if (iBitNo >= 32)
6971 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6972 else if (iGprSrc >= 8)
6973 pCodeBuf[off++] = X86_OP_REX_B;
6974 pCodeBuf[off++] = 0x0f;
6975 pCodeBuf[off++] = 0xba;
6976 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6977 pCodeBuf[off++] = iBitNo;
6978 return off;
6979}
6980#endif /* RT_ARCH_AMD64 */
6981
6982
6983/**
6984 * Internal helper, don't call directly.
6985 */
6986DECL_INLINE_THROW(uint32_t)
6987iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6988 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6989{
6990 Assert(iBitNo < 64);
6991#ifdef RT_ARCH_AMD64
6992 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6993 if (iBitNo < 8)
6994 {
6995 /* test Eb, imm8 */
6996 if (iGprSrc >= 4)
6997 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6998 pbCodeBuf[off++] = 0xf6;
6999 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7000 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
7001 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7002 }
7003 else
7004 {
7005 /* bt Ev, imm8 */
7006 if (iBitNo >= 32)
7007 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7008 else if (iGprSrc >= 8)
7009 pbCodeBuf[off++] = X86_OP_REX_B;
7010 pbCodeBuf[off++] = 0x0f;
7011 pbCodeBuf[off++] = 0xba;
7012 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7013 pbCodeBuf[off++] = iBitNo;
7014 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7015 }
7016
7017#elif defined(RT_ARCH_ARM64)
7018    /* Use TBZ/TBNZ when the label type allows it, otherwise TST + B.cond. */
7019 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7020 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7021 {
7022 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7023 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7024 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7025 //if (offLabel == UINT32_MAX)
7026 {
7027 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7028 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7029 }
7030 //else
7031 //{
7032 // RT_BREAKPOINT();
7033 // Assert(off - offLabel <= 0x1fffU);
7034 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7035 //
7036 //}
7037 }
7038 else
7039 {
7040 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7041 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7042 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7043 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7044 }
7045
7046#else
7047# error "Port me!"
7048#endif
7049 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7050 return off;
7051}
7052
7053
7054/**
7055 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7056 * @a iGprSrc.
7057 *
7058 * @note On ARM64 the range is only +/-8191 instructions.
7059 */
7060DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7061 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7062{
7063 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7064}
7065
7066
7067/**
7068 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7069 * _set_ in @a iGprSrc.
7070 *
7071 * @note On ARM64 the range is only +/-8191 instructions.
7072 */
7073DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7074 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7075{
7076 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7077}
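
/*
 * Usage sketch, with idxGprFlags and idxLabel supplied by the caller: branch
 * when bit 11 is set.  On ARM64 this can become a single TBNZ, so the label
 * must stay within the limited range noted above.
 *
 * @code
 *      off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxGprFlags, 11, idxLabel);
 * @endcode
 */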
7078
7079
7080/**
7081 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7082 * flags accordingly.
7083 */
7084DECL_INLINE_THROW(uint32_t)
7085iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7086{
7087 Assert(fBits != 0);
7088#ifdef RT_ARCH_AMD64
7089
7090 if (fBits >= UINT32_MAX)
7091 {
7092 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7093
7094 /* test Ev,Gv */
7095 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7096 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7097 pbCodeBuf[off++] = 0x85;
7098        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7099
7100 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7101 }
7102 else if (fBits <= UINT32_MAX)
7103 {
7104 /* test Eb, imm8 or test Ev, imm32 */
7105 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7106 if (fBits <= UINT8_MAX)
7107 {
7108 if (iGprSrc >= 4)
7109 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7110 pbCodeBuf[off++] = 0xf6;
7111 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7112 pbCodeBuf[off++] = (uint8_t)fBits;
7113 }
7114 else
7115 {
7116 if (iGprSrc >= 8)
7117 pbCodeBuf[off++] = X86_OP_REX_B;
7118 pbCodeBuf[off++] = 0xf7;
7119 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7120 pbCodeBuf[off++] = RT_BYTE1(fBits);
7121 pbCodeBuf[off++] = RT_BYTE2(fBits);
7122 pbCodeBuf[off++] = RT_BYTE3(fBits);
7123 pbCodeBuf[off++] = RT_BYTE4(fBits);
7124 }
7125 }
7126 /** @todo implement me. */
7127 else
7128 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7129
7130#elif defined(RT_ARCH_ARM64)
7131 uint32_t uImmR = 0;
7132 uint32_t uImmNandS = 0;
7133 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7134 {
7135 /* ands xzr, iGprSrc, #fBits */
7136 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7137 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7138 }
7139 else
7140 {
7141 /* ands xzr, iGprSrc, iTmpReg */
7142 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7143 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7144 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7145 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7146 }
7147
7148#else
7149# error "Port me!"
7150#endif
7151 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7152 return off;
7153}
7154
7155
7156/**
7157 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7158 * @a iGprSrc, setting CPU flags accordingly.
7159 *
7160 * @note For ARM64 this only supports @a fBits values that can be expressed
7161 * using the two 6-bit immediates of the ANDS instruction. The caller
7162 * must make sure this is possible!
7163 */
7164DECL_FORCE_INLINE_THROW(uint32_t)
7165iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7166{
7167 Assert(fBits != 0);
7168
7169#ifdef RT_ARCH_AMD64
7170 if (fBits <= UINT8_MAX)
7171 {
7172 /* test Eb, imm8 */
7173 if (iGprSrc >= 4)
7174 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7175 pCodeBuf[off++] = 0xf6;
7176 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7177 pCodeBuf[off++] = (uint8_t)fBits;
7178 }
7179 else
7180 {
7181 /* test Ev, imm32 */
7182 if (iGprSrc >= 8)
7183 pCodeBuf[off++] = X86_OP_REX_B;
7184 pCodeBuf[off++] = 0xf7;
7185 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7186 pCodeBuf[off++] = RT_BYTE1(fBits);
7187 pCodeBuf[off++] = RT_BYTE2(fBits);
7188 pCodeBuf[off++] = RT_BYTE3(fBits);
7189 pCodeBuf[off++] = RT_BYTE4(fBits);
7190 }
7191
7192#elif defined(RT_ARCH_ARM64)
7193 /* ands xzr, src, #fBits */
7194 uint32_t uImmR = 0;
7195 uint32_t uImmNandS = 0;
7196 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7197 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7198 else
7199# ifdef IEM_WITH_THROW_CATCH
7200 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7201# else
7202 AssertReleaseFailedStmt(off = UINT32_MAX);
7203# endif
7204
7205#else
7206# error "Port me!"
7207#endif
7208 return off;
7209}
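
/*
 * A caller-side sketch of honouring the ARM64 restriction above, assuming
 * both the raw buffer (pCodeBuf) and the recompiler state are at hand: probe
 * whether the mask is encodable as a logical immediate and only then use the
 * Ex variant, otherwise fall back to the non-Ex emitter which can allocate a
 * temporary register for the mask.
 *
 * @code
 *      uint32_t uImmNandS = 0, uImmR = 0; // probe results are discarded; the emitter recomputes them
 *      if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
 *          off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxGprValue, fBits);
 *      else
 *          off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxGprValue, fBits);
 * @endcode
 */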
7210
7211
7212
7213/**
7214 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7215 * @a iGprSrc, setting CPU flags accordingly.
7216 *
7217 * @note For ARM64 this only supports @a fBits values that can be expressed
7218 * using the two 6-bit immediates of the ANDS instruction. The caller
7219 * must make sure this is possible!
7220 */
7221DECL_FORCE_INLINE_THROW(uint32_t)
7222iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7223{
7224 Assert(fBits != 0);
7225
7226#ifdef RT_ARCH_AMD64
7227 /* test Eb, imm8 */
7228 if (iGprSrc >= 4)
7229 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7230 pCodeBuf[off++] = 0xf6;
7231 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7232 pCodeBuf[off++] = fBits;
7233
7234#elif defined(RT_ARCH_ARM64)
7235 /* ands xzr, src, #fBits */
7236 uint32_t uImmR = 0;
7237 uint32_t uImmNandS = 0;
7238 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7239 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7240 else
7241# ifdef IEM_WITH_THROW_CATCH
7242 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7243# else
7244 AssertReleaseFailedStmt(off = UINT32_MAX);
7245# endif
7246
7247#else
7248# error "Port me!"
7249#endif
7250 return off;
7251}
7252
7253
7254/**
7255 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7256 * @a iGprSrc, setting CPU flags accordingly.
7257 */
7258DECL_INLINE_THROW(uint32_t)
7259iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7260{
7261 Assert(fBits != 0);
7262
7263#ifdef RT_ARCH_AMD64
7264 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7265
7266#elif defined(RT_ARCH_ARM64)
7267 /* ands xzr, src, [tmp|#imm] */
7268 uint32_t uImmR = 0;
7269 uint32_t uImmNandS = 0;
7270 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7271 {
7272 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7273 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7274 }
7275 else
7276 {
7277        /* Use a temporary register for the immediate. */
7278 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7279 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7280 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7281 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7282 }
7283
7284#else
7285# error "Port me!"
7286#endif
7287 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7288 return off;
7289}
7290
7291
7292/**
7293 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7294 * are set in @a iGprSrc.
7295 */
7296DECL_INLINE_THROW(uint32_t)
7297iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7298 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7299{
7300 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7301
7302 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7303 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7304
7305 return off;
7306}
7307
7308
7309/**
7310 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7311 * are set in @a iGprSrc.
7312 */
7313DECL_INLINE_THROW(uint32_t)
7314iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7315 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7316{
7317 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7318
7319 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7320 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7321
7322 return off;
7323}
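
/*
 * Usage sketch: guard a fast path on a set of dirty flags (fDirtyMask and
 * idxLabelSlowPath are caller-supplied), jumping to the slow path when any
 * of them are set.  Note the assertion above: the mask must contain more
 * than one bit; single-bit tests belong with the bit-test emitters.
 *
 * @code
 *      off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxGprFlags,
 *                                                               fDirtyMask, idxLabelSlowPath);
 * @endcode
 */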
7324
7325
7326/**
7327 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7328 *
7329 * The operand size is given by @a f64Bit.
7330 */
7331DECL_FORCE_INLINE_THROW(uint32_t)
7332iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7333 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7334{
7335 Assert(idxLabel < pReNative->cLabels);
7336
7337#ifdef RT_ARCH_AMD64
7338 /* test reg32,reg32 / test reg64,reg64 */
7339 if (f64Bit)
7340 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7341 else if (iGprSrc >= 8)
7342 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7343 pCodeBuf[off++] = 0x85;
7344 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7345
7346 /* jnz idxLabel */
7347 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7348 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7349
7350#elif defined(RT_ARCH_ARM64)
7351 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7352 {
7353 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7354 iGprSrc, f64Bit);
7355 off++;
7356 }
7357 else
7358 {
7359 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7360 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7361 }
7362
7363#else
7364# error "Port me!"
7365#endif
7366 return off;
7367}
7368
7369
7370/**
7371 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7372 *
7373 * The operand size is given by @a f64Bit.
7374 */
7375DECL_FORCE_INLINE_THROW(uint32_t)
7376iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7377 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7378{
7379#ifdef RT_ARCH_AMD64
7380 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7381 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7382#elif defined(RT_ARCH_ARM64)
7383 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7384 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7385#else
7386# error "Port me!"
7387#endif
7388 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7389 return off;
7390}
7391
7392
7393/* if (Gpr1 == 0) Jmp idxLabel; */
7394
7395/**
7396 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7397 *
7398 * The operand size is given by @a f64Bit.
7399 */
7400DECL_FORCE_INLINE_THROW(uint32_t)
7401iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7402 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7403{
7404 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7405 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7406}
7407
7408
7409/**
7410 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7411 *
7412 * The operand size is given by @a f64Bit.
7413 */
7414DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7415 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7416{
7417 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7418}
7419
7420
7421/**
7422 * Emits code that jumps to a new label if @a iGprSrc is zero.
7423 *
7424 * The operand size is given by @a f64Bit.
7425 */
7426DECL_INLINE_THROW(uint32_t)
7427iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7428 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7429{
7430 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7431 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7432}
7433
7434
7435/* if (Gpr1 != 0) Jmp idxLabel; */
7436
7437/**
7438 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7439 *
7440 * The operand size is given by @a f64Bit.
7441 */
7442DECL_FORCE_INLINE_THROW(uint32_t)
7443iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7444 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7445{
7446 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7447 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7448}
7449
7450
7451/**
7452 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7453 *
7454 * The operand size is given by @a f64Bit.
7455 */
7456DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7457 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7458{
7459 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7460}
7461
7462
7463/**
7464 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7465 *
7466 * The operand size is given by @a f64Bit.
7467 */
7468DECL_INLINE_THROW(uint32_t)
7469iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7470 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7471{
7472 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7473 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7474}
7475
7476
7477/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7478
7479/**
7480 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7481 * differ.
7482 */
7483DECL_INLINE_THROW(uint32_t)
7484iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7485 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7486{
7487 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7488 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7489 return off;
7490}
7491
7492
7493/**
7494 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
7495 */
7496DECL_INLINE_THROW(uint32_t)
7497iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7498 uint8_t iGprLeft, uint8_t iGprRight,
7499 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7500{
7501 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7502 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7503}
7504
7505
7506/* if (Gpr != Imm) Jmp idxLabel; */
7507
7508/**
7509 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7510 */
7511DECL_INLINE_THROW(uint32_t)
7512iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7513 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7514{
7515 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7516 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7517 return off;
7518}
7519
7520
7521/**
7522 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7523 */
7524DECL_INLINE_THROW(uint32_t)
7525iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7526 uint8_t iGprSrc, uint64_t uImm,
7527 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7528{
7529 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7530 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7531}
7532
7533
7534/**
7535 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7536 * @a uImm.
7537 */
7538DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7539 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7540{
7541 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7542 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7543 return off;
7544}
7545
7546
7547/**
7548 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7549 * @a uImm.
7550 */
7551DECL_INLINE_THROW(uint32_t)
7552iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7553 uint8_t iGprSrc, uint32_t uImm,
7554 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7555{
7556 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7557 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7558}
7559
7560
7561/**
7562 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7563 * @a uImm.
7564 */
7565DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7566 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7567{
7568 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7569 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7570 return off;
7571}
7572
7573
7574/**
7575 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7576 * @a uImm.
7577 */
7578DECL_INLINE_THROW(uint32_t)
7579iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7580 uint8_t iGprSrc, uint16_t uImm,
7581 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7582{
7583 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7584 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7585}
7586
7587
7588/* if (Gpr == Imm) Jmp idxLabel; */
7589
7590/**
7591 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7592 */
7593DECL_INLINE_THROW(uint32_t)
7594iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7595 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7596{
7597 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7598 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7599 return off;
7600}
7601
7602
7603/**
7604 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7605 */
7606DECL_INLINE_THROW(uint32_t)
7607iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7608 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7609{
7610 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7611 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7612}
7613
7614
7615/**
7616 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7617 */
7618DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7619 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7620{
7621 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7622 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7623 return off;
7624}
7625
7626
7627/**
7628 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7629 */
7630DECL_INLINE_THROW(uint32_t)
7631iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7632 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7633{
7634 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7635 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7636}
7637
7638
7639/**
7640 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7641 *
7642 * @note ARM64: Helper register is required (idxTmpReg).
7643 */
7644DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7645 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7646 uint8_t idxTmpReg = UINT8_MAX)
7647{
7648 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7649 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7650 return off;
7651}
7652
7653
7654/**
7655 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7656 *
7657 * @note ARM64: Helper register is required (idxTmpReg).
7658 */
7659DECL_INLINE_THROW(uint32_t)
7660iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7661 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7662 uint8_t idxTmpReg = UINT8_MAX)
7663{
7664 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7665 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7666}
7667
7668
7669/*********************************************************************************************************************************
7670* Calls. *
7671*********************************************************************************************************************************/
7672
7673/**
7674 * Emits a call to a 64-bit address.
7675 */
7676DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7677{
7678#ifdef RT_ARCH_AMD64
7679 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7680
7681 /* call rax */
7682 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7683 pbCodeBuf[off++] = 0xff;
7684 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7685
7686#elif defined(RT_ARCH_ARM64)
7687 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7688
7689 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7690 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7691
7692#else
7693# error "port me"
7694#endif
7695 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7696 return off;
7697}
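
/*
 * Minimal sketch, with pfnHelper standing in for any C helper with a
 * matching signature.  The target address is first materialized in
 * xAX / IEMNATIVE_REG_FIXED_TMP0, so the emitted call clobbers that register
 * in addition to whatever the calling convention says.
 *
 * @code
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 * @endcode
 */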
7698
7699
7700/**
7701 * Emits code to load a stack variable into an argument GPR.
7702 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7703 */
7704DECL_FORCE_INLINE_THROW(uint32_t)
7705iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7706 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7707 bool fSpilledVarsInVolatileRegs = false)
7708{
7709 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7710 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7711 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7712
7713 uint8_t const idxRegVar = pVar->idxReg;
7714 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7715 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7716 || !fSpilledVarsInVolatileRegs ))
7717 {
7718 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7719 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7720 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7721 if (!offAddend)
7722 {
7723 if (idxRegArg != idxRegVar)
7724 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7725 }
7726 else
7727 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7728 }
7729 else
7730 {
7731 uint8_t const idxStackSlot = pVar->idxStackSlot;
7732 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7733 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7734 if (offAddend)
7735 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7736 }
7737 return off;
7738}
7739
7740
7741/**
7742 * Emits code to load a stack or immediate variable value into an argument GPR,
7743 * optionally with an addend.
7744 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7745 */
7746DECL_FORCE_INLINE_THROW(uint32_t)
7747iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7748 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7749 bool fSpilledVarsInVolatileRegs = false)
7750{
7751 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7752 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7753 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7754 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7755 else
7756 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7757 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7758 return off;
7759}
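
/*
 * Usage sketch for a two-argument helper call, assuming idxVarArg0/1 are
 * stack or immediate variables, pfnHelper is caller-supplied, and
 * IEMNATIVE_CALL_ARG0_GREG / IEMNATIVE_CALL_ARG1_GREG name the host ABI
 * argument registers:
 *
 * @code
 *      off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarArg1);
 *      off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxVarArg0);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 * @endcode
 */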
7760
7761
7762/**
7763 * Emits code to load the variable address into an argument GPR.
7764 *
7765 * This only works for uninitialized and stack variables.
7766 */
7767DECL_FORCE_INLINE_THROW(uint32_t)
7768iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7769 bool fFlushShadows)
7770{
7771 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7772 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7773 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7774 || pVar->enmKind == kIemNativeVarKind_Stack,
7775 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7776 AssertStmt(!pVar->fSimdReg,
7777 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7778
7779 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7780 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7781
7782 uint8_t const idxRegVar = pVar->idxReg;
7783 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7784 {
7785 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7786 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7787 Assert(pVar->idxReg == UINT8_MAX);
7788 }
7789 Assert( pVar->idxStackSlot != UINT8_MAX
7790 && pVar->idxReg == UINT8_MAX);
7791
7792 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7793}
7794
7795
7796#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7797/**
7798 * Emits code to load the variable address into an argument GPR.
7799 *
7800 * This is a special variant intended for SIMD variables only.  It is only
7801 * called by the TLB miss path in the memory fetch/store code, because there
7802 * the value is passed by reference and both the register and the stack copy
7803 * are needed, depending on which path is taken (TLB hit vs. miss).
7804 */
7805DECL_FORCE_INLINE_THROW(uint32_t)
7806iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7807 bool fSyncRegWithStack = true)
7808{
7809 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7810 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7811 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7812 || pVar->enmKind == kIemNativeVarKind_Stack,
7813 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7814 AssertStmt(pVar->fSimdReg,
7815 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7816 Assert( pVar->idxStackSlot != UINT8_MAX
7817 && pVar->idxReg != UINT8_MAX);
7818
7819 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7820 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7821
7822 uint8_t const idxRegVar = pVar->idxReg;
7823 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7824 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7825
7826 if (fSyncRegWithStack)
7827 {
7828 if (pVar->cbVar == sizeof(RTUINT128U))
7829 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7830 else
7831 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7832 }
7833
7834 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7835}
7836
7837
7838/**
7839 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7840 *
7841 * This is a special helper that is only called by the TLB miss path in the
7842 * memory fetch/store code: the value is passed by reference, and after a
7843 * TLB miss, where the value ends up on the stack, the assigned host
7844 * register must be re-synced from the stack copy.
7845 */
7846DECL_FORCE_INLINE_THROW(uint32_t)
7847iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7848{
7849 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7850 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7851 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7852 || pVar->enmKind == kIemNativeVarKind_Stack,
7853 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7854 AssertStmt(pVar->fSimdReg,
7855 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7856 Assert( pVar->idxStackSlot != UINT8_MAX
7857 && pVar->idxReg != UINT8_MAX);
7858
7859 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7860 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7861
7862 uint8_t const idxRegVar = pVar->idxReg;
7863 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7864 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7865
7866 if (pVar->cbVar == sizeof(RTUINT128U))
7867 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7868 else
7869 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7870
7871 return off;
7872}
7873
7874
7875/**
7876 * Emits a gprdst = ~gprsrc store.
7877 */
7878DECL_FORCE_INLINE_THROW(uint32_t)
7879iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7880{
7881#ifdef RT_ARCH_AMD64
7882 if (iGprDst != iGprSrc)
7883 {
7884 /* mov gprdst, gprsrc. */
7885 if (f64Bit)
7886 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7887 else
7888 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7889 }
7890
7891 /* not gprdst */
7892 if (f64Bit || iGprDst >= 8)
7893 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7894 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7895 pCodeBuf[off++] = 0xf7;
7896 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7897#elif defined(RT_ARCH_ARM64)
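    /* mvn gprdst, gprsrc; alias for: orn gprdst, xzr, gprsrc */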
7898 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7899#else
7900# error "port me"
7901#endif
7902 return off;
7903}
7904
7905
7906/**
7907 * Emits a gprdst = ~gprsrc store.
7908 */
7909DECL_INLINE_THROW(uint32_t)
7910iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7911{
7912#ifdef RT_ARCH_AMD64
7913 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7914#elif defined(RT_ARCH_ARM64)
7915 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7916#else
7917# error "port me"
7918#endif
7919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7920 return off;
7921}
7922
7923
7924/**
7925 * Emits a 128-bit vector register store to a VCpu value.
7926 */
7927DECL_FORCE_INLINE_THROW(uint32_t)
7928iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7929{
7930#ifdef RT_ARCH_AMD64
7931 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7932 pCodeBuf[off++] = 0x66;
7933 if (iVecReg >= 8)
7934 pCodeBuf[off++] = X86_OP_REX_R;
7935 pCodeBuf[off++] = 0x0f;
7936 pCodeBuf[off++] = 0x7f;
7937 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7938#elif defined(RT_ARCH_ARM64)
7939 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7940
7941#else
7942# error "port me"
7943#endif
7944 return off;
7945}
7946
7947
7948/**
7949 * Emits a 128-bit vector register store to a VCpu value.
7950 */
7951DECL_INLINE_THROW(uint32_t)
7952iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7953{
7954#ifdef RT_ARCH_AMD64
7955 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7956#elif defined(RT_ARCH_ARM64)
7957 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7958#else
7959# error "port me"
7960#endif
7961 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7962 return off;
7963}
7964
7965
7966/**
7967 * Emits a high 128-bit vector register store to a VCpu value.
7968 */
7969DECL_FORCE_INLINE_THROW(uint32_t)
7970iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7971{
7972#ifdef RT_ARCH_AMD64
7973 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7974 pCodeBuf[off++] = X86_OP_VEX3;
7975 if (iVecReg >= 8)
7976 pCodeBuf[off++] = 0x63;
7977 else
7978 pCodeBuf[off++] = 0xe3;
7979 pCodeBuf[off++] = 0x7d;
7980 pCodeBuf[off++] = 0x39;
7981 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7982 pCodeBuf[off++] = 0x01; /* Immediate */
7983#elif defined(RT_ARCH_ARM64)
7984 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7985#else
7986# error "port me"
7987#endif
7988 return off;
7989}
7990
7991
7992/**
7993 * Emits a high 128-bit vector register store to a VCpu value.
7994 */
7995DECL_INLINE_THROW(uint32_t)
7996iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7997{
7998#ifdef RT_ARCH_AMD64
7999 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8000#elif defined(RT_ARCH_ARM64)
8001 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8002 Assert(!(iVecReg & 0x1));
8003 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8004#else
8005# error "port me"
8006#endif
8007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8008 return off;
8009}
8010
8011
8012/**
8013 * Emits a 128-bit vector register load of a VCpu value.
8014 */
8015DECL_FORCE_INLINE_THROW(uint32_t)
8016iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8017{
8018#ifdef RT_ARCH_AMD64
8019 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
8020 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8021 if (iVecReg >= 8)
8022 pCodeBuf[off++] = X86_OP_REX_R;
8023 pCodeBuf[off++] = 0x0f;
8024 pCodeBuf[off++] = 0x6f;
8025 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8026#elif defined(RT_ARCH_ARM64)
8027 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8028
8029#else
8030# error "port me"
8031#endif
8032 return off;
8033}
8034
8035
8036/**
8037 * Emits a 128-bit vector register load of a VCpu value.
8038 */
8039DECL_INLINE_THROW(uint32_t)
8040iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8041{
8042#ifdef RT_ARCH_AMD64
8043 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8044#elif defined(RT_ARCH_ARM64)
8045 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8046#else
8047# error "port me"
8048#endif
8049 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8050 return off;
8051}
8052
8053
8054/**
8055 * Emits a high 128-bit vector register load of a VCpu value.
8056 */
8057DECL_FORCE_INLINE_THROW(uint32_t)
8058iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8059{
8060#ifdef RT_ARCH_AMD64
8061 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
8062 pCodeBuf[off++] = X86_OP_VEX3;
8063 if (iVecReg >= 8)
8064 pCodeBuf[off++] = 0x63;
8065 else
8066 pCodeBuf[off++] = 0xe3;
8067 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8068 pCodeBuf[off++] = 0x38;
8069 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8070 pCodeBuf[off++] = 0x01; /* Immediate */
8071#elif defined(RT_ARCH_ARM64)
8072 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8073#else
8074# error "port me"
8075#endif
8076 return off;
8077}
8078
8079
8080/**
8081 * Emits a high 128-bit vector register load of a VCpu value.
8082 */
8083DECL_INLINE_THROW(uint32_t)
8084iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8085{
8086#ifdef RT_ARCH_AMD64
8087 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8088#elif defined(RT_ARCH_ARM64)
8089 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8090 Assert(!(iVecReg & 0x1));
8091 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8092#else
8093# error "port me"
8094#endif
8095 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8096 return off;
8097}
8098
8099
8100/**
8101 * Emits a vecdst = vecsrc load.
8102 */
8103DECL_FORCE_INLINE(uint32_t)
8104iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8105{
8106#ifdef RT_ARCH_AMD64
8107 /* movdqu vecdst, vecsrc */
8108 pCodeBuf[off++] = 0xf3;
8109
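    /* For 0x6f, ModRM.reg is the destination (REX.R) and ModRM.rm the source (REX.B). */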
8110 if ((iVecRegDst | iVecRegSrc) >= 8)
8111 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
8112 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
8113 : X86_OP_REX_R;
8114 pCodeBuf[off++] = 0x0f;
8115 pCodeBuf[off++] = 0x6f;
8116 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8117
8118#elif defined(RT_ARCH_ARM64)
8119 /* mov dst, src; alias for: orr dst, src, src */
8120 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8121
8122#else
8123# error "port me"
8124#endif
8125 return off;
8126}
8127
8128
8129/**
8130 * Emits a vecdst = vecsrc load, 128-bit.
8131 */
8132DECL_INLINE_THROW(uint32_t)
8133iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8134{
8135#ifdef RT_ARCH_AMD64
8136 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8137#elif defined(RT_ARCH_ARM64)
8138 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8139#else
8140# error "port me"
8141#endif
8142 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8143 return off;
8144}
8145
8146
8147/**
8148 * Emits a vecdst[128:255] = vecsrc[128:255] load.
8149 */
8150DECL_FORCE_INLINE_THROW(uint32_t)
8151iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8152{
8153#ifdef RT_ARCH_AMD64
8154 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
8155 pCodeBuf[off++] = X86_OP_VEX3;
8156 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8157 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8158 pCodeBuf[off++] = 0x46;
8159 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8160 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
8161
8162#elif defined(RT_ARCH_ARM64)
8163 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8164
8165 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
8166# ifdef IEM_WITH_THROW_CATCH
8167 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8168# else
8169 AssertReleaseFailedStmt(off = UINT32_MAX);
8170# endif
8171#else
8172# error "port me"
8173#endif
8174 return off;
8175}
8176
8177
8178/**
8179 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
8180 */
8181DECL_INLINE_THROW(uint32_t)
8182iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8183{
8184#ifdef RT_ARCH_AMD64
8185 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc); /* VEX3 (3) + opcode + ModRM + imm8 = 6 bytes */
8186#elif defined(RT_ARCH_ARM64)
8187 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8188 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
8189#else
8190# error "port me"
8191#endif
8192 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8193 return off;
8194}
8195
8196
8197/**
8198 * Emits a vecdst[0:127] = vecsrc[128:255] load.
8199 */
8200DECL_FORCE_INLINE_THROW(uint32_t)
8201iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8202{
8203#ifdef RT_ARCH_AMD64
8204 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
8205 pCodeBuf[off++] = X86_OP_VEX3;
8206 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
8207 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8208 pCodeBuf[off++] = 0x39;
8209 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
8210 pCodeBuf[off++] = 0x1;
8211
8212#elif defined(RT_ARCH_ARM64)
8213 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8214
8215 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
8216# ifdef IEM_WITH_THROW_CATCH
8217 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8218# else
8219 AssertReleaseFailedStmt(off = UINT32_MAX);
8220# endif
8221#else
8222# error "port me"
8223#endif
8224 return off;
8225}
8226
8227
8228/**
8229 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
8230 */
8231DECL_INLINE_THROW(uint32_t)
8232iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8233{
8234#ifdef RT_ARCH_AMD64
8235 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8236#elif defined(RT_ARCH_ARM64)
8237 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8238 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
8239#else
8240# error "port me"
8241#endif
8242 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8243 return off;
8244}
8245
8246
8247/**
8248 * Emits a vecdst = vecsrc load, 256-bit.
8249 */
8250DECL_INLINE_THROW(uint32_t)
8251iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8252{
8253#ifdef RT_ARCH_AMD64
8254 /* vmovdqa ymm, ymm */
8255 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8256 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
8257 {
8258 pbCodeBuf[off++] = X86_OP_VEX3;
8259 pbCodeBuf[off++] = 0x41;
8260 pbCodeBuf[off++] = 0x7d;
8261 pbCodeBuf[off++] = 0x6f;
8262 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8263 }
8264 else
8265 {
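        /* VEX2 only carries the R extension, so when the source is >= 8 we use the store form (0x7f) and swap the ModRM operands. */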
8266 pbCodeBuf[off++] = X86_OP_VEX2;
8267 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8268 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8269 pbCodeBuf[off++] = iVecRegSrc >= 8
8270 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8271 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8272 }
8273#elif defined(RT_ARCH_ARM64)
8274 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8275 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8276 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8277 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8278#else
8279# error "port me"
8280#endif
8281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8282 return off;
8283}
8284
8285
8286/**
8287 * Emits a vecdst[128:255] = vecsrc[0:127] load.
8288 */
8289DECL_FORCE_INLINE(uint32_t)
8290iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8291{
8292#ifdef RT_ARCH_AMD64
8293 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8294 pCodeBuf[off++] = X86_OP_VEX3;
8295 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8296 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8297 pCodeBuf[off++] = 0x38;
8298 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8299 pCodeBuf[off++] = 0x01; /* Immediate */
8300
8301#elif defined(RT_ARCH_ARM64)
8302 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8303 /* mov dst, src; alias for: orr dst, src, src */
8304 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8305
8306#else
8307# error "port me"
8308#endif
8309 return off;
8310}
8311
8312
8313/**
8314 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8315 */
8316DECL_INLINE_THROW(uint32_t)
8317iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8318{
8319#ifdef RT_ARCH_AMD64
8320 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8321#elif defined(RT_ARCH_ARM64)
8322 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8323#else
8324# error "port me"
8325#endif
8326 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8327 return off;
8328}
8329
8330
8331/**
8332 * Emits a gprdst = vecsrc[x] load, 64-bit.
8333 */
8334DECL_FORCE_INLINE(uint32_t)
8335iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8336{
8337#ifdef RT_ARCH_AMD64
8338 if (iQWord >= 2)
8339 {
8340 /*
8341 * vpextrq doesn't work on the upper 128-bits.
8342 * So we use the following sequence:
8343 * vextracti128 vectmp0, vecsrc, 1
8344 * pextrq gpr, vectmp0, #(iQWord - 2)
8345 */
8346 /* vextracti128 */
8347 pCodeBuf[off++] = X86_OP_VEX3;
8348 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8349 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8350 pCodeBuf[off++] = 0x39;
8351 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8352 pCodeBuf[off++] = 0x1;
8353
8354 /* pextrq */
8355 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8356 pCodeBuf[off++] = X86_OP_REX_W
8357 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8358 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8359 pCodeBuf[off++] = 0x0f;
8360 pCodeBuf[off++] = 0x3a;
8361 pCodeBuf[off++] = 0x16;
8362 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8363 pCodeBuf[off++] = iQWord - 2;
8364 }
8365 else
8366 {
8367 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8368 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8369 pCodeBuf[off++] = X86_OP_REX_W
8370 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8371 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8372 pCodeBuf[off++] = 0x0f;
8373 pCodeBuf[off++] = 0x3a;
8374 pCodeBuf[off++] = 0x16;
8375 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8376 pCodeBuf[off++] = iQWord;
8377 }
8378#elif defined(RT_ARCH_ARM64)
8379 /* umov gprdst, vecsrc[iQWord] */
8380 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8381#else
8382# error "port me"
8383#endif
8384 return off;
8385}
8386
8387
8388/**
8389 * Emits a gprdst = vecsrc[x] load, 64-bit.
8390 */
8391DECL_INLINE_THROW(uint32_t)
8392iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8393{
8394 Assert(iQWord <= 3);
8395
8396#ifdef RT_ARCH_AMD64
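    /* Worst case: vextracti128 (6 bytes) + pextrq (7 bytes) = 13 bytes. */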
8397 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
8398#elif defined(RT_ARCH_ARM64)
8399 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8400 Assert(!(iVecRegSrc & 0x1));
8401 /* Need to access the "high" 128-bit vector register. */
8402 if (iQWord >= 2)
8403 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
8404 else
8405 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
8406#else
8407# error "port me"
8408#endif
8409 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8410 return off;
8411}
8412
8413
8414/**
8415 * Emits a gprdst = vecsrc[x] load, 32-bit.
8416 */
8417DECL_FORCE_INLINE(uint32_t)
8418iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8419{
8420#ifdef RT_ARCH_AMD64
8421 if (iDWord >= 4)
8422 {
8423 /*
8424 * vpextrd doesn't work on the upper 128-bits.
8425 * So we use the following sequence:
8426 * vextracti128 vectmp0, vecsrc, 1
8427 * pextrd gpr, vectmp0, #(iDWord - 4)
8428 */
8429 /* vextracti128 */
8430 pCodeBuf[off++] = X86_OP_VEX3;
8431 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8432 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8433 pCodeBuf[off++] = 0x39;
8434 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8435 pCodeBuf[off++] = 0x1;
8436
8437 /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
8438 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8439 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
8440 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8441 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8442 pCodeBuf[off++] = 0x0f;
8443 pCodeBuf[off++] = 0x3a;
8444 pCodeBuf[off++] = 0x16;
8445 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8446 pCodeBuf[off++] = iDWord - 4;
8447 }
8448 else
8449 {
8450 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8451 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8452 if (iGprDst >= 8 || iVecRegSrc >= 8)
8453 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8454 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8455 pCodeBuf[off++] = 0x0f;
8456 pCodeBuf[off++] = 0x3a;
8457 pCodeBuf[off++] = 0x16;
8458 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8459 pCodeBuf[off++] = iDWord;
8460 }
8461#elif defined(RT_ARCH_ARM64)
8462 Assert(iDWord < 4);
8463
8464 /* umov gprdst, vecsrc[iDWord] */
8465 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
8466#else
8467# error "port me"
8468#endif
8469 return off;
8470}
8471
8472
8473/**
8474 * Emits a gprdst = vecsrc[x] load, 32-bit.
8475 */
8476DECL_INLINE_THROW(uint32_t)
8477iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8478{
8479 Assert(iDWord <= 7);
8480
8481#ifdef RT_ARCH_AMD64
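    /* Worst case: vextracti128 (6 bytes) + pextrd (7 bytes) = 13 bytes. */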
8482 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
8483#elif defined(RT_ARCH_ARM64)
8484 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8485 Assert(!(iVecRegSrc & 0x1));
8486 /* Need to access the "high" 128-bit vector register. */
8487 if (iDWord >= 4)
8488 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
8489 else
8490 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
8491#else
8492# error "port me"
8493#endif
8494 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8495 return off;
8496}
8497
8498
8499/**
8500 * Emits a gprdst = vecsrc[x] load, 16-bit.
8501 */
8502DECL_FORCE_INLINE(uint32_t)
8503iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8504{
8505#ifdef RT_ARCH_AMD64
8506 if (iWord >= 8)
8507 {
8508 /** @todo Currently not used. */
8509 AssertReleaseFailed();
8510 }
8511 else
8512 {
8513 /* pextrw gpr, vecsrc, #iWord */
8514 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8515 if (iGprDst >= 8 || iVecRegSrc >= 8)
8516 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
8517 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
8518 pCodeBuf[off++] = 0x0f;
8519 pCodeBuf[off++] = 0xc5;
8520 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
8521 pCodeBuf[off++] = iWord;
8522 }
8523#elif defined(RT_ARCH_ARM64)
8524 /* umov gprdst, vecsrc[iWord] */
8525 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
8526#else
8527# error "port me"
8528#endif
8529 return off;
8530}
8531
8532
8533/**
8534 * Emits a gprdst = vecsrc[x] load, 16-bit.
8535 */
8536DECL_INLINE_THROW(uint32_t)
8537iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8538{
8539 Assert(iWord <= 15);
8540
8541#ifdef RT_ARCH_AMD64
8542 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
8543#elif defined(RT_ARCH_ARM64)
8544 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8545 Assert(!(iVecRegSrc & 0x1));
8546 /* Need to access the "high" 128-bit vector register. */
8547 if (iWord >= 8)
8548 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
8549 else
8550 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
8551#else
8552# error "port me"
8553#endif
8554 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8555 return off;
8556}
8557
8558
8559/**
8560 * Emits a gprdst = vecsrc[x] load, 8-bit.
8561 */
8562DECL_FORCE_INLINE(uint32_t)
8563iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8564{
8565#ifdef RT_ARCH_AMD64
8566 if (iByte >= 16)
8567 {
8568 /** @todo Currently not used. */
8569 AssertReleaseFailed();
8570 }
8571 else
8572 {
8573 /* pextrb gpr, vecsrc, #iByte */
8574 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8575 if (iGprDst >= 8 || iVecRegSrc >= 8)
8576 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8577 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8578 pCodeBuf[off++] = 0x0f;
8579 pCodeBuf[off++] = 0x3a;
8580 pCodeBuf[off++] = 0x14;
8581 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8582 pCodeBuf[off++] = iByte;
8583 }
8584#elif defined(RT_ARCH_ARM64)
8585 /* umov gprdst, vecsrc[iByte] */
8586 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
8587#else
8588# error "port me"
8589#endif
8590 return off;
8591}
8592
8593
8594/**
8595 * Emits a gprdst = vecsrc[x] load, 8-bit.
8596 */
8597DECL_INLINE_THROW(uint32_t)
8598iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8599{
8600 Assert(iByte <= 31);
8601
8602#ifdef RT_ARCH_AMD64
8603 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
8604#elif defined(RT_ARCH_ARM64)
8605 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8606 Assert(!(iVecRegSrc & 0x1));
8607 /* Need to access the "high" 128-bit vector register. */
8608 if (iByte >= 16)
8609 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8610 else
8611 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8612#else
8613# error "port me"
8614#endif
8615 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8616 return off;
8617}
8618
8619
8620/**
8621 * Emits a vecdst[x] = gprsrc store, 64-bit.
8622 */
8623DECL_FORCE_INLINE(uint32_t)
8624iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8625{
8626#ifdef RT_ARCH_AMD64
8627 if (iQWord >= 2)
8628 {
8629 /*
8630 * vpinsrq doesn't work on the upper 128-bits.
8631 * So we use the following sequence:
8632 * vextracti128 vectmp0, vecdst, 1
8633 * pinsrq vectmp0, gpr, #(iQWord - 2)
8634 * vinserti128 vecdst, vectmp0, 1
8635 */
8636 /* vextracti128 */
8637 pCodeBuf[off++] = X86_OP_VEX3;
8638 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8639 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8640 pCodeBuf[off++] = 0x39;
8641 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8642 pCodeBuf[off++] = 0x1;
8643
8644 /* pinsrq */
8645 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8646 pCodeBuf[off++] = X86_OP_REX_W
8647 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8648 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8649 pCodeBuf[off++] = 0x0f;
8650 pCodeBuf[off++] = 0x3a;
8651 pCodeBuf[off++] = 0x22;
8652 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
8653 pCodeBuf[off++] = iQWord - 2;
8654
8655 /* vinserti128 */
8656 pCodeBuf[off++] = X86_OP_VEX3;
8657 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8658 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8659 pCodeBuf[off++] = 0x38;
8660 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8661 pCodeBuf[off++] = 0x01; /* Immediate */
8662 }
8663 else
8664 {
8665 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
8666 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8667 pCodeBuf[off++] = X86_OP_REX_W
8668 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8669 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8670 pCodeBuf[off++] = 0x0f;
8671 pCodeBuf[off++] = 0x3a;
8672 pCodeBuf[off++] = 0x22;
8673 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8674 pCodeBuf[off++] = iQWord;
8675 }
8676#elif defined(RT_ARCH_ARM64)
8677 /* ins vecsrc[iQWord], gpr */
8678 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8679#else
8680# error "port me"
8681#endif
8682 return off;
8683}
8684
8685
8686/**
8687 * Emits a vecdst[x] = gprsrc store, 64-bit.
8688 */
8689DECL_INLINE_THROW(uint32_t)
8690iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8691{
8692 Assert(iQWord <= 3);
8693
8694#ifdef RT_ARCH_AMD64
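    /* Worst case: vextracti128 (6) + pinsrq (7) + vinserti128 (6) = 19 bytes. */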
8695 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
8696#elif defined(RT_ARCH_ARM64)
8697 Assert(!(iVecRegDst & 0x1));
8698 if (iQWord >= 2)
8699 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
8700 else
8701 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
8702#else
8703# error "port me"
8704#endif
8705 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8706 return off;
8707}
8708
8709
8710/**
8711 * Emits a vecdst[x] = gprsrc store, 32-bit.
8712 */
8713DECL_FORCE_INLINE(uint32_t)
8714iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8715{
8716#ifdef RT_ARCH_AMD64
8717 if (iDWord >= 4)
8718 {
8719 /*
8720 * vpinsrd doesn't work on the upper 128-bits.
8721 * So we use the following sequence:
8722 * vextracti128 vectmp0, vecdst, 1
8723 * pinsrd vectmp0, gpr, #(iDWord - 4)
8724 * vinserti128 vecdst, vectmp0, 1
8725 */
8726 /* vextracti128 */
8727 pCodeBuf[off++] = X86_OP_VEX3;
8728 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8729 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8730 pCodeBuf[off++] = 0x39;
8731 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8732 pCodeBuf[off++] = 0x1;
8733
8734 /* pinsrd */
8735 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8736 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
8737 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8738 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8739 pCodeBuf[off++] = 0x0f;
8740 pCodeBuf[off++] = 0x3a;
8741 pCodeBuf[off++] = 0x22;
8742 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
8743 pCodeBuf[off++] = iDWord - 4;
8744
8745 /* vinserti128 */
8746 pCodeBuf[off++] = X86_OP_VEX3;
8747 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8748 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8749 pCodeBuf[off++] = 0x38;
8750 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8751 pCodeBuf[off++] = 0x01; /* Immediate */
8752 }
8753 else
8754 {
8755 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
8756 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8757 if (iVecRegDst >= 8 || iGprSrc >= 8)
8758 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8759 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8760 pCodeBuf[off++] = 0x0f;
8761 pCodeBuf[off++] = 0x3a;
8762 pCodeBuf[off++] = 0x22;
8763 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8764 pCodeBuf[off++] = iDWord;
8765 }
8766#elif defined(RT_ARCH_ARM64)
8767 /* ins vecsrc[iDWord], gpr */
8768 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
8769#else
8770# error "port me"
8771#endif
8772 return off;
8773}
8774
8775
8776/**
8777 * Emits a vecdst[x] = gprsrc store, 32-bit.
8778 */
8779DECL_INLINE_THROW(uint32_t)
8780iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8781{
8782 Assert(iDWord <= 7);
8783
8784#ifdef RT_ARCH_AMD64
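    /* Worst case: vextracti128 (6) + pinsrd (7) + vinserti128 (6) = 19 bytes. */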
8785 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
8786#elif defined(RT_ARCH_ARM64)
8787 Assert(!(iVecRegDst & 0x1));
8788 if (iDWord >= 4)
8789 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
8790 else
8791 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
8792#else
8793# error "port me"
8794#endif
8795 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8796 return off;
8797}
8798
8799
8800/**
8801 * Emits a vecdst[x] = gprsrc store, 16-bit.
8802 */
8803DECL_FORCE_INLINE(uint32_t)
8804iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
8805{
8806#ifdef RT_ARCH_AMD64
8807 /* pinsrw vecsrc, gpr, #iWord. */
8808 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8809 if (iVecRegDst >= 8 || iGprSrc >= 8)
8810 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8811 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8812 pCodeBuf[off++] = 0x0f;
8813 pCodeBuf[off++] = 0xc4;
8814 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8815 pCodeBuf[off++] = iWord;
8816#elif defined(RT_ARCH_ARM64)
8817 /* ins vecsrc[iWord], gpr */
8818 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
8819#else
8820# error "port me"
8821#endif
8822 return off;
8823}
8824
8825
8826/**
8827 * Emits a vecdst[x] = gprsrc store, 16-bit.
8828 */
8829DECL_INLINE_THROW(uint32_t)
8830iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
8831{
8832 Assert(iWord <= 7); /* Neither path handles the upper 128 bits, so only 8 words are addressable. */
8833
8834#ifdef RT_ARCH_AMD64
8835 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
8836#elif defined(RT_ARCH_ARM64)
8837 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
8838#else
8839# error "port me"
8840#endif
8841 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8842 return off;
8843}
8844
8845
8846/**
8847 * Emits a vecdst[x] = gprsrc store, 8-bit.
8848 */
8849DECL_FORCE_INLINE(uint32_t)
8850iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
8851{
8852#ifdef RT_ARCH_AMD64
8853 /* pinsrb vecsrc, gpr, #iByte (ASSUMES SSE4.1). */
8854 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8855 if (iVecRegDst >= 8 || iGprSrc >= 8)
8856 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8857 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8858 pCodeBuf[off++] = 0x0f;
8859 pCodeBuf[off++] = 0x3a;
8860 pCodeBuf[off++] = 0x20;
8861 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8862 pCodeBuf[off++] = iByte;
8863#elif defined(RT_ARCH_ARM64)
8864 /* ins vecsrc[iByte], gpr */
8865 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
8866#else
8867# error "port me"
8868#endif
8869 return off;
8870}
8871
8872
8873/**
8874 * Emits a vecdst[x] = gprsrc store, 8-bit.
8875 */
8876DECL_INLINE_THROW(uint32_t)
8877iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
8878{
8879 Assert(iByte <= 15);
8880
8881#ifdef RT_ARCH_AMD64
8882 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
8883#elif defined(RT_ARCH_ARM64)
8884 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
8885#else
8886# error "port me"
8887#endif
8888 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8889 return off;
8890}
8891
8892
8893/**
8894 * Emits a vecdst.au32[iDWord] = 0 store.
8895 */
8896DECL_FORCE_INLINE(uint32_t)
8897iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8898{
8899 Assert(iDWord <= 7);
8900
8901#ifdef RT_ARCH_AMD64
8902 /*
8903 * xor tmp0, tmp0
8904 * pinsrd xmm, tmp0, iDword
8905 */
8906 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
8907 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8908 pCodeBuf[off++] = 0x33;
8909 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
8910 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
8911#elif defined(RT_ARCH_ARM64)
8912 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8913 Assert(!(iVecReg & 0x1));
8914 /* ins vecsrc[iDWord], wzr */
8915 if (iDWord >= 4)
8916 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
8917 else
8918 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
8919#else
8920# error "port me"
8921#endif
8922 return off;
8923}
8924
8925
8926/**
8927 * Emits a vecdst.au32[iDWord] = 0 store.
8928 */
8929DECL_INLINE_THROW(uint32_t)
8930iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8931{
8932
8933#ifdef RT_ARCH_AMD64
8934 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 22), off, iVecReg, iDWord); /* xor (3) + worst-case pinsrd path for iDWord >= 4 (19) */
8935#elif defined(RT_ARCH_ARM64)
8936 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
8937#else
8938# error "port me"
8939#endif
8940 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8941 return off;
8942}
8943
8944
8945/**
8946 * Emits a vecdst[0:127] = 0 store.
8947 */
8948DECL_FORCE_INLINE(uint32_t)
8949iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8950{
8951#ifdef RT_ARCH_AMD64
8952 /* pxor xmm, xmm */
8953 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8954 if (iVecReg >= 8)
8955 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
8956 pCodeBuf[off++] = 0x0f;
8957 pCodeBuf[off++] = 0xef;
8958 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8959#elif defined(RT_ARCH_ARM64)
8960 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8961 Assert(!(iVecReg & 0x1));
8962 /* eor vecreg, vecreg, vecreg */
8963 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8964#else
8965# error "port me"
8966#endif
8967 return off;
8968}
8969
8970
8971/**
8972 * Emits a vecdst[0:127] = 0 store.
8973 */
8974DECL_INLINE_THROW(uint32_t)
8975iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8976{
8977#ifdef RT_ARCH_AMD64
8978 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8979#elif defined(RT_ARCH_ARM64)
8980 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8981#else
8982# error "port me"
8983#endif
8984 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8985 return off;
8986}
8987
8988
8989/**
8990 * Emits a vecdst[128:255] = 0 store.
8991 */
8992DECL_FORCE_INLINE(uint32_t)
8993iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8994{
8995#ifdef RT_ARCH_AMD64
8996 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
8997 if (iVecReg < 8)
8998 {
8999 pCodeBuf[off++] = X86_OP_VEX2;
9000 pCodeBuf[off++] = 0xf9;
9001 }
9002 else
9003 {
9004 pCodeBuf[off++] = X86_OP_VEX3;
9005 pCodeBuf[off++] = 0x41;
9006 pCodeBuf[off++] = 0x79;
9007 }
9008 pCodeBuf[off++] = 0x6f;
9009 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9010#elif defined(RT_ARCH_ARM64)
9011 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9012 Assert(!(iVecReg & 0x1));
9013 /* eor vecreg, vecreg, vecreg */
9014 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9015#else
9016# error "port me"
9017#endif
9018 return off;
9019}
9020
9021
9022/**
9023 * Emits a vecdst[128:255] = 0 store.
9024 */
9025DECL_INLINE_THROW(uint32_t)
9026iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9027{
9028#ifdef RT_ARCH_AMD64
9029 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
9030#elif defined(RT_ARCH_ARM64)
9031 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9032#else
9033# error "port me"
9034#endif
9035 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9036 return off;
9037}
9038
9039
9040/**
9041 * Emits a vecdst[0:255] = 0 store.
9042 */
9043DECL_FORCE_INLINE(uint32_t)
9044iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9045{
9046#ifdef RT_ARCH_AMD64
9047 /* vpxor ymm, ymm, ymm */
9048 if (iVecReg < 8)
9049 {
9050 pCodeBuf[off++] = X86_OP_VEX2;
9051 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9052 }
9053 else
9054 {
9055 pCodeBuf[off++] = X86_OP_VEX3;
9056 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
9057 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9058 }
9059 pCodeBuf[off++] = 0xef;
9060 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9061#elif defined(RT_ARCH_ARM64)
9062 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9063 Assert(!(iVecReg & 0x1));
9064 /* eor vecreg, vecreg, vecreg */
9065 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9066 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9067#else
9068# error "port me"
9069#endif
9070 return off;
9071}
9072
9073
9074/**
9075 * Emits a vecdst[0:255] = 0 store.
9076 */
9077DECL_INLINE_THROW(uint32_t)
9078iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9079{
9080#ifdef RT_ARCH_AMD64
9081 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9082#elif defined(RT_ARCH_ARM64)
9083 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
9084#else
9085# error "port me"
9086#endif
9087 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9088 return off;
9089}
9090
9091
9092/**
9093 * Emits a vecdst = gprsrc broadcast, 8-bit.
9094 */
9095DECL_FORCE_INLINE(uint32_t)
9096iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9097{
9098#ifdef RT_ARCH_AMD64
9099 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
9100 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9101 if (iVecRegDst >= 8 || iGprSrc >= 8)
9102 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9103 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9104 pCodeBuf[off++] = 0x0f;
9105 pCodeBuf[off++] = 0x3a;
9106 pCodeBuf[off++] = 0x20;
9107 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9108 pCodeBuf[off++] = 0x00;
9109
9110 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
9111 pCodeBuf[off++] = X86_OP_VEX3;
9112 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9113 | 0x02 /* opcode map. */
9114 | ( iVecRegDst >= 8
9115 ? 0
9116 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9117 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9118 pCodeBuf[off++] = 0x78;
9119 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9120#elif defined(RT_ARCH_ARM64)
9121 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9122 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9123
9124 /* dup vecsrc, gpr */
9125 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
9126 if (f256Bit)
9127 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
9128#else
9129# error "port me"
9130#endif
9131 return off;
9132}
9133
9134
9135/**
9136 * Emits a vecdst = gprsrc broadcast, 8-bit.
9137 */
9138DECL_INLINE_THROW(uint32_t)
9139iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9140{
9141#ifdef RT_ARCH_AMD64
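    /* Worst case: pinsrb (7 bytes) + vpbroadcastb (5 bytes) = 12 bytes. */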
9142 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9143#elif defined(RT_ARCH_ARM64)
9144 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9145#else
9146# error "port me"
9147#endif
9148 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9149 return off;
9150}
9151
9152
9153/**
9154 * Emits a vecdst = gprsrc broadcast, 16-bit.
9155 */
9156DECL_FORCE_INLINE(uint32_t)
9157iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9158{
9159#ifdef RT_ARCH_AMD64
9160 /* pinsrw vecdst, gpr, #0 */
9161 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9162 if (iVecRegDst >= 8 || iGprSrc >= 8)
9163 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9164 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9165 pCodeBuf[off++] = 0x0f;
9166 pCodeBuf[off++] = 0xc4;
9167 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9168 pCodeBuf[off++] = 0x00;
9169
9170 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
9171 pCodeBuf[off++] = X86_OP_VEX3;
9172 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9173 | 0x02 /* opcode map. */
9174 | ( iVecRegDst >= 8
9175 ? 0
9176 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9177 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9178 pCodeBuf[off++] = 0x79;
9179 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9180#elif defined(RT_ARCH_ARM64)
9181 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9182 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9183
9184 /* dup vecsrc, gpr */
9185 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
9186 if (f256Bit)
9187 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
9188#else
9189# error "port me"
9190#endif
9191 return off;
9192}
9193
9194
9195/**
9196 * Emits a vecdst = gprsrc broadcast, 16-bit.
9197 */
9198DECL_INLINE_THROW(uint32_t)
9199iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9200{
9201#ifdef RT_ARCH_AMD64
9202 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9203#elif defined(RT_ARCH_ARM64)
9204 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9205#else
9206# error "port me"
9207#endif
9208 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9209 return off;
9210}
9211
9212
9213/**
9214 * Emits a vecdst = gprsrc broadcast, 32-bit.
9215 */
9216DECL_FORCE_INLINE(uint32_t)
9217iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9218{
9219#ifdef RT_ARCH_AMD64
9220 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9221 * vbroadcast needs a memory operand or another xmm register to work... */
9222
9223 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
9224 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9225 if (iVecRegDst >= 8 || iGprSrc >= 8)
9226 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9227 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9228 pCodeBuf[off++] = 0x0f;
9229 pCodeBuf[off++] = 0x3a;
9230 pCodeBuf[off++] = 0x22;
9231 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9232 pCodeBuf[off++] = 0x00;
9233
9234 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
9235 pCodeBuf[off++] = X86_OP_VEX3;
9236 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9237 | 0x02 /* opcode map. */
9238 | ( iVecRegDst >= 8
9239 ? 0
9240 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9241 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9242 pCodeBuf[off++] = 0x58;
9243 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9244#elif defined(RT_ARCH_ARM64)
9245 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9246 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9247
9248 /* dup vecsrc, gpr */
9249 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
9250 if (f256Bit)
9251 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
9252#else
9253# error "port me"
9254#endif
9255 return off;
9256}
9257
9258
9259/**
9260 * Emits a vecdst = gprsrc broadcast, 32-bit.
9261 */
9262DECL_INLINE_THROW(uint32_t)
9263iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9264{
9265#ifdef RT_ARCH_AMD64
9266 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9267#elif defined(RT_ARCH_ARM64)
9268 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9269#else
9270# error "port me"
9271#endif
9272 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9273 return off;
9274}
9275
9276
9277/**
9278 * Emits a vecdst = gprsrc broadcast, 64-bit.
9279 */
9280DECL_FORCE_INLINE(uint32_t)
9281iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9282{
9283#ifdef RT_ARCH_AMD64
9284 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9285 * vbroadcast needs a memory operand or another xmm register to work... */
9286
9287 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
9288 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9289 pCodeBuf[off++] = X86_OP_REX_W
9290 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9291 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9292 pCodeBuf[off++] = 0x0f;
9293 pCodeBuf[off++] = 0x3a;
9294 pCodeBuf[off++] = 0x22;
9295 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9296 pCodeBuf[off++] = 0x00;
9297
9298 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
9299 pCodeBuf[off++] = X86_OP_VEX3;
9300 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9301 | 0x02 /* opcode map. */
9302 | ( iVecRegDst >= 8
9303 ? 0
9304 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9305 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9306 pCodeBuf[off++] = 0x59;
9307 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9308#elif defined(RT_ARCH_ARM64)
9309 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9310 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9311
9312 /* dup vecsrc, gpr */
9313 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
9314 if (f256Bit)
9315 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
9316#else
9317# error "port me"
9318#endif
9319 return off;
9320}
9321
9322
9323/**
9324 * Emits a vecdst = gprsrc broadcast, 64-bit.
9325 */
9326DECL_INLINE_THROW(uint32_t)
9327iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9328{
9329#ifdef RT_ARCH_AMD64
9330 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
9331#elif defined(RT_ARCH_ARM64)
9332 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9333#else
9334# error "port me"
9335#endif
9336 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9337 return off;
9338}
9339
9340
9341/**
9342 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9343 */
9344DECL_FORCE_INLINE(uint32_t)
9345iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9346{
9347#ifdef RT_ARCH_AMD64
9348 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
9349
9350 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
9351 pCodeBuf[off++] = X86_OP_VEX3;
9352 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9353 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9354 pCodeBuf[off++] = 0x38;
9355 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9356 pCodeBuf[off++] = 0x01; /* Immediate */
9357#elif defined(RT_ARCH_ARM64)
9358 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9359 Assert(!(iVecRegDst & 0x1));
9360
9361 /* mov dst, src; alias for: orr dst, src, src */
9362 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9363 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9364#else
9365# error "port me"
9366#endif
9367 return off;
9368}
9369
9370
9371/**
9372 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9373 */
9374DECL_INLINE_THROW(uint32_t)
9375iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9376{
9377#ifdef RT_ARCH_AMD64
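    /* movdqu (5 bytes) + vinserti128 (6 bytes) = 11 bytes. */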
9378 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
9379#elif defined(RT_ARCH_ARM64)
9380 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
9381#else
9382# error "port me"
9383#endif
9384 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9385 return off;
9386}
9387
9388#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9389
9390/** @} */
9391
9392#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
9393