VirtualBox

source: vbox/trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

Last change on this file was 106465, checked in by vboxsync, 5 weeks ago

VMM/IEM: Added iemNativeEmitLoadGprWithGstReg[Ex]T and iemNativeEmitStoreGprToGstReg[Ex]T as a better way of explicitly loading & storing standard guest registers. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 372.3 KB
1/* $Id: IEMN8veRecompilerEmit.h 106465 2024-10-18 00:27:52Z vboxsync $ */
2/** @file
3 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
30#ifndef RT_WITHOUT_PRAGMA_ONCE
31# pragma once
32#endif
33
34#include "IEMN8veRecompiler.h"
35
36
37/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
38 * @ingroup grp_iem_n8ve_re
39 * @{
40 */
41
42/**
43 * Emit a simple marker instruction to more easily tell where something starts
44 * in the disassembly.
45 */
46DECL_INLINE_THROW(uint32_t)
47iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
48{
49#ifdef RT_ARCH_AMD64
50 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
51 if (uInfo == 0)
52 {
53 /* nop */
54 pbCodeBuf[off++] = 0x90;
55 }
56 else
57 {
58 /* nop [disp32] */
59 pbCodeBuf[off++] = 0x0f;
60 pbCodeBuf[off++] = 0x1f;
61 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
62 pbCodeBuf[off++] = RT_BYTE1(uInfo);
63 pbCodeBuf[off++] = RT_BYTE2(uInfo);
64 pbCodeBuf[off++] = RT_BYTE3(uInfo);
65 pbCodeBuf[off++] = RT_BYTE4(uInfo);
66 }
67#elif defined(RT_ARCH_ARM64)
68 /* nop */
69 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
70 if (uInfo == 0)
71 pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
72 else
73 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);
74
75 RT_NOREF(uInfo);
76#else
77# error "port me"
78#endif
79 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
80 return off;
81}
82
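/* Illustrative usage sketch (assumed usage, not from the original source): every
   emitter takes the current instruction buffer offset and returns the updated one,
   so callers simply thread 'off' through each call, e.g.:

       off = iemNativeEmitMarker(pReNative, off, 0x1234);

   where 0x1234 is an arbitrary example info value used to tag the spot in the
   disassembly. */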
83
84/**
85 * Emit a breakpoint instruction.
86 */
87DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
88{
89#ifdef RT_ARCH_AMD64
90 pCodeBuf[off++] = 0xcc;
91 RT_NOREF(uInfo); /** @todo use multibyte nop for info? */
92
93#elif defined(RT_ARCH_ARM64)
94 pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));
95
96#else
97# error "port me"
98#endif
99 return off;
100}
101
102
103/**
104 * Emit a breakpoint instruction.
105 */
106DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
107{
108#ifdef RT_ARCH_AMD64
109 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
110#elif defined(RT_ARCH_ARM64)
111 off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
112#else
113# error "port me"
114#endif
115 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
116 return off;
117}
118
119
120/*********************************************************************************************************************************
121* Loads, Stores and Related Stuff. *
122*********************************************************************************************************************************/
123
124#ifdef RT_ARCH_AMD64
125/**
126 * Common bit of iemNativeEmitLoadGprByGpr and friends.
127 */
128DECL_FORCE_INLINE(uint32_t)
129iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
130{
131 if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
132 {
133 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
134 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
135 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
136 }
137 else if (offDisp == (int8_t)offDisp)
138 {
139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
140 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
141 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
142 pbCodeBuf[off++] = (uint8_t)offDisp;
143 }
144 else
145 {
146 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
147 if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
148 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
149 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
150 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
151 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
152 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
153 }
154 return off;
155}
156#endif /* RT_ARCH_AMD64 */
157
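/* Encoding sketch (illustrative assumption): for iGprReg = RAX (0), iGprBase = RSP (4)
   and offDisp = 8, the disp8 path above emits the ModRM/SIB/displacement bytes
   0x44 0x24 0x08, i.e. the [rsp+8] addressing form; the opcode byte and any REX
   prefix are supplied by the caller. */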
158/**
159 * Emits setting a GPR to zero.
160 */
161DECL_INLINE_THROW(uint32_t)
162iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
163{
164#ifdef RT_ARCH_AMD64
165 /* xor gpr32, gpr32 */
166 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
167 if (iGpr >= 8)
168 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
169 pbCodeBuf[off++] = 0x33;
170 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
171
172#elif defined(RT_ARCH_ARM64)
173 /* mov gpr, #0x0 */
174 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
175 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;
176
177#else
178# error "port me"
179#endif
180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
181 return off;
182}
183
184
185/**
186 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
187 * buffer space.
188 *
189 * Max buffer consumption:
190 * - AMD64: 6 instruction bytes.
191 * - ARM64: 2 instruction words (8 bytes).
192 *
193 * @note The top 32 bits will be cleared.
194 */
195DECL_FORCE_INLINE(uint32_t)
196iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
197{
198#ifdef RT_ARCH_AMD64
199 if (uImm32 == 0)
200 {
201 /* xor gpr, gpr */
202 if (iGpr >= 8)
203 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
204 pCodeBuf[off++] = 0x33;
205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
206 }
207 else
208 {
209 /* mov gpr, imm32 */
210 if (iGpr >= 8)
211 pCodeBuf[off++] = X86_OP_REX_B;
212 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
213 pCodeBuf[off++] = RT_BYTE1(uImm32);
214 pCodeBuf[off++] = RT_BYTE2(uImm32);
215 pCodeBuf[off++] = RT_BYTE3(uImm32);
216 pCodeBuf[off++] = RT_BYTE4(uImm32);
217 }
218
219#elif defined(RT_ARCH_ARM64)
220 if ((uImm32 >> 16) == 0)
221 /* movz gpr, imm16 */
222 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
223 else if ((uImm32 & UINT32_C(0xffff)) == 0)
224 /* movz gpr, imm16, lsl #16 */
225 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
226 else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
227 /* movn gpr, imm16, lsl #16 */
228 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
229 else if ((uImm32 >> 16) == UINT32_C(0xffff))
230 /* movn gpr, imm16 */
231 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
232 else
233 {
234 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
235 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
236 }
237
238#else
239# error "port me"
240#endif
241 return off;
242}
243
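/* Illustrative sketch (assumed expansion): uImm32 = 0x12345678 matches none of the
   single-instruction cases, so the ARM64 path above emits roughly
       movz w<iGpr>, #0x5678
       movk w<iGpr>, #0x1234, lsl #16
   whereas e.g. uImm32 = 0xffff1234 is covered by the single
   'movn w<iGpr>, #0xedcb' case. */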
244
245/**
246 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
247 * buffer space.
248 *
249 * Max buffer consumption:
250 * - AMD64: 6 instruction bytes.
251 * - ARM64: 2 instruction words (8 bytes).
252 *
253 * @note The top 32 bits will be cleared.
254 */
255template<uint32_t const a_uImm32>
256DECL_FORCE_INLINE(uint32_t) iemNativeEmitLoadGpr32ImmExT(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr)
257{
258#ifdef RT_ARCH_AMD64
259 if (a_uImm32 == 0)
260 {
261 /* xor gpr, gpr */
262 if (iGpr >= 8)
263 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
264 pCodeBuf[off++] = 0x33;
265 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
266 }
267 else
268 {
269 /* mov gpr, imm32 */
270 if (iGpr >= 8)
271 pCodeBuf[off++] = X86_OP_REX_B;
272 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
273 pCodeBuf[off++] = RT_BYTE1(a_uImm32);
274 pCodeBuf[off++] = RT_BYTE2(a_uImm32);
275 pCodeBuf[off++] = RT_BYTE3(a_uImm32);
276 pCodeBuf[off++] = RT_BYTE4(a_uImm32);
277 }
278
279#elif defined(RT_ARCH_ARM64)
280 if RT_CONSTEXPR_IF((a_uImm32 >> 16) == 0)
281 /* movz gpr, imm16 */
282 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32, 0, false /*f64Bit*/);
283 else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == 0)
284 /* movz gpr, imm16, lsl #16 */
285 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
286 else if RT_CONSTEXPR_IF((a_uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
287 /* movn gpr, imm16, lsl #16 */
288 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32 >> 16, 1, false /*f64Bit*/);
289 else if RT_CONSTEXPR_IF((a_uImm32 >> 16) == UINT32_C(0xffff))
290 /* movn gpr, imm16 */
291 pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~a_uImm32, 0, false /*f64Bit*/);
292 else
293 {
294 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, a_uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
295 pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, a_uImm32 >> 16, 1, false /*f64Bit*/);
296 }
297
298#else
299# error "port me"
300#endif
301 return off;
302}
303
304
305/**
306 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
307 * buffer space.
308 *
309 * Max buffer consumption:
310 * - AMD64: 10 instruction bytes.
311 * - ARM64: 4 instruction words (16 bytes).
312 */
313DECL_FORCE_INLINE(uint32_t)
314iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
315{
316#ifdef RT_ARCH_AMD64
317 if (uImm64 == 0)
318 {
319 /* xor gpr, gpr */
320 if (iGpr >= 8)
321 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
322 pCodeBuf[off++] = 0x33;
323 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
324 }
325 else if (uImm64 <= UINT32_MAX)
326 {
327 /* mov gpr, imm32 */
328 if (iGpr >= 8)
329 pCodeBuf[off++] = X86_OP_REX_B;
330 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
331 pCodeBuf[off++] = RT_BYTE1(uImm64);
332 pCodeBuf[off++] = RT_BYTE2(uImm64);
333 pCodeBuf[off++] = RT_BYTE3(uImm64);
334 pCodeBuf[off++] = RT_BYTE4(uImm64);
335 }
336 else if (uImm64 == (uint64_t)(int32_t)uImm64)
337 {
338 /* mov gpr, sx(imm32) */
339 if (iGpr < 8)
340 pCodeBuf[off++] = X86_OP_REX_W;
341 else
342 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
343 pCodeBuf[off++] = 0xc7;
344 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
345 pCodeBuf[off++] = RT_BYTE1(uImm64);
346 pCodeBuf[off++] = RT_BYTE2(uImm64);
347 pCodeBuf[off++] = RT_BYTE3(uImm64);
348 pCodeBuf[off++] = RT_BYTE4(uImm64);
349 }
350 else
351 {
352 /* mov gpr, imm64 */
353 if (iGpr < 8)
354 pCodeBuf[off++] = X86_OP_REX_W;
355 else
356 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
357 pCodeBuf[off++] = 0xb8 + (iGpr & 7);
358 pCodeBuf[off++] = RT_BYTE1(uImm64);
359 pCodeBuf[off++] = RT_BYTE2(uImm64);
360 pCodeBuf[off++] = RT_BYTE3(uImm64);
361 pCodeBuf[off++] = RT_BYTE4(uImm64);
362 pCodeBuf[off++] = RT_BYTE5(uImm64);
363 pCodeBuf[off++] = RT_BYTE6(uImm64);
364 pCodeBuf[off++] = RT_BYTE7(uImm64);
365 pCodeBuf[off++] = RT_BYTE8(uImm64);
366 }
367
368#elif defined(RT_ARCH_ARM64)
369 /*
370 * Quick simplification: Do 32-bit load if top half is zero.
371 */
372 if (uImm64 <= UINT32_MAX)
373 return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);
374
375 /*
376 * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
377 * supply the remaining bits using 'movk gpr, imm16, lsl #x'.
378 *
379 * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
380 * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
381 * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
382 * after the first non-zero immediate component so we switch to movk for
383 * the remainder.
384 */
385 unsigned cZeroHalfWords = !( uImm64 & UINT16_MAX)
386 + !((uImm64 >> 16) & UINT16_MAX)
387 + !((uImm64 >> 32) & UINT16_MAX)
388 + !((uImm64 >> 48) & UINT16_MAX);
389 unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
390 : ( (uImm64 & UINT16_MAX) == UINT16_MAX)
391 + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
392 + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
393 + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
394 if (cFfffHalfWords <= cZeroHalfWords)
395 {
396 uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
397
398 /* movz gpr, imm16 */
399 uint32_t uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
400 if (uImmPart || cZeroHalfWords == 4)
401 {
402 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
403 fMovBase |= RT_BIT_32(29);
404 }
405 /* mov[z/k] gpr, imm16, lsl #16 */
406 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
407 if (uImmPart)
408 {
409 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
410 fMovBase |= RT_BIT_32(29);
411 }
412 /* mov[z/k] gpr, imm16, lsl #32 */
413 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
414 if (uImmPart)
415 {
416 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
417 fMovBase |= RT_BIT_32(29);
418 }
419 /* mov[z/k] gpr, imm16, lsl #48 */
420 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
421 if (uImmPart)
422 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
423 }
424 else
425 {
426 uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
427
428 /* find the first half-word that isn't UINT16_MAX. */
429 uint32_t const iHwNotFfff = (uImm64 & UINT16_MAX) != UINT16_MAX ? 0
430 : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
431 : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
432
433 /* movn gpr, imm16, lsl #iHwNotFfff*16 */
434 uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
435 pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
436 fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
437 /* movk gpr, imm16 */
438 if (iHwNotFfff != 0)
439 {
440 uImmPart = (uint32_t)((uImm64 >> 0) & UINT32_C(0xffff));
441 if (uImmPart != UINT32_C(0xffff))
442 pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
443 }
444 /* movk gpr, imm16, lsl #16 */
445 if (iHwNotFfff != 1)
446 {
447 uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
448 if (uImmPart != UINT32_C(0xffff))
449 pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
450 }
451 /* movk gpr, imm16, lsl #32 */
452 if (iHwNotFfff != 2)
453 {
454 uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
455 if (uImmPart != UINT32_C(0xffff))
456 pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
457 }
458 /* movk gpr, imm16, lsl #48 */
459 if (iHwNotFfff != 3)
460 {
461 uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
462 if (uImmPart != UINT32_C(0xffff))
463 pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
464 }
465 }
466
467#else
468# error "port me"
469#endif
470 return off;
471}
472
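/* Illustrative sketch (assumed expansion): the 64-bit loader above prefers movz/movk
   when zero half-words dominate and movn/movk when 0xffff half-words dominate.
   For uImm64 = 0x0000123400000000 this yields a single
       movz x<iGpr>, #0x1234, lsl #32
   while uImm64 = 0xffffffffffff1234 becomes a single
       movn x<iGpr>, #0xedcb
   with all remaining 0xffff half-words skipped. */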
473
474/**
475 * Emits loading a constant into a 64-bit GPR
476 */
477DECL_INLINE_THROW(uint32_t)
478iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
479{
480#ifdef RT_ARCH_AMD64
481 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
482#elif defined(RT_ARCH_ARM64)
483 off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
484#else
485# error "port me"
486#endif
487 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
488 return off;
489}
490
491
492/**
493 * Emits loading a constant into a 32-bit GPR.
494 * @note The top 32 bits will be cleared.
495 */
496DECL_INLINE_THROW(uint32_t)
497iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
498{
499#ifdef RT_ARCH_AMD64
500 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
501#elif defined(RT_ARCH_ARM64)
502 off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
503#else
504# error "port me"
505#endif
506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
507 return off;
508}
509
510
511/**
512 * Emits loading a constant into an 8-bit GPR.
513 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
514 * only the ARM64 version does that.
515 */
516DECL_INLINE_THROW(uint32_t)
517iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
518{
519#ifdef RT_ARCH_AMD64
520 /* mov gpr, imm8 */
521 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
522 if (iGpr >= 8)
523 pbCodeBuf[off++] = X86_OP_REX_B;
524 else if (iGpr >= 4)
525 pbCodeBuf[off++] = X86_OP_REX;
526 pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
527 pbCodeBuf[off++] = RT_BYTE1(uImm8);
528
529#elif defined(RT_ARCH_ARM64)
530 /* movz gpr, imm16, lsl #0 */
531 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
532 pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;
533
534#else
535# error "port me"
536#endif
537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
538 return off;
539}
540
541
542#ifdef RT_ARCH_AMD64
543/**
544 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
545 */
546DECL_FORCE_INLINE(uint32_t)
547iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
548{
549 if (offVCpu < 128)
550 {
551 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
552 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
553 }
554 else
555 {
556 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
557 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
558 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
559 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
560 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
561 }
562 return off;
563}
564
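/* Encoding note (illustrative): with IEMNATIVE_REG_FIXED_PVMCPU as the base register,
   an offVCpu below 128 is encoded as ModRM + disp8 (2 bytes), while larger offsets
   need ModRM + disp32 (5 bytes); e.g. offVCpu = 0x40 becomes the single displacement
   byte 0x40, whereas offVCpu = 0x1000 becomes the little-endian bytes 00 10 00 00. */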
565/**
566 * Special variant of iemNativeEmitGprByVCpuDisp for accessing the VM structure.
567 */
568DECL_FORCE_INLINE(uint32_t)
569iemNativeEmitGprByVCpuSignedDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu)
570{
571 if (offVCpu < 128 && offVCpu >= -128)
572 {
573 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
574 pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
575 }
576 else
577 {
578 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
579 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
580 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
581 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
582 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
583 }
584 return off;
585}
586
587#elif defined(RT_ARCH_ARM64)
588
589/**
590 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
591 *
592 * @note Loads can use @a iGprReg for large offsets, but stores require a
593 * temporary register (@a iGprTmp).
594 * @note DON'T try this with prefetch.
595 */
596DECL_FORCE_INLINE_THROW(uint32_t)
597iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
598 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
599{
600 /*
601 * There are a couple of ldr variants that take an immediate offset, so
602 * try to use those if we can; otherwise we have to use a temporary register
603 * to help with the addressing.
604 */
605 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
606 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
607 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
608 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
609 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
610 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
611 else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
612 {
613 /* The offset is too large, so we must load it into a register and use
614 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
615 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
616 if (iGprTmp == UINT8_MAX)
617 iGprTmp = iGprReg;
618 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
619 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
620 }
621 else
622# ifdef IEM_WITH_THROW_CATCH
623 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
624# else
625 AssertReleaseFailedStmt(off = UINT32_MAX);
626# endif
627
628 return off;
629}
630
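/* Worked example (illustrative assumption): loading a 64-bit field at offVCpu = 0x40
   passes the scaled-immediate check (0x40 < 4096*8 and 8-byte aligned), so a single
   'ldr x<iGprReg>, [x<pVCpu>, #0x40]' is emitted.  An offset that fits neither the
   pVCpu- nor the CPUMCTX-relative scaled forms is instead materialised in a register
   (iGprTmp, or iGprReg itself for loads) and accessed via the register-offset form
   'ldr Xt, [Xn, Xm]'. */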
631/**
632 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
633 */
634DECL_FORCE_INLINE_THROW(uint32_t)
635iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
636 uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
637{
638 /*
639 * There are a couple of ldr variants that take an immediate offset, so
640 * try to use those if we can; otherwise we have to use a temporary register
641 * to help with the addressing.
642 */
643 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
644 {
645 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
646 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
647 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
648 }
649 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
650 {
651 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
652 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
653 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
654 }
655 else
656 {
657 /* The offset is too large, so we must load it into a register and use
658 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
659 /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
660 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
661 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
662 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
663 IEMNATIVE_REG_FIXED_TMP0);
664 }
665 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
666 return off;
667}
668
669
670/**
671 * Special variant of iemNativeEmitGprByVCpuLdStEx for accessing the VM
672 * structure.
673 *
674 * @note Loads can use @a iGprReg for large offsets, but stores require a
675 * temporary register (@a iGprTmp).
676 * @note DON'T try this with prefetch.
677 */
678DECL_FORCE_INLINE_THROW(uint32_t)
679iemNativeEmitGprBySignedVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offVCpu,
680 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
681{
682 Assert((uint32_t)RT_ABS(offVCpu) < RT_BIT_32(28)); /* we should be well clear of any problematic sign-extension range. */
683 Assert(!((uint32_t)RT_ABS(offVCpu) & (cbData - 1)));
684
685 /*
686 * For negative offsets we need to put the displacement in a register,
687 * as the two variants with signed immediates will either post- or
688 * pre-increment the base address register.
689 */
690 if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
691 {
692 uint8_t const idxIndexReg = !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) ? iGprReg : IEMNATIVE_REG_FIXED_TMP0;
693 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxIndexReg, offVCpu / (int32_t)cbData);
694 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, idxIndexReg,
695 kArmv8A64InstrLdStExtend_Sxtw, cbData > 1 /*fShifted*/);
696 }
697 else
698# ifdef IEM_WITH_THROW_CATCH
699 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
700# else
701 AssertReleaseFailedStmt(off = UINT32_MAX);
702# endif
703
704 return off;
705}
706
707/**
708 * Special variant of iemNativeEmitGprByVCpuLdSt for accessing the VM structure.
709 */
710DECL_FORCE_INLINE_THROW(uint32_t)
711iemNativeEmitGprBySignedVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
712 int32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
713{
714 off = iemNativeEmitGprBySignedVCpuLdStEx(iemNativeInstrBufEnsure(pReNative, off, 2 + 1), off, iGprReg,
715 offVCpu, enmOperation, cbData, IEMNATIVE_REG_FIXED_TMP0);
716 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
717 return off;
718}
719
720#endif /* RT_ARCH_ARM64 */
721
722
723/**
724 * Emits a 64-bit GPR load of a VCpu value.
725 */
726DECL_FORCE_INLINE_THROW(uint32_t)
727iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
728{
729#ifdef RT_ARCH_AMD64
730 /* mov reg64, mem64 */
731 if (iGpr < 8)
732 pCodeBuf[off++] = X86_OP_REX_W;
733 else
734 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
735 pCodeBuf[off++] = 0x8b;
736 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
737
738#elif defined(RT_ARCH_ARM64)
739 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
740
741#else
742# error "port me"
743#endif
744 return off;
745}
746
747
748/**
749 * Emits a 64-bit GPR load of a VCpu value.
750 */
751DECL_INLINE_THROW(uint32_t)
752iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
753{
754#ifdef RT_ARCH_AMD64
755 off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
756 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
757
758#elif defined(RT_ARCH_ARM64)
759 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
760
761#else
762# error "port me"
763#endif
764 return off;
765}
766
767/**
768 * Emits a 32-bit GPR load of a VCpu value.
769 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
770 */
771DECL_FORCE_INLINE_THROW(uint32_t)
772iemNativeEmitLoadGprFromVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
773{
774#ifdef RT_ARCH_AMD64
775 /* mov reg32, mem32 */
776 if (iGpr >= 8)
777 pCodeBuf[off++] = X86_OP_REX_R;
778 pCodeBuf[off++] = 0x8b;
779 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
780
781#elif defined(RT_ARCH_ARM64)
782 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
783
784#else
785# error "port me"
786#endif
787 return off;
788}
789
790
791/**
792 * Emits a 32-bit GPR load of a VCpu value.
793 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
794 */
795DECL_INLINE_THROW(uint32_t)
796iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
797{
798#ifdef RT_ARCH_AMD64
799 off = iemNativeEmitLoadGprFromVCpuU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
800 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
801
802#elif defined(RT_ARCH_ARM64)
803 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
804
805#else
806# error "port me"
807#endif
808 return off;
809}
810
811
812/**
813 * Emits a 16-bit GPR load of a VCpu value.
814 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
815 */
816DECL_FORCE_INLINE_THROW(uint32_t)
817iemNativeEmitLoadGprFromVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
818{
819#ifdef RT_ARCH_AMD64
820 /* movzx reg32, mem16 */
821 if (iGpr >= 8)
822 pCodeBuf[off++] = X86_OP_REX_R;
823 pCodeBuf[off++] = 0x0f;
824 pCodeBuf[off++] = 0xb7;
825 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
826
827#elif defined(RT_ARCH_ARM64)
828 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
829
830#else
831# error "port me"
832#endif
833 return off;
834}
835
836
837/**
838 * Emits a 16-bit GPR load of a VCpu value.
839 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
840 */
841DECL_INLINE_THROW(uint32_t)
842iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
843{
844#ifdef RT_ARCH_AMD64
845 off = iemNativeEmitLoadGprFromVCpuU16Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGpr, offVCpu);
846 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
847
848#elif defined(RT_ARCH_ARM64)
849 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
850
851#else
852# error "port me"
853#endif
854 return off;
855}
856
857
858/**
859 * Emits an 8-bit GPR load of a VCpu value.
860 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
861 */
862DECL_INLINE_THROW(uint32_t)
863iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
864{
865#ifdef RT_ARCH_AMD64
866 /* movzx reg32, mem8 */
867 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
868 if (iGpr >= 8)
869 pbCodeBuf[off++] = X86_OP_REX_R;
870 pbCodeBuf[off++] = 0x0f;
871 pbCodeBuf[off++] = 0xb6;
872 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
873 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
874
875#elif defined(RT_ARCH_ARM64)
876 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
877
878#else
879# error "port me"
880#endif
881 return off;
882}
883
884
885/**
886 * Emits a store of a GPR value to a 64-bit VCpu field.
887 */
888DECL_FORCE_INLINE_THROW(uint32_t)
889iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
890 uint8_t iGprTmp = UINT8_MAX)
891{
892#ifdef RT_ARCH_AMD64
893 /* mov mem64, reg64 */
894 if (iGpr < 8)
895 pCodeBuf[off++] = X86_OP_REX_W;
896 else
897 pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
898 pCodeBuf[off++] = 0x89;
899 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
900 RT_NOREF(iGprTmp);
901
902#elif defined(RT_ARCH_ARM64)
903 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
904
905#else
906# error "port me"
907#endif
908 return off;
909}
910
911
912/**
913 * Emits a store of a GPR value to a 64-bit VCpu field.
914 */
915DECL_INLINE_THROW(uint32_t)
916iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
917{
918#ifdef RT_ARCH_AMD64
919 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
920#elif defined(RT_ARCH_ARM64)
921 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
922 IEMNATIVE_REG_FIXED_TMP0);
923#else
924# error "port me"
925#endif
926 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
927 return off;
928}
929
930
931/**
932 * Emits a store of a GPR value to a 32-bit VCpu field.
933 *
934 * @note Limited range on ARM64.
935 */
936DECL_INLINE_THROW(uint32_t)
937iemNativeEmitStoreGprToVCpuU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
938 uint8_t iGprTmp = UINT8_MAX)
939{
940#ifdef RT_ARCH_AMD64
941 /* mov mem32, reg32 */
942 if (iGpr >= 8)
943 pCodeBuf[off++] = X86_OP_REX_R;
944 pCodeBuf[off++] = 0x89;
945 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
946 RT_NOREF(iGprTmp);
947
948#elif defined(RT_ARCH_ARM64)
949 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
950
951#else
952# error "port me"
953#endif
954 return off;
955}
956
957
958/**
959 * Emits a store of a GPR value to a 32-bit VCpu field.
960 */
961DECL_INLINE_THROW(uint32_t)
962iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
963{
964#ifdef RT_ARCH_AMD64
965 /* mov mem32, reg32 */
966 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
967 if (iGpr >= 8)
968 pbCodeBuf[off++] = X86_OP_REX_R;
969 pbCodeBuf[off++] = 0x89;
970 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
971 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
972
973#elif defined(RT_ARCH_ARM64)
974 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
975
976#else
977# error "port me"
978#endif
979 return off;
980}
981
982
983/**
984 * Emits a store of a GPR value to a 16-bit VCpu field.
985 *
986 * @note Limited range on ARM64.
987 */
988DECL_INLINE_THROW(uint32_t)
989iemNativeEmitStoreGprToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
990 uint8_t iGprTmp = UINT8_MAX)
991{
992#ifdef RT_ARCH_AMD64
993 /* mov mem16, reg16 */
994 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
995 if (iGpr >= 8)
996 pCodeBuf[off++] = X86_OP_REX_R;
997 pCodeBuf[off++] = 0x89;
998 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
999 RT_NOREF(iGprTmp);
1000
1001#elif defined(RT_ARCH_ARM64)
1002 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
1003
1004#else
1005# error "port me"
1006#endif
1007 return off;
1008}
1009
1010
1011/**
1012 * Emits a store of a GPR value to a 16-bit VCpu field.
1013 */
1014DECL_INLINE_THROW(uint32_t)
1015iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
1016{
1017#ifdef RT_ARCH_AMD64
1018 /* mov mem16, reg16 */
1019 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1020 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1021 if (iGpr >= 8)
1022 pbCodeBuf[off++] = X86_OP_REX_R;
1023 pbCodeBuf[off++] = 0x89;
1024 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
1025 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1026
1027#elif defined(RT_ARCH_ARM64)
1028 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
1029
1030#else
1031# error "port me"
1032#endif
1033 return off;
1034}
1035
1036
1037/**
1038 * Emits a store of a GPR value to an 8-bit VCpu field.
1039 */
1040DECL_INLINE_THROW(uint32_t)
1041iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
1042{
1043#ifdef RT_ARCH_AMD64
1044 /* mov mem8, reg8 */
1045 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1046 if (iGpr >= 8)
1047 pbCodeBuf[off++] = X86_OP_REX_R;
1048 pbCodeBuf[off++] = 0x88;
1049 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
1050 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1051
1052#elif defined(RT_ARCH_ARM64)
1053 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1054
1055#else
1056# error "port me"
1057#endif
1058 return off;
1059}
1060
1061
1062/**
1063 * Emits a store of an immediate value to a 64-bit VCpu field.
1064 *
1065 * @note Will allocate temporary registers on both ARM64 and AMD64.
1066 */
1067DECL_FORCE_INLINE_THROW(uint32_t)
1068iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
1069{
1070#ifdef RT_ARCH_AMD64
1071 /* Load the immediate into a temporary register and store it as a 64-bit value. */
1072 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1073 off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
1074 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1075 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1076
1077#elif defined(RT_ARCH_ARM64)
1078 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1079 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
1080 if (idxRegImm != ARMV8_A64_REG_XZR)
1081 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1082
1083#else
1084# error "port me"
1085#endif
1086 return off;
1087}
1088
1089
1090/**
1091 * Emits a store of an immediate value to a 32-bit VCpu field.
1092 *
1093 * @note ARM64: Will allocate temporary registers.
1094 */
1095DECL_FORCE_INLINE_THROW(uint32_t)
1096iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
1097{
1098#ifdef RT_ARCH_AMD64
1099 /* mov mem32, imm32 */
1100 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1101 pCodeBuf[off++] = 0xc7;
1102 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1103 pCodeBuf[off++] = RT_BYTE1(uImm);
1104 pCodeBuf[off++] = RT_BYTE2(uImm);
1105 pCodeBuf[off++] = RT_BYTE3(uImm);
1106 pCodeBuf[off++] = RT_BYTE4(uImm);
1107 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1108
1109#elif defined(RT_ARCH_ARM64)
1110 uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
1111 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
1112 if (idxRegImm != ARMV8_A64_REG_XZR)
1113 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1114
1115#else
1116# error "port me"
1117#endif
1118 return off;
1119}
1120
1121
1122
1123/**
1124 * Emits a store of an immediate value to a 16-bit VCpu field.
1125 *
1126 * @note ARM64: @a idxTmp1 is always required! Whether @a idxTmp2 is needed
1127 * depends on whether the offset can be encoded as an immediate or not. The
1128 * @a offVCpu immediate range is 0..8190 bytes from VMCPU and the same from CPUMCPU.
1129 */
1130DECL_FORCE_INLINE_THROW(uint32_t)
1131iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
1132 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
1133{
1134#ifdef RT_ARCH_AMD64
1135 /* mov mem16, imm16 */
1136 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1137 pCodeBuf[off++] = 0xc7;
1138 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1139 pCodeBuf[off++] = RT_BYTE1(uImm);
1140 pCodeBuf[off++] = RT_BYTE2(uImm);
1141 RT_NOREF(idxTmp1, idxTmp2);
1142
1143#elif defined(RT_ARCH_ARM64)
1144 if (idxTmp1 != UINT8_MAX)
1145 {
1146 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
1147 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
1148 sizeof(uint16_t), idxTmp2);
1149 }
1150 else
1151# ifdef IEM_WITH_THROW_CATCH
1152 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
1153# else
1154 AssertReleaseFailedStmt(off = UINT32_MAX);
1155# endif
1156
1157#else
1158# error "port me"
1159#endif
1160 return off;
1161}
1162
1163
1164/**
1165 * Emits a store of an immediate value to a 8-bit VCpu field.
1166 */
1167DECL_INLINE_THROW(uint32_t)
1168iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu,
1169 uint8_t idxRegTmp = UINT8_MAX)
1170{
1171#ifdef RT_ARCH_AMD64
1172 /* mov mem8, imm8 */
1173 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1174 pbCodeBuf[off++] = 0xc6;
1175 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
1176 pbCodeBuf[off++] = bImm;
1177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1178 RT_NOREF(idxRegTmp);
1179
1180#elif defined(RT_ARCH_ARM64)
1181 /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
1182 if (idxRegTmp != UINT8_MAX)
1183 {
1184 Assert(idxRegTmp != IEMNATIVE_REG_FIXED_TMP0);
1185 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegTmp, bImm);
1186 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegTmp, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1187 }
1188 else
1189 {
1190 uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
1191 off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
1192 iemNativeRegFreeTmpImm(pReNative, idxRegImm);
1193 }
1194
1195#else
1196# error "port me"
1197#endif
1198 return off;
1199}
1200
1201
1202/**
1203 * Emits a load effective address of a VCpu field into a GPR.
1204 */
1205DECL_INLINE_THROW(uint32_t)
1206iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
1207{
1208#ifdef RT_ARCH_AMD64
1209 /* lea gprdst, [rbx + offDisp] */
1210 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1211 if (iGprDst < 8)
1212 pbCodeBuf[off++] = X86_OP_REX_W;
1213 else
1214 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
1215 pbCodeBuf[off++] = 0x8d;
1216 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);
1217
1218#elif defined(RT_ARCH_ARM64)
1219 if (offVCpu < (unsigned)_4K)
1220 {
1221 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1222 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
1223 }
1224 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
1225 {
1226 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1227 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
1228 offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
1229 }
1230 else if (offVCpu <= 0xffffffU)
1231 {
1232 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1233 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
1234 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1235 if (offVCpu & 0xfffU)
1236 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
1237 }
1238 else
1239 {
1240 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
1241 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
1242 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1243 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
1244 }
1245
1246#else
1247# error "port me"
1248#endif
1249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1250 return off;
1251}
1252
1253
1254/** This is just a typesafe alternative to RT_UOFFSETOF. */
1255DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
1256{
1257 uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
1258 Assert(off < sizeof(VMCPU));
1259 return off;
1260}
1261
1262
1263/** This is just a typesafe alternative to RT_UOFFSETOF. */
1264DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
1265{
1266 uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
1267 Assert(off < sizeof(VMCPU));
1268 return off;
1269}
1270
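/* Sketch (hypothetical member name, for illustration only): these helpers yield the
   same offset as RT_UOFFSETOF, but with the pointer types checked by the compiler and
   the result asserted to lie inside VMCPU, e.g.:

       uint32_t const offCounter = iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatExample);

   where 'StatExample' stands in for whichever STAMCOUNTER member is meant. */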
1271
1272/**
1273 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1274 *
1275 * @note The two temp registers are not required for AMD64. ARM64 always
1276 * requires the first, and the 2nd is needed if the offset cannot be
1277 * encoded as an immediate.
1278 */
1279DECL_FORCE_INLINE(uint32_t)
1280iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1281{
1282#ifdef RT_ARCH_AMD64
1283 /* inc qword [pVCpu + off] */
1284 pCodeBuf[off++] = X86_OP_REX_W;
1285 pCodeBuf[off++] = 0xff;
1286 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1287 RT_NOREF(idxTmp1, idxTmp2);
1288
1289#elif defined(RT_ARCH_ARM64)
1290 /* Determine how we're to access pVCpu first. */
1291 uint32_t const cbData = sizeof(STAMCOUNTER);
1292 if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
1293 {
1294 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1295 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
1296 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1297 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1298 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
1299 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1300 }
1301 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
1302 {
1303 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1304 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1305 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1306 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1307 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1308 }
1309 else
1310 {
1311 /* The offset is too large, so we must load it into a register and use
1312 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
1313 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1314 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1315 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1316 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1317 }
1318
1319#else
1320# error "port me"
1321#endif
1322 return off;
1323}
1324
1325
1326/**
1327 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
1328 *
1329 * @note The two temp registers are not required for AMD64. ARM64 always
1330 * requires the first, and the 2nd is needed if the offset cannot be
1331 * encoded as an immediate.
1332 */
1333DECL_FORCE_INLINE(uint32_t)
1334iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1335{
1336#ifdef RT_ARCH_AMD64
1337 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
1338#elif defined(RT_ARCH_ARM64)
1339 off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1340#else
1341# error "port me"
1342#endif
1343 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1344 return off;
1345}
1346
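/* Usage sketch (assumed, not from the original source): callers typically allocate two
   scratch registers that are only consumed on ARM64, e.g. with offCounter being an
   example offset such as the one from the sketch after the offset helpers above:

       uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
       uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
       off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offCounter);
       iemNativeRegFreeTmp(pReNative, idxTmp2);
       iemNativeRegFreeTmp(pReNative, idxTmp1);

   On AMD64 the emitted code is just 'inc qword [pVCpu + offCounter]'. */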
1347
1348/**
1349 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1350 *
1351 * @note The two temp registers are not required for AMD64. ARM64 always
1352 * requires the first, and the 2nd is needed if the offset cannot be
1353 * encoded as an immediate.
1354 */
1355DECL_FORCE_INLINE(uint32_t)
1356iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1357{
1358 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1359#ifdef RT_ARCH_AMD64
1360 /* inc dword [pVCpu + offVCpu] */
1361 pCodeBuf[off++] = 0xff;
1362 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
1363 RT_NOREF(idxTmp1, idxTmp2);
1364
1365#elif defined(RT_ARCH_ARM64)
1366 /* Determine how we're to access pVCpu first. */
1367 uint32_t const cbData = sizeof(uint32_t);
1368 if (offVCpu < (unsigned)(_4K * cbData))
1369 {
1370 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1371 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
1372 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1373 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1374 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
1375 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1376 }
1377 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1378 {
1379 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1380 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1381 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1382 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
1383 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1384 }
1385 else
1386 {
1387 /* The offset is too large, so we must load it into a register and use
1388 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1389 of the instruction if that'll reduce the constant to 16-bits. */
1390 if (offVCpu / cbData < (unsigned)UINT16_MAX)
1391 {
1392 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
1393 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1394 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1395 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1396 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
1397 idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
1398 }
1399 else
1400 {
1401 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
1402 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1403 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
1404 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
1405 }
1406 }
1407
1408#else
1409# error "port me"
1410#endif
1411 return off;
1412}
1413
1414
1415/**
1416 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
1417 *
1418 * @note The two temp registers are not required for AMD64. ARM64 always
1419 * requires the first, and the 2nd is needed if the offset cannot be
1420 * encoded as an immediate.
1421 */
1422DECL_FORCE_INLINE(uint32_t)
1423iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
1424{
1425#ifdef RT_ARCH_AMD64
1426 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
1427#elif defined(RT_ARCH_ARM64)
1428 off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
1429#else
1430# error "port me"
1431#endif
1432 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1433 return off;
1434}
1435
1436
1437/**
1438 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
1439 *
1440 * @note May allocate temporary registers (not AMD64).
1441 */
1442DECL_FORCE_INLINE(uint32_t)
1443iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1444{
1445 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1446#ifdef RT_ARCH_AMD64
1447 /* or dword [pVCpu + offVCpu], imm8/32 */
1448 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1449 if (fMask < 0x80)
1450 {
1451 pCodeBuf[off++] = 0x83;
1452 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1453 pCodeBuf[off++] = (uint8_t)fMask;
1454 }
1455 else
1456 {
1457 pCodeBuf[off++] = 0x81;
1458 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
1459 pCodeBuf[off++] = RT_BYTE1(fMask);
1460 pCodeBuf[off++] = RT_BYTE2(fMask);
1461 pCodeBuf[off++] = RT_BYTE3(fMask);
1462 pCodeBuf[off++] = RT_BYTE4(fMask);
1463 }
1464
1465#elif defined(RT_ARCH_ARM64)
1466 /* If the constant is unwieldy we'll need a register to hold it as well. */
1467 uint32_t uImmSizeLen, uImmRotate;
1468 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1469 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1470
1471 /* We need a temp register for holding the member value we're modifying. */
1472 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1473
1474 /* Determine how we're to access pVCpu first. */
1475 uint32_t const cbData = sizeof(uint32_t);
1476 if (offVCpu < (unsigned)(_4K * cbData))
1477 {
1478 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1479 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1480 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1481 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1482 if (idxTmpMask == UINT8_MAX)
1483 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1484 else
1485 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1486 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1487 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1488 }
1489 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1490 {
1491 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1492 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1493 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1494 if (idxTmpMask == UINT8_MAX)
1495 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1496 else
1497 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1498 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1499 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1500 }
1501 else
1502 {
1503 /* The offset is too large, so we must load it into a register and use
1504 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1505 of the instruction if that'll reduce the constant to 16-bits. */
1506 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1507 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1508 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1509 if (fShifted)
1510 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1511 else
1512 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1513
1514 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1515 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1516
1517 if (idxTmpMask == UINT8_MAX)
1518 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1519 else
1520 pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1521
1522 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1523 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1524 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1525 }
1526 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1527 if (idxTmpMask != UINT8_MAX)
1528 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1529
1530#else
1531# error "port me"
1532#endif
1533 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1534 return off;
1535}
1536
1537
1538/**
1539 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
1540 *
1541 * @note May allocate temporary registers (not AMD64).
1542 */
1543DECL_FORCE_INLINE(uint32_t)
1544iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
1545{
1546 Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
1547#ifdef RT_ARCH_AMD64
1548 /* and dword [pVCpu + offVCpu], imm8/32 */
1549 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1550 if (fMask < 0x80)
1551 {
1552 pCodeBuf[off++] = 0x83;
1553 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1554 pCodeBuf[off++] = (uint8_t)fMask;
1555 }
1556 else
1557 {
1558 pCodeBuf[off++] = 0x81;
1559 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
1560 pCodeBuf[off++] = RT_BYTE1(fMask);
1561 pCodeBuf[off++] = RT_BYTE2(fMask);
1562 pCodeBuf[off++] = RT_BYTE3(fMask);
1563 pCodeBuf[off++] = RT_BYTE4(fMask);
1564 }
1565
1566#elif defined(RT_ARCH_ARM64)
1567 /* If the constant is unwieldy we'll need a register to hold it as well. */
1568 uint32_t uImmSizeLen, uImmRotate;
1569 uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
1570 : iemNativeRegAllocTmpImm(pReNative, &off, fMask);
1571
1572 /* We need a temp register for holding the member value we're modifying. */
1573 uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);
1574
1575 /* Determine how we're to access pVCpu first. */
1576 uint32_t const cbData = sizeof(uint32_t);
1577 if (offVCpu < (unsigned)(_4K * cbData))
1578 {
1579 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
1580 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1581 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
1582 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1583 if (idxTmpMask == UINT8_MAX)
1584 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1585 else
1586 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1587 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
1588 IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
1589 }
1590 else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
1591 {
1592 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1593 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1594 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1595 if (idxTmpMask == UINT8_MAX)
1596 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1597 else
1598 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1599 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
1600 (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
1601 }
1602 else
1603 {
1604 /* The offset is too large, so we must load it into a register and use
1605           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. We'll try to use the 'LSL, #2' feature
1606           of the instruction if that'll reduce the constant to 16 bits. */
1607 uint8_t const idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
1608 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
1609 bool const fShifted = offVCpu / cbData < (unsigned)UINT16_MAX;
1610 if (fShifted)
1611 pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
1612 else
1613 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);
1614
1615 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1616 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1617
1618 if (idxTmpMask == UINT8_MAX)
1619 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
1620 else
1621 pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
1622
1623 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
1624 idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
1625 iemNativeRegFreeTmp(pReNative, idxTmpIndex);
1626 }
1627 iemNativeRegFreeTmp(pReNative, idxTmpValue);
1628 if (idxTmpMask != UINT8_MAX)
1629 iemNativeRegFreeTmp(pReNative, idxTmpMask);
1630
1631#else
1632# error "port me"
1633#endif
1634 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1635 return off;
1636}
1637
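/*
 * A minimal usage sketch of the emitter above (illustrative only; the VMCPU
 * member name iem.s.uSomeStatusField is hypothetical, only the emitter call
 * itself is real): clearing a couple of bits in a 32-bit VMCPU member from
 * recompiled code.
 *
 * @code
 *      off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(RT_BIT_32(3) | RT_BIT_32(5)),
 *                                           RT_UOFFSETOF(VMCPU, iem.s.uSomeStatusField));
 * @endcode
 *
 * On AMD64 this becomes a single 'and dword [pVCpu + offVCpu], imm8/32', while
 * on ARM64 it expands to the load/and/store sequence emitted above.
 */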
1638
1639/**
1640 * Emits a gprdst = gprsrc load.
1641 */
1642DECL_FORCE_INLINE(uint32_t)
1643iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1644{
1645#ifdef RT_ARCH_AMD64
1646 /* mov gprdst, gprsrc */
1647 if ((iGprDst | iGprSrc) >= 8)
1648 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W | X86_OP_REX_B
1649 : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
1650 : X86_OP_REX_W | X86_OP_REX_R;
1651 else
1652 pCodeBuf[off++] = X86_OP_REX_W;
1653 pCodeBuf[off++] = 0x8b;
1654 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1655
1656#elif defined(RT_ARCH_ARM64)
1657 /* mov dst, src; alias for: orr dst, xzr, src */
1658 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);
1659
1660#else
1661# error "port me"
1662#endif
1663 return off;
1664}
1665
1666
1667/**
1668 * Emits a gprdst = gprsrc load.
1669 */
1670DECL_INLINE_THROW(uint32_t)
1671iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1672{
1673#ifdef RT_ARCH_AMD64
1674 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1675#elif defined(RT_ARCH_ARM64)
1676 off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1677#else
1678# error "port me"
1679#endif
1680 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1681 return off;
1682}
1683
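/*
 * A minimal sketch of the convention followed by these emitters (assuming the
 * usual register allocator usage): the Ex variant writes into a caller-ensured
 * instruction buffer, while the plain variant ensures the buffer itself and
 * asserts afterwards.
 *
 * @code
 *      uint8_t const idxRegDst = iemNativeRegAllocTmp(pReNative, &off);
 *      uint8_t const idxRegSrc = iemNativeRegAllocTmp(pReNative, &off);
 *
 *      /" Plain variant: buffer handling is done for us. "/
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegDst, idxRegSrc);
 *
 *      /" Ex variant: handy when batching several emits into one ensured buffer. "/
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
 *      off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, idxRegDst, idxRegSrc);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 *
 *      iemNativeRegFreeTmp(pReNative, idxRegSrc);
 *      iemNativeRegFreeTmp(pReNative, idxRegDst);
 * @endcode
 *
 * (The /" "/ markers stand in for nested comments inside this example block.)
 */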
1684
1685/**
1686 * Emits a gprdst = gprsrc[31:0] load.
1687 * @note Bits 63 thru 32 are cleared.
1688 */
1689DECL_FORCE_INLINE(uint32_t)
1690iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1691{
1692#ifdef RT_ARCH_AMD64
1693 /* mov gprdst, gprsrc */
1694 if ((iGprDst | iGprSrc) >= 8)
1695 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1696 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1697 : X86_OP_REX_R;
1698 pCodeBuf[off++] = 0x8b;
1699 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1700
1701#elif defined(RT_ARCH_ARM64)
1702 /* mov dst32, src32; alias for: orr dst32, wzr, src32 */
1703 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);
1704
1705#else
1706# error "port me"
1707#endif
1708 return off;
1709}
1710
1711
1712/**
1713 * Emits a gprdst = gprsrc[31:0] load.
1714 * @note Bits 63 thru 32 are cleared.
1715 */
1716DECL_INLINE_THROW(uint32_t)
1717iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1718{
1719#ifdef RT_ARCH_AMD64
1720 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
1721#elif defined(RT_ARCH_ARM64)
1722 off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1723#else
1724# error "port me"
1725#endif
1726 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1727 return off;
1728}
1729
1730
1731/**
1732 * Emits a gprdst = gprsrc[15:0] load.
1733 * @note Bits 63 thru 16 are cleared.
1734 */
1735DECL_INLINE_THROW(uint32_t)
1736iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1737{
1738#ifdef RT_ARCH_AMD64
1739 /* movzx Gv,Ew */
1740 if ((iGprDst | iGprSrc) >= 8)
1741 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1742 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1743 : X86_OP_REX_R;
1744 pCodeBuf[off++] = 0x0f;
1745 pCodeBuf[off++] = 0xb7;
1746 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1747
1748#elif defined(RT_ARCH_ARM64)
1749 /* and gprdst, gprsrc, #0xffff */
1750# if 1
1751 Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
1752 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
1753# else
1754 Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
1755 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
1756# endif
1757
1758#else
1759# error "port me"
1760#endif
1761 return off;
1762}
1763
1764
1765/**
1766 * Emits a gprdst = gprsrc[15:0] load.
1767 * @note Bits 63 thru 16 are cleared.
1768 */
1769DECL_INLINE_THROW(uint32_t)
1770iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1771{
1772#ifdef RT_ARCH_AMD64
1773 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1774#elif defined(RT_ARCH_ARM64)
1775 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1776#else
1777# error "port me"
1778#endif
1779 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1780 return off;
1781}
1782
1783
1784/**
1785 * Emits a gprdst = gprsrc[7:0] load.
1786 * @note Bits 63 thru 8 are cleared.
1787 */
1788DECL_FORCE_INLINE(uint32_t)
1789iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1790{
1791#ifdef RT_ARCH_AMD64
1792 /* movzx Gv,Eb */
1793 if (iGprDst >= 8 || iGprSrc >= 8)
1794 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1795 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1796 : X86_OP_REX_R;
1797 else if (iGprSrc >= 4)
1798 pCodeBuf[off++] = X86_OP_REX;
1799 pCodeBuf[off++] = 0x0f;
1800 pCodeBuf[off++] = 0xb6;
1801 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1802
1803#elif defined(RT_ARCH_ARM64)
1804 /* and gprdst, gprsrc, #0xff */
1805 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1806 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1807
1808#else
1809# error "port me"
1810#endif
1811 return off;
1812}
1813
1814
1815/**
1816 * Emits a gprdst = gprsrc[7:0] load.
1817 * @note Bits 63 thru 8 are cleared.
1818 */
1819DECL_INLINE_THROW(uint32_t)
1820iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1821{
1822#ifdef RT_ARCH_AMD64
1823 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1824#elif defined(RT_ARCH_ARM64)
1825 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1826#else
1827# error "port me"
1828#endif
1829 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1830 return off;
1831}
1832
1833
1834/**
1835 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1836 * @note Bits 63 thru 8 are cleared.
1837 */
1838DECL_INLINE_THROW(uint32_t)
1839iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1840{
1841#ifdef RT_ARCH_AMD64
1842 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1843
1844 /* movzx Gv,Ew */
1845 if ((iGprDst | iGprSrc) >= 8)
1846 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1847 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1848 : X86_OP_REX_R;
1849 pbCodeBuf[off++] = 0x0f;
1850 pbCodeBuf[off++] = 0xb7;
1851 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1852
1853 /* shr Ev,8 */
1854 if (iGprDst >= 8)
1855 pbCodeBuf[off++] = X86_OP_REX_B;
1856 pbCodeBuf[off++] = 0xc1;
1857 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1858 pbCodeBuf[off++] = 8;
1859
1860#elif defined(RT_ARCH_ARM64)
1861 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1862 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1863 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1864
1865#else
1866# error "port me"
1867#endif
1868 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1869 return off;
1870}
1871
1872
1873/**
1874 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1875 */
1876DECL_INLINE_THROW(uint32_t)
1877iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1878{
1879#ifdef RT_ARCH_AMD64
1880 /* movsxd r64, r/m32 */
1881 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1882 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1883 pbCodeBuf[off++] = 0x63;
1884 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1885
1886#elif defined(RT_ARCH_ARM64)
1887 /* sxtw dst, src */
1888 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1889 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1890
1891#else
1892# error "port me"
1893#endif
1894 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1895 return off;
1896}
1897
1898
1899/**
1900 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1901 */
1902DECL_INLINE_THROW(uint32_t)
1903iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1904{
1905#ifdef RT_ARCH_AMD64
1906 /* movsx r64, r/m16 */
1907 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1908 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1909 pbCodeBuf[off++] = 0x0f;
1910 pbCodeBuf[off++] = 0xbf;
1911 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1912
1913#elif defined(RT_ARCH_ARM64)
1914 /* sxth dst, src */
1915 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1916 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1917
1918#else
1919# error "port me"
1920#endif
1921 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1922 return off;
1923}
1924
1925
1926/**
1927 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1928 */
1929DECL_INLINE_THROW(uint32_t)
1930iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1931{
1932#ifdef RT_ARCH_AMD64
1933    /* movsx r32, r/m16 */
1934 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1935 if (iGprDst >= 8 || iGprSrc >= 8)
1936 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1937 pbCodeBuf[off++] = 0x0f;
1938 pbCodeBuf[off++] = 0xbf;
1939 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1940
1941#elif defined(RT_ARCH_ARM64)
1942 /* sxth dst32, src */
1943 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1944 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1945
1946#else
1947# error "port me"
1948#endif
1949 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1950 return off;
1951}
1952
1953
1954/**
1955 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1956 */
1957DECL_INLINE_THROW(uint32_t)
1958iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1959{
1960#ifdef RT_ARCH_AMD64
1961 /* movsx r64, r/m8 */
1962 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1963 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1964 pbCodeBuf[off++] = 0x0f;
1965 pbCodeBuf[off++] = 0xbe;
1966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1967
1968#elif defined(RT_ARCH_ARM64)
1969 /* sxtb dst, src */
1970 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1971 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1972
1973#else
1974# error "port me"
1975#endif
1976 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1977 return off;
1978}
1979
1980
1981/**
1982 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1983 * @note Bits 63 thru 32 are cleared.
1984 */
1985DECL_INLINE_THROW(uint32_t)
1986iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1987{
1988#ifdef RT_ARCH_AMD64
1989 /* movsx r32, r/m8 */
1990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1991 if (iGprDst >= 8 || iGprSrc >= 8)
1992 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1993 else if (iGprSrc >= 4)
1994 pbCodeBuf[off++] = X86_OP_REX;
1995 pbCodeBuf[off++] = 0x0f;
1996 pbCodeBuf[off++] = 0xbe;
1997 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1998
1999#elif defined(RT_ARCH_ARM64)
2000 /* sxtb dst32, src32 */
2001 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2002 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
2003
2004#else
2005# error "port me"
2006#endif
2007 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2008 return off;
2009}
2010
2011
2012/**
2013 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
2014 * @note Bits 63 thru 16 are cleared.
2015 */
2016DECL_INLINE_THROW(uint32_t)
2017iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
2018{
2019#ifdef RT_ARCH_AMD64
2020 /* movsx r16, r/m8 */
2021 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2022 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2023 if (iGprDst >= 8 || iGprSrc >= 8)
2024 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
2025 else if (iGprSrc >= 4)
2026 pbCodeBuf[off++] = X86_OP_REX;
2027 pbCodeBuf[off++] = 0x0f;
2028 pbCodeBuf[off++] = 0xbe;
2029 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
2030
2031 /* movzx r32, r/m16 */
2032 if (iGprDst >= 8)
2033 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
2034 pbCodeBuf[off++] = 0x0f;
2035 pbCodeBuf[off++] = 0xb7;
2036 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2037
2038#elif defined(RT_ARCH_ARM64)
2039 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
2040 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2041 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
2042 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2043 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2044
2045#else
2046# error "port me"
2047#endif
2048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2049 return off;
2050}
2051
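/*
 * Worked example for the sequence above: with iGprSrc holding
 * 0x00000000000000f0, iGprDst ends up as 0x000000000000fff0, i.e. the low
 * byte is sign-extended to 16 bits and bits 63:16 end up zero.
 */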
2052
2053/**
2054 * Emits a gprdst = gprsrc + addend load.
2055 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2056 */
2057#ifdef RT_ARCH_AMD64
2058DECL_INLINE_THROW(uint32_t)
2059iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2060 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2061{
2062 Assert(iAddend != 0);
2063
2064 /* lea gprdst, [gprsrc + iAddend] */
2065 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2066 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2067 pbCodeBuf[off++] = 0x8d;
2068 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2069 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2070 return off;
2071}
2072
2073#elif defined(RT_ARCH_ARM64)
2074DECL_INLINE_THROW(uint32_t)
2075iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2076 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2077{
2078 if ((uint32_t)iAddend < 4096)
2079 {
2080 /* add dst, src, uimm12 */
2081 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2082 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
2083 }
2084 else if ((uint32_t)-iAddend < 4096)
2085 {
2086 /* sub dst, src, uimm12 */
2087 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2088 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
2089 }
2090 else
2091 {
2092 Assert(iGprSrc != iGprDst);
2093 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
2094 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2095 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
2096 }
2097 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2098 return off;
2099}
2100#else
2101# error "port me"
2102#endif
2103
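/*
 * Illustration of the ARM64 strategy above: an addend of 8 becomes a single
 * 'add dst, src, #8', -32 becomes 'sub dst, src, #32', and anything outside
 * the +/-4095 range is first materialised into the destination with
 * iemNativeEmitLoadGprImm64 and then combined via 'add dst, src, dst', which
 * is why the source and destination registers must differ in that last case.
 */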
2104/**
2105 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
2106 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
2107 */
2108#ifdef RT_ARCH_AMD64
2109DECL_INLINE_THROW(uint32_t)
2110iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2111 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2112#else
2113DECL_INLINE_THROW(uint32_t)
2114iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2115 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
2116#endif
2117{
2118 if (iAddend != 0)
2119 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2120 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
2121}
2122
2123
2124/**
2125 * Emits a gprdst = gprsrc32 + addend load.
2126 * @note Bits 63 thru 32 are cleared.
2127 */
2128DECL_INLINE_THROW(uint32_t)
2129iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2130 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2131{
2132 Assert(iAddend != 0);
2133
2134#ifdef RT_ARCH_AMD64
2135 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
2136 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2137 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
2138 if ((iGprDst | iGprSrc) >= 8)
2139 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
2140 pbCodeBuf[off++] = 0x8d;
2141 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
2142
2143#elif defined(RT_ARCH_ARM64)
2144 if ((uint32_t)iAddend < 4096)
2145 {
2146 /* add dst, src, uimm12 */
2147 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2148 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
2149 }
2150 else if ((uint32_t)-iAddend < 4096)
2151 {
2152 /* sub dst, src, uimm12 */
2153 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2154 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
2155 }
2156 else
2157 {
2158 Assert(iGprSrc != iGprDst);
2159 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
2160 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2161 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
2162 }
2163
2164#else
2165# error "port me"
2166#endif
2167 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2168 return off;
2169}
2170
2171
2172/**
2173 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
2174 */
2175DECL_INLINE_THROW(uint32_t)
2176iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2177 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
2178{
2179 if (iAddend != 0)
2180 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
2181 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
2182}
2183
2184
2185/**
2186 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2187 * destination.
2188 */
2189DECL_FORCE_INLINE(uint32_t)
2190iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2191{
2192#ifdef RT_ARCH_AMD64
2193 /* mov reg16, r/m16 */
2194 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
2195 if (idxDst >= 8 || idxSrc >= 8)
2196 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
2197 pCodeBuf[off++] = 0x8b;
2198 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
2199
2200#elif defined(RT_ARCH_ARM64)
2201 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
2202 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
2203
2204#else
2205# error "Port me!"
2206#endif
2207 return off;
2208}
2209
2210
2211/**
2212 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
2213 * destination.
2214 */
2215DECL_INLINE_THROW(uint32_t)
2216iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
2217{
2218#ifdef RT_ARCH_AMD64
2219 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
2220#elif defined(RT_ARCH_ARM64)
2221 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
2222#else
2223# error "Port me!"
2224#endif
2225 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2226 return off;
2227}
2228
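/*
 * Worked example for the merge above: with idxDst = 0x1122334455667788 and
 * idxSrc = 0xaaaabbbbccccdddd, the emitted instruction leaves
 * idxDst = 0x112233445566dddd, i.e. only the low 16 bits are replaced.
 */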
2229
2230#ifdef RT_ARCH_AMD64
2231/**
2232 * Common bit of iemNativeEmitLoadGprByBp and friends.
2233 */
2234DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
2235 PIEMRECOMPILERSTATE pReNativeAssert)
2236{
2237 if (offDisp < 128 && offDisp >= -128)
2238 {
2239 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2240 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2241 }
2242 else
2243 {
2244 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2245 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2246 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2247 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2248 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2249 }
2250 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2251 return off;
2252}
2253#elif defined(RT_ARCH_ARM64)
2254/**
2255 * Common bit of iemNativeEmitLoadGprByBp and friends.
2256 */
2257DECL_FORCE_INLINE_THROW(uint32_t)
2258iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2259 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2260{
2261 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2262 {
2263 /* str w/ unsigned imm12 (scaled) */
2264 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2265 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2266 }
2267 else if (offDisp >= -256 && offDisp <= 256)
2268 {
2269 /* stur w/ signed imm9 (unscaled) */
2270 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2271 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2272 }
2273 else
2274 {
2275 /* Use temporary indexing register. */
2276 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2277 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2278 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2279 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2280 }
2281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2282 return off;
2283}
2284#endif
2285
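/*
 * Illustration of the three ARM64 cases above for a 64-bit access
 * (cbData = 8): offDisp = 0x40 is 8-byte aligned and in range, so the scaled
 * uimm12 form is used; offDisp = -8 falls back to the unscaled ldur/stur
 * form; something like offDisp = -0x320 fits neither and goes through
 * IEMNATIVE_REG_FIXED_TMP0 with a register-indexed access.
 */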
2286
2287/**
2288 * Emits a 64-bit GPR load instruction with a BP relative source address.
2289 */
2290DECL_INLINE_THROW(uint32_t)
2291iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2292{
2293#ifdef RT_ARCH_AMD64
2294 /* mov gprdst, qword [rbp + offDisp] */
2295 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2296 if (iGprDst < 8)
2297 pbCodeBuf[off++] = X86_OP_REX_W;
2298 else
2299 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2300 pbCodeBuf[off++] = 0x8b;
2301 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2302
2303#elif defined(RT_ARCH_ARM64)
2304 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2305
2306#else
2307# error "port me"
2308#endif
2309}
2310
2311
2312/**
2313 * Emits a 32-bit GPR load instruction with a BP relative source address.
2314 * @note Bits 63 thru 32 of the GPR will be cleared.
2315 */
2316DECL_INLINE_THROW(uint32_t)
2317iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2318{
2319#ifdef RT_ARCH_AMD64
2320 /* mov gprdst, dword [rbp + offDisp] */
2321 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2322 if (iGprDst >= 8)
2323 pbCodeBuf[off++] = X86_OP_REX_R;
2324 pbCodeBuf[off++] = 0x8b;
2325 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2326
2327#elif defined(RT_ARCH_ARM64)
2328 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2329
2330#else
2331# error "port me"
2332#endif
2333}
2334
2335
2336/**
2337 * Emits a 16-bit GPR load instruction with a BP relative source address.
2338 * @note Bits 63 thru 16 of the GPR will be cleared.
2339 */
2340DECL_INLINE_THROW(uint32_t)
2341iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2342{
2343#ifdef RT_ARCH_AMD64
2344 /* movzx gprdst, word [rbp + offDisp] */
2345 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2346 if (iGprDst >= 8)
2347 pbCodeBuf[off++] = X86_OP_REX_R;
2348 pbCodeBuf[off++] = 0x0f;
2349 pbCodeBuf[off++] = 0xb7;
2350 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2351
2352#elif defined(RT_ARCH_ARM64)
2353    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2354
2355#else
2356# error "port me"
2357#endif
2358}
2359
2360
2361/**
2362 * Emits an 8-bit GPR load instruction with a BP relative source address.
2363 * @note Bits 63 thru 8 of the GPR will be cleared.
2364 */
2365DECL_INLINE_THROW(uint32_t)
2366iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2367{
2368#ifdef RT_ARCH_AMD64
2369 /* movzx gprdst, byte [rbp + offDisp] */
2370 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2371 if (iGprDst >= 8)
2372 pbCodeBuf[off++] = X86_OP_REX_R;
2373 pbCodeBuf[off++] = 0x0f;
2374 pbCodeBuf[off++] = 0xb6;
2375 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2376
2377#elif defined(RT_ARCH_ARM64)
2378    return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2379
2380#else
2381# error "port me"
2382#endif
2383}
2384
2385
2386/**
2387 * Emits a 128-bit vector register load instruction with a BP relative source address.
2388 */
2389DECL_FORCE_INLINE_THROW(uint32_t)
2390iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2391{
2392#ifdef RT_ARCH_AMD64
2393 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2394
2395 /* movdqu reg128, mem128 */
2396 pbCodeBuf[off++] = 0xf3;
2397 if (iVecRegDst >= 8)
2398 pbCodeBuf[off++] = X86_OP_REX_R;
2399 pbCodeBuf[off++] = 0x0f;
2400 pbCodeBuf[off++] = 0x6f;
2401 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2402#elif defined(RT_ARCH_ARM64)
2403 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2404#else
2405# error "port me"
2406#endif
2407}
2408
2409
2410/**
2411 * Emits a 256-bit vector register load instruction with a BP relative source address.
2412 */
2413DECL_FORCE_INLINE_THROW(uint32_t)
2414iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2415{
2416#ifdef RT_ARCH_AMD64
2417 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2418
2419 /* vmovdqu reg256, mem256 */
2420 pbCodeBuf[off++] = X86_OP_VEX2;
2421 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2422 pbCodeBuf[off++] = 0x6f;
2423 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2424#elif defined(RT_ARCH_ARM64)
2425 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2426 Assert(!(iVecRegDst & 0x1));
2427 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2428 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2429#else
2430# error "port me"
2431#endif
2432}
2433
2434
2435/**
2436 * Emits a load effective address to a GPR with a BP relative source address.
2437 */
2438DECL_INLINE_THROW(uint32_t)
2439iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2440{
2441#ifdef RT_ARCH_AMD64
2442 /* lea gprdst, [rbp + offDisp] */
2443 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2444 if (iGprDst < 8)
2445 pbCodeBuf[off++] = X86_OP_REX_W;
2446 else
2447 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2448 pbCodeBuf[off++] = 0x8d;
2449 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2450
2451#elif defined(RT_ARCH_ARM64)
2452 bool const fSub = offDisp < 0;
2453 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2454 if (offAbsDisp <= 0xffffffU)
2455 {
2456 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2457 if (offAbsDisp <= 0xfffU)
2458 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2459 else
2460 {
2461 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2462 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2463 if (offAbsDisp & 0xfffU)
2464 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2465 }
2466 }
2467 else
2468 {
2469 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2470 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2471 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2472 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2473 }
2474
2475#else
2476# error "port me"
2477#endif
2478
2479 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2480 return off;
2481}
2482
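/*
 * Illustration of the ARM64 path above: for offDisp = 0x12345 the displacement
 * is split into 'add dst, bp, #0x12, lsl #12' followed by
 * 'add dst, dst, #0x345'; only displacements beyond 24 bits fall back to
 * materialising the constant with iemNativeEmitLoadGprImm64.
 */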
2483
2484/**
2485 * Emits a 64-bit GPR store with a BP relative destination address.
2486 *
2487 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2488 */
2489DECL_INLINE_THROW(uint32_t)
2490iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2491{
2492#ifdef RT_ARCH_AMD64
2493    /* mov qword [rbp + offDisp], gprsrc */
2494 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2495 if (iGprSrc < 8)
2496 pbCodeBuf[off++] = X86_OP_REX_W;
2497 else
2498 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2499 pbCodeBuf[off++] = 0x89;
2500 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2501
2502#elif defined(RT_ARCH_ARM64)
2503 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2504 {
2505 /* str w/ unsigned imm12 (scaled) */
2506 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2507 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2508 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2509 }
2510 else if (offDisp >= -256 && offDisp <= 256)
2511 {
2512 /* stur w/ signed imm9 (unscaled) */
2513 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2514 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2515 }
2516 else if ((uint32_t)-offDisp < (unsigned)_4K)
2517 {
2518 /* Use temporary indexing register w/ sub uimm12. */
2519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2520 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2521 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2522 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2523 }
2524 else
2525 {
2526 /* Use temporary indexing register. */
2527 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2528 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2529 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2530 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2531 }
2532 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2533 return off;
2534
2535#else
2536# error "Port me!"
2537#endif
2538}
2539
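/*
 * Illustration of the four ARM64 cases above: offDisp = 0x40 uses the scaled
 * str form, offDisp = -8 the unscaled stur form, offDisp = -0x800 first does
 * 'sub tmp0, bp, #0x800' and stores via tmp0, and anything else loads the
 * displacement into IEMNATIVE_REG_FIXED_TMP0 for a register-indexed store.
 */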
2540
2541/**
2542 * Emits a 64-bit immediate store with a BP relative destination address.
2543 *
2544 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2545 */
2546DECL_INLINE_THROW(uint32_t)
2547iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2548{
2549#ifdef RT_ARCH_AMD64
2550 if ((int64_t)uImm64 == (int32_t)uImm64)
2551 {
2552 /* mov qword [rbp + offDisp], imm32 - sign extended */
2553 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2554 pbCodeBuf[off++] = X86_OP_REX_W;
2555 pbCodeBuf[off++] = 0xc7;
2556 if (offDisp < 128 && offDisp >= -128)
2557 {
2558 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2559 pbCodeBuf[off++] = (uint8_t)offDisp;
2560 }
2561 else
2562 {
2563 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2564 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2565 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2566 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2567 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2568 }
2569 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2570 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2571 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2572 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2573 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2574 return off;
2575 }
2576#endif
2577
2578 /* Load tmp0, imm64; Store tmp to bp+disp. */
2579 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2580 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2581}
2582
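/*
 * Illustration: a value like 0xffffffffffffff00 satisfies the sign-extension
 * test above and is stored with a single 'mov qword [rbp + disp], imm32' on
 * AMD64, whereas 0x0000000100000000 does not and takes the generic
 * load-into-TMP0-then-store route (which is always used on ARM64).
 */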
2583
2584/**
2585 * Emits a 128-bit vector register store with a BP relative destination address.
2586 *
2587 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2588 */
2589DECL_INLINE_THROW(uint32_t)
2590iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2591{
2592#ifdef RT_ARCH_AMD64
2593 /* movdqu [rbp + offDisp], vecsrc */
2594 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2595 pbCodeBuf[off++] = 0xf3;
2596 if (iVecRegSrc >= 8)
2597 pbCodeBuf[off++] = X86_OP_REX_R;
2598 pbCodeBuf[off++] = 0x0f;
2599 pbCodeBuf[off++] = 0x7f;
2600 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2601
2602#elif defined(RT_ARCH_ARM64)
2603 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2604 {
2605 /* str w/ unsigned imm12 (scaled) */
2606 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2607 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2608 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2609 }
2610 else if (offDisp >= -256 && offDisp <= 256)
2611 {
2612 /* stur w/ signed imm9 (unscaled) */
2613 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2614 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2615 }
2616 else if ((uint32_t)-offDisp < (unsigned)_4K)
2617 {
2618 /* Use temporary indexing register w/ sub uimm12. */
2619 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2620 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2621 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2622 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2623 }
2624 else
2625 {
2626 /* Use temporary indexing register. */
2627 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2628 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2629 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2630 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2631 }
2632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2633 return off;
2634
2635#else
2636# error "Port me!"
2637#endif
2638}
2639
2640
2641/**
2642 * Emits a 256-bit vector register store with a BP relative destination address.
2643 *
2644 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2645 */
2646DECL_INLINE_THROW(uint32_t)
2647iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2648{
2649#ifdef RT_ARCH_AMD64
2650 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2651
2652 /* vmovdqu mem256, reg256 */
2653 pbCodeBuf[off++] = X86_OP_VEX2;
2654 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2655 pbCodeBuf[off++] = 0x7f;
2656 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2657#elif defined(RT_ARCH_ARM64)
2658 Assert(!(iVecRegSrc & 0x1));
2659 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2660 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2661#else
2662# error "Port me!"
2663#endif
2664}
2665
2666#if defined(RT_ARCH_ARM64)
2667
2668/**
2669 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2670 *
2671 * @note Odd and large @a offDisp values require a temporary, unless it's a
2672 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2673 * caller does not heed this.
2674 *
2675 * @note DON'T try this with prefetch.
2676 */
2677DECL_FORCE_INLINE_THROW(uint32_t)
2678iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2679 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2680{
2681 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2682 {
2683 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2684 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2685 }
2686 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2687 && iGprReg != iGprBase)
2688 || iGprTmp != UINT8_MAX)
2689 {
2690 /* The offset is too large, so we must load it into a register and use
2691 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2692        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2693 if (iGprTmp == UINT8_MAX)
2694 iGprTmp = iGprReg;
2695 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2696 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2697 }
2698 else
2699# ifdef IEM_WITH_THROW_CATCH
2700 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2701# else
2702 AssertReleaseFailedStmt(off = UINT32_MAX);
2703# endif
2704 return off;
2705}
2706
2707/**
2708 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2709 */
2710DECL_FORCE_INLINE_THROW(uint32_t)
2711iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2712 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2713{
2714 /*
2715     * There are a couple of ldr variants that take an immediate offset, so
2716     * try to use those if we can, otherwise we have to use a temporary register
2717     * to help with the addressing.
2718 */
2719 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2720 {
2721 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2722 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2723 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2724 }
2725 else
2726 {
2727 /* The offset is too large, so we must load it into a register and use
2728 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2729        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2730 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2731
2732 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2733 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2734
2735 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2736 }
2737 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2738 return off;
2739}
2740
2741/**
2742 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2743 *
2744 * @note Odd and large @a offDisp values require a temporary, unless it's a
2745 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2746 * caller does not heed this.
2747 *
2748 * @note DON'T try this with prefetch.
2749 */
2750DECL_FORCE_INLINE_THROW(uint32_t)
2751iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2752 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2753{
2754 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2755 {
2756 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2757 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2758 }
2759 else if ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2760 || iGprTmp != UINT8_MAX)
2761 {
2762 /* The offset is too large, so we must load it into a register and use
2763 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2764        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2765 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2766 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2767 }
2768 else
2769# ifdef IEM_WITH_THROW_CATCH
2770 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2771# else
2772 AssertReleaseFailedStmt(off = UINT32_MAX);
2773# endif
2774 return off;
2775}
2776
2777
2778/**
2779 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2780 */
2781DECL_FORCE_INLINE_THROW(uint32_t)
2782iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2783 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2784{
2785 /*
2786     * There are a couple of ldr variants that take an immediate offset, so
2787     * try to use those if we can, otherwise we have to use a temporary register
2788     * to help with the addressing.
2789 */
2790 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2791 {
2792 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2793 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2794 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2795 }
2796 else
2797 {
2798 /* The offset is too large, so we must load it into a register and use
2799 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2800        /** @todo reduce offDisp by >> 3 or >> 2 if it saves instructions? */
2801 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2802
2803 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2804 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2805
2806 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2807 }
2808 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2809 return off;
2810}
2811#endif /* RT_ARCH_ARM64 */
2812
2813/**
2814 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2815 *
2816 * @note ARM64: Misaligned @a offDisp values and values not in the
2817 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2818 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2819 * does not heed this.
2820 */
2821DECL_FORCE_INLINE_THROW(uint32_t)
2822iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2823 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2824{
2825#ifdef RT_ARCH_AMD64
2826 /* mov reg64, mem64 */
2827 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2828 pCodeBuf[off++] = 0x8b;
2829 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2830 RT_NOREF(iGprTmp);
2831
2832#elif defined(RT_ARCH_ARM64)
2833 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2834 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2835
2836#else
2837# error "port me"
2838#endif
2839 return off;
2840}
2841
2842
2843/**
2844 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2845 */
2846DECL_INLINE_THROW(uint32_t)
2847iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2848{
2849#ifdef RT_ARCH_AMD64
2850 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2851 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2852
2853#elif defined(RT_ARCH_ARM64)
2854 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2855
2856#else
2857# error "port me"
2858#endif
2859 return off;
2860}
2861
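/*
 * Usage sketch (idxRegBase is a hypothetical host register picked for the
 * example): loading a 64-bit field at a large, 8-byte aligned displacement
 * where destination and base are the same register.  On ARM64 a displacement
 * of 0x8000 is just outside the scaled uimm12 range, so a temporary must be
 * supplied.
 *
 * @code
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
 *      off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegBase, idxRegBase,
 *                                           0x8000, IEMNATIVE_REG_FIXED_TMP0);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */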
2862
2863/**
2864 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2865 *
2866 * @note ARM64: Misaligned @a offDisp values and values not in the
2867 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2868 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2869 * caller does not heed this.
2870 *
2871 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2872 */
2873DECL_FORCE_INLINE_THROW(uint32_t)
2874iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2875 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2876{
2877#ifdef RT_ARCH_AMD64
2878 /* mov reg32, mem32 */
2879 if (iGprDst >= 8 || iGprBase >= 8)
2880 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2881 pCodeBuf[off++] = 0x8b;
2882 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2883 RT_NOREF(iGprTmp);
2884
2885#elif defined(RT_ARCH_ARM64)
2886 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2887 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2888
2889#else
2890# error "port me"
2891#endif
2892 return off;
2893}
2894
2895
2896/**
2897 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2898 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2899 */
2900DECL_INLINE_THROW(uint32_t)
2901iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2902{
2903#ifdef RT_ARCH_AMD64
2904 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2906
2907#elif defined(RT_ARCH_ARM64)
2908 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2909
2910#else
2911# error "port me"
2912#endif
2913 return off;
2914}
2915
2916
2917/**
2918 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2919 * sign-extending the value to 64 bits.
2920 *
2921 * @note ARM64: Misaligned @a offDisp values and values not in the
2922 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2923 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2924 * caller does not heed this.
2925 */
2926DECL_FORCE_INLINE_THROW(uint32_t)
2927iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2928 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2929{
2930#ifdef RT_ARCH_AMD64
2931 /* movsxd reg64, mem32 */
2932 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2933 pCodeBuf[off++] = 0x63;
2934 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2935 RT_NOREF(iGprTmp);
2936
2937#elif defined(RT_ARCH_ARM64)
2938 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2939 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2940
2941#else
2942# error "port me"
2943#endif
2944 return off;
2945}
2946
2947
2948/**
2949 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2950 *
2951 * @note ARM64: Misaligned @a offDisp values and values not in the
2952 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2953 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2954 * caller does not heed this.
2955 *
2956 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2957 */
2958DECL_FORCE_INLINE_THROW(uint32_t)
2959iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2960 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2961{
2962#ifdef RT_ARCH_AMD64
2963 /* movzx reg32, mem16 */
2964 if (iGprDst >= 8 || iGprBase >= 8)
2965 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2966 pCodeBuf[off++] = 0x0f;
2967 pCodeBuf[off++] = 0xb7;
2968 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2969 RT_NOREF(iGprTmp);
2970
2971#elif defined(RT_ARCH_ARM64)
2972 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2973 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2974
2975#else
2976# error "port me"
2977#endif
2978 return off;
2979}
2980
2981
2982/**
2983 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2984 * sign-extending the value to 64 bits.
2985 *
2986 * @note ARM64: Misaligned @a offDisp values and values not in the
2987 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2988 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2989 * caller does not heed this.
2990 */
2991DECL_FORCE_INLINE_THROW(uint32_t)
2992iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2993 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2994{
2995#ifdef RT_ARCH_AMD64
2996 /* movsx reg64, mem16 */
2997 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2998 pCodeBuf[off++] = 0x0f;
2999 pCodeBuf[off++] = 0xbf;
3000 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3001 RT_NOREF(iGprTmp);
3002
3003#elif defined(RT_ARCH_ARM64)
3004 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3005 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
3006
3007#else
3008# error "port me"
3009#endif
3010 return off;
3011}
3012
3013
3014/**
3015 * Emits a 16-bit GPR load via a GPR base address with a displacement,
3016 * sign-extending the value to 32 bits.
3017 *
3018 * @note ARM64: Misaligned @a offDisp values and values not in the
3019 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
3020 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
3021 * caller does not heed this.
3022 *
3023 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3024 */
3025DECL_FORCE_INLINE_THROW(uint32_t)
3026iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3027 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3028{
3029#ifdef RT_ARCH_AMD64
3030 /* movsx reg32, mem16 */
3031 if (iGprDst >= 8 || iGprBase >= 8)
3032 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3033 pCodeBuf[off++] = 0x0f;
3034 pCodeBuf[off++] = 0xbf;
3035 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3036 RT_NOREF(iGprTmp);
3037
3038#elif defined(RT_ARCH_ARM64)
3039 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3040 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
3041
3042#else
3043# error "port me"
3044#endif
3045 return off;
3046}
3047
3048
3049/**
3050 * Emits an 8-bit GPR load via a GPR base address with a displacement.
3051 *
3052 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3053 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3054 * same. Will assert / throw if caller does not heed this.
3055 *
3056 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
3057 */
3058DECL_FORCE_INLINE_THROW(uint32_t)
3059iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3060 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3061{
3062#ifdef RT_ARCH_AMD64
3063 /* movzx reg32, mem8 */
3064 if (iGprDst >= 8 || iGprBase >= 8)
3065 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3066 pCodeBuf[off++] = 0x0f;
3067 pCodeBuf[off++] = 0xb6;
3068 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3069 RT_NOREF(iGprTmp);
3070
3071#elif defined(RT_ARCH_ARM64)
3072 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3073 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
3074
3075#else
3076# error "port me"
3077#endif
3078 return off;
3079}
3080
3081
3082/**
3083 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3084 * sign-extending the value to 64 bits.
3085 *
3086 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3087 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3088 * same. Will assert / throw if caller does not heed this.
3089 */
3090DECL_FORCE_INLINE_THROW(uint32_t)
3091iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3092 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3093{
3094#ifdef RT_ARCH_AMD64
3095 /* movsx reg64, mem8 */
3096 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3097 pCodeBuf[off++] = 0x0f;
3098 pCodeBuf[off++] = 0xbe;
3099 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3100 RT_NOREF(iGprTmp);
3101
3102#elif defined(RT_ARCH_ARM64)
3103 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3104 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
3105
3106#else
3107# error "port me"
3108#endif
3109 return off;
3110}
3111
3112
3113/**
3114 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3115 * sign-extending the value to 32 bits.
3116 *
3117 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3118 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3119 * same. Will assert / throw if caller does not heed this.
3120 *
3121 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
3122 */
3123DECL_FORCE_INLINE_THROW(uint32_t)
3124iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3125 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3126{
3127#ifdef RT_ARCH_AMD64
3128 /* movsx reg32, mem8 */
3129 if (iGprDst >= 8 || iGprBase >= 8)
3130 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3131 pCodeBuf[off++] = 0x0f;
3132 pCodeBuf[off++] = 0xbe;
3133 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
3134 RT_NOREF(iGprTmp);
3135
3136#elif defined(RT_ARCH_ARM64)
3137 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3138 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3139
3140#else
3141# error "port me"
3142#endif
3143 return off;
3144}
3145
3146
3147/**
3148 * Emits an 8-bit GPR load via a GPR base address with a displacement,
3149 * sign-extending the value to 16 bits.
3150 *
3151 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3152 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3153 * same. Will assert / throw if caller does not heed this.
3154 *
3155 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
3156 */
3157DECL_FORCE_INLINE_THROW(uint32_t)
3158iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
3159 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3160{
3161#ifdef RT_ARCH_AMD64
3162 /* movsx reg32, mem8 */
3163 if (iGprDst >= 8 || iGprBase >= 8)
3164 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3165 pCodeBuf[off++] = 0x0f;
3166 pCodeBuf[off++] = 0xbe;
3167 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
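    /* There is no AMD64 instruction that sign-extends a byte to 16 bits while also
       clearing bits 63:16, so sign-extend into the full 32-bit register above and
       then mask the result down to 16 bits below. */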
3168# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
3169 /* and reg32, 0xffff */
3170 if (iGprDst >= 8)
3171 pCodeBuf[off++] = X86_OP_REX_B;
3172 pCodeBuf[off++] = 0x81;
3173 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
3174 pCodeBuf[off++] = 0xff;
3175 pCodeBuf[off++] = 0xff;
3176 pCodeBuf[off++] = 0;
3177 pCodeBuf[off++] = 0;
3178# else
3179 /* movzx reg32, reg16 */
3180 if (iGprDst >= 8)
3181 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
3182 pCodeBuf[off++] = 0x0f;
3183 pCodeBuf[off++] = 0xb7;
3184 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
3185# endif
3186 RT_NOREF(iGprTmp);
3187
3188#elif defined(RT_ARCH_ARM64)
3189 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
3190 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
3191 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
3192 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*64Bit*/);
3193
3194#else
3195# error "port me"
3196#endif
3197 return off;
3198}
3199
3200
3201/**
3202 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3203 *
3204 * @note ARM64: Misaligned @a offDisp values and values not in the
3205 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3206 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3207 * does not heed this.
3208 */
3209DECL_FORCE_INLINE_THROW(uint32_t)
3210iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3211 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3212{
3213#ifdef RT_ARCH_AMD64
3214 /* movdqu reg128, mem128 */
3215 pCodeBuf[off++] = 0xf3;
3216 if (iVecRegDst >= 8 || iGprBase >= 8)
3217 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3218 pCodeBuf[off++] = 0x0f;
3219 pCodeBuf[off++] = 0x6f;
3220 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3221 RT_NOREF(iGprTmp);
3222
3223#elif defined(RT_ARCH_ARM64)
3224 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3225 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3226
3227#else
3228# error "port me"
3229#endif
3230 return off;
3231}
3232
3233
3234/**
3235 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3236 */
3237DECL_INLINE_THROW(uint32_t)
3238iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3239{
3240#ifdef RT_ARCH_AMD64
3241 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3242 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3243
3244#elif defined(RT_ARCH_ARM64)
3245 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3246
3247#else
3248# error "port me"
3249#endif
3250 return off;
3251}
3252
3253
3254/**
3255 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3256 *
3257 * @note ARM64: Misaligned @a offDisp values and values not in the
3258 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3259 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3260 * does not heed this.
3261 */
3262DECL_FORCE_INLINE_THROW(uint32_t)
3263iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3264 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3265{
3266#ifdef RT_ARCH_AMD64
3267 /* vmovdqu reg256, mem256 */
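    /* Note: VEX.R (modrm reg extension) and VEX.B (base register extension) are stored
       inverted, so the bits are set here for registers 0..7 and left clear for 8..15;
       VEX.X stays set since no SIB index register is used, and 0x01 selects the 0x0f map. */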
3268 pCodeBuf[off++] = X86_OP_VEX3;
3269 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3270 | X86_OP_VEX3_BYTE1_X
3271 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3272 | UINT8_C(0x01);
3273 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3274 pCodeBuf[off++] = 0x6f;
3275 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3276 RT_NOREF(iGprTmp);
3277
3278#elif defined(RT_ARCH_ARM64)
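    /* The 256-bit value is kept in a pair of consecutive 128-bit registers (hence the
       even-register assertion), so it is loaded as two 128-bit halves at offDisp and
       offDisp + 16. */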
3279 Assert(!(iVecRegDst & 0x1));
3280 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3281 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3282 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3283 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3284#else
3285# error "port me"
3286#endif
3287 return off;
3288}
3289
3290
3291/**
3292 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3293 */
3294DECL_INLINE_THROW(uint32_t)
3295iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3296{
3297#ifdef RT_ARCH_AMD64
3298 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3300
3301#elif defined(RT_ARCH_ARM64)
3302 Assert(!(iVecRegDst & 0x1));
3303 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3304 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3305 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3306 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3307
3308#else
3309# error "port me"
3310#endif
3311 return off;
3312}
3313
3314
3315/**
3316 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3317 *
3318 * @note ARM64: Misaligned @a offDisp values and values not in the
3319 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3320 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3321 * does not heed this.
3322 */
3323DECL_FORCE_INLINE_THROW(uint32_t)
3324iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3325 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3326{
3327#ifdef RT_ARCH_AMD64
3328 /* mov mem64, reg64 */
3329 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3330 pCodeBuf[off++] = 0x89;
3331 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3332 RT_NOREF(iGprTmp);
3333
3334#elif defined(RT_ARCH_ARM64)
3335 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3336 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3337
3338#else
3339# error "port me"
3340#endif
3341 return off;
3342}
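/*
 * Minimal usage sketch for the *Ex store emitters above and below (the register
 * indices, the displacement values and the instruction count are hypothetical,
 * not taken from actual recompiler code):
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
 *      off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegBase, 0x10);
 *      off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegBase, 0x18);
 *      IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 *
 * The non-Ex wrappers in this file follow the same pattern, doing the buffer
 * ensuring and the trailing assertion themselves.
 */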
3343
3344
3345/**
3346 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3347 *
3348 * @note ARM64: Misaligned @a offDisp values and values not in the
3349 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3350 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3351 * does not heed this.
3352 */
3353DECL_FORCE_INLINE_THROW(uint32_t)
3354iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3355 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3356{
3357#ifdef RT_ARCH_AMD64
3358 /* mov mem32, reg32 */
3359 if (iGprSrc >= 8 || iGprBase >= 8)
3360 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3361 pCodeBuf[off++] = 0x89;
3362 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3363 RT_NOREF(iGprTmp);
3364
3365#elif defined(RT_ARCH_ARM64)
3366 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3367 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3368
3369#else
3370# error "port me"
3371#endif
3372 return off;
3373}
3374
3375
3376/**
3377 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3378 *
3379 * @note ARM64: Misaligned @a offDisp values and values not in the
3380 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3381 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3382 * does not heed this.
3383 */
3384DECL_FORCE_INLINE_THROW(uint32_t)
3385iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3386 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3387{
3388#ifdef RT_ARCH_AMD64
3389 /* mov mem16, reg16 */
3390 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3391 if (iGprSrc >= 8 || iGprBase >= 8)
3392 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3393 pCodeBuf[off++] = 0x89;
3394 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3395 RT_NOREF(iGprTmp);
3396
3397#elif defined(RT_ARCH_ARM64)
3398 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3399 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3400
3401#else
3402# error "port me"
3403#endif
3404 return off;
3405}
3406
3407
3408/**
3409 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3410 *
3411 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3412 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3413 * same. Will assert / throw if caller does not heed this.
3414 */
3415DECL_FORCE_INLINE_THROW(uint32_t)
3416iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3417 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3418{
3419#ifdef RT_ARCH_AMD64
3420 /* mov mem8, reg8 */
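    /* Note: a REX prefix (even an empty one) is needed so that register encodings
       4..7 select SPL/BPL/SIL/DIL rather than the legacy AH/CH/DH/BH registers. */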
3421 if (iGprSrc >= 8 || iGprBase >= 8)
3422 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3423 else if (iGprSrc >= 4)
3424 pCodeBuf[off++] = X86_OP_REX;
3425 pCodeBuf[off++] = 0x88;
3426 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3427 RT_NOREF(iGprTmp);
3428
3429#elif defined(RT_ARCH_ARM64)
3430 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3431 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3432
3433#else
3434# error "port me"
3435#endif
3436 return off;
3437}
3438
3439
3440/**
3441 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3442 *
3443 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0), on
3444 * AMD64 it depends on the immediate value.
3445 *
3446 * @note ARM64: Misaligned @a offDisp values and values not in the
3447 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3448 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3449 * does not heed this.
3450 */
3451DECL_FORCE_INLINE_THROW(uint32_t)
3452iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3453 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3454{
3455#ifdef RT_ARCH_AMD64
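    /* Immediates that fit as a sign-extended 32-bit value can be stored directly;
       larger values are loaded into a temporary register and stored from there. */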
3456 if ((int32_t)uImm == (int64_t)uImm)
3457 {
3458 /* mov mem64, imm32 (sign-extended) */
3459 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3460 pCodeBuf[off++] = 0xc7;
3461 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3462 pCodeBuf[off++] = RT_BYTE1(uImm);
3463 pCodeBuf[off++] = RT_BYTE2(uImm);
3464 pCodeBuf[off++] = RT_BYTE3(uImm);
3465 pCodeBuf[off++] = RT_BYTE4(uImm);
3466 }
3467 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3468 {
3469 /* require temporary register. */
3470 if (iGprImmTmp == UINT8_MAX)
3471 iGprImmTmp = iGprTmp;
3472 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3473 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3474 }
3475 else
3476# ifdef IEM_WITH_THROW_CATCH
3477 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3478# else
3479 AssertReleaseFailedStmt(off = UINT32_MAX);
3480# endif
3481
3482#elif defined(RT_ARCH_ARM64)
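    /* Zero can be stored directly from the XZR register, so no temporary is needed. */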
3483 if (uImm == 0)
3484 iGprImmTmp = ARMV8_A64_REG_XZR;
3485 else
3486 {
3487 Assert(iGprImmTmp < 31);
3488 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3489 }
3490 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3491
3492#else
3493# error "port me"
3494#endif
3495 return off;
3496}
3497
3498
3499/**
3500 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3501 *
3502 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3503 *
3504 * @note ARM64: Misaligned @a offDisp values and values not in the
3505 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3506 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3507 * does not heed this.
3508 */
3509DECL_FORCE_INLINE_THROW(uint32_t)
3510iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3511 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3512{
3513#ifdef RT_ARCH_AMD64
3514 /* mov mem32, imm32 */
3515 if (iGprBase >= 8)
3516 pCodeBuf[off++] = X86_OP_REX_B;
3517 pCodeBuf[off++] = 0xc7;
3518 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3519 pCodeBuf[off++] = RT_BYTE1(uImm);
3520 pCodeBuf[off++] = RT_BYTE2(uImm);
3521 pCodeBuf[off++] = RT_BYTE3(uImm);
3522 pCodeBuf[off++] = RT_BYTE4(uImm);
3523 RT_NOREF(iGprImmTmp, iGprTmp);
3524
3525#elif defined(RT_ARCH_ARM64)
3526
3527 if (uImm == 0)
3528 iGprImmTmp = ARMV8_A64_REG_XZR;
3529 else
3530 {
3531 Assert(iGprImmTmp < 31);
3532 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3533 }
3534 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3535 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3536
3537#else
3538# error "port me"
3539#endif
3540 return off;
3541}
3542
3543
3544/**
3545 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3546 *
3547 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3548 *
3549 * @note ARM64: Misaligned @a offDisp values and values not in the
3550 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3551 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3552 * does not heed this.
3553 */
3554DECL_FORCE_INLINE_THROW(uint32_t)
3555iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3556 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3557{
3558#ifdef RT_ARCH_AMD64
3559 /* mov mem16, imm16 */
3560 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3561 if (iGprBase >= 8)
3562 pCodeBuf[off++] = X86_OP_REX_B;
3563 pCodeBuf[off++] = 0xc7;
3564 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3565 pCodeBuf[off++] = RT_BYTE1(uImm);
3566 pCodeBuf[off++] = RT_BYTE2(uImm);
3567 RT_NOREF(iGprImmTmp, iGprTmp);
3568
3569#elif defined(RT_ARCH_ARM64)
3570 if (uImm == 0)
3571 iGprImmTmp = ARMV8_A64_REG_XZR;
3572 else
3573 {
3574 Assert(iGprImmTmp < 31);
3575 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3576 }
3577 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3578 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3579
3580#else
3581# error "port me"
3582#endif
3583 return off;
3584}
3585
3586
3587/**
3588 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3589 *
3590 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3591 *
3592 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3593 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3594 * same. Will assert / throw if caller does not heed this.
3595 */
3596DECL_FORCE_INLINE_THROW(uint32_t)
3597iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3598 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3599{
3600#ifdef RT_ARCH_AMD64
3601 /* mov mem8, imm8 */
3602 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3603 if (iGprBase >= 8)
3604 pCodeBuf[off++] = X86_OP_REX_B;
3605 pCodeBuf[off++] = 0xc6;
3606 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3607 pCodeBuf[off++] = uImm;
3608 RT_NOREF(iGprImmTmp, iGprTmp);
3609
3610#elif defined(RT_ARCH_ARM64)
3611 if (uImm == 0)
3612 iGprImmTmp = ARMV8_A64_REG_XZR;
3613 else
3614 {
3615 Assert(iGprImmTmp < 31);
3616 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3617 }
3618 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3619 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3620
3621#else
3622# error "port me"
3623#endif
3624 return off;
3625}
3626
3627
3628/**
3629 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3630 *
3631 * @note ARM64: Misaligned @a offDisp values and values not in the
3632 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3633 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3634 * does not heed this.
3635 */
3636DECL_FORCE_INLINE_THROW(uint32_t)
3637iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3638 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3639{
3640#ifdef RT_ARCH_AMD64
3641 /* movdqu mem128, reg128 */
3642 pCodeBuf[off++] = 0xf3;
3643 if (iVecRegDst >= 8 || iGprBase >= 8)
3644 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3645 pCodeBuf[off++] = 0x0f;
3646 pCodeBuf[off++] = 0x7f;
3647 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3648 RT_NOREF(iGprTmp);
3649
3650#elif defined(RT_ARCH_ARM64)
3651 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3652 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3653
3654#else
3655# error "port me"
3656#endif
3657 return off;
3658}
3659
3660
3661/**
3662 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3663 */
3664DECL_INLINE_THROW(uint32_t)
3665iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3666{
3667#ifdef RT_ARCH_AMD64
3668 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3669 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3670
3671#elif defined(RT_ARCH_ARM64)
3672 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3673
3674#else
3675# error "port me"
3676#endif
3677 return off;
3678}
3679
3680
3681/**
3682 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3683 *
3684 * @note ARM64: Misaligned @a offDisp values and values not in the
3685 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3686 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3687 * does not heed this.
3688 */
3689DECL_FORCE_INLINE_THROW(uint32_t)
3690iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3691 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3692{
3693#ifdef RT_ARCH_AMD64
3694 /* vmovdqu mem256, reg256 */
3695 pCodeBuf[off++] = X86_OP_VEX3;
3696 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3697 | X86_OP_VEX3_BYTE1_X
3698 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3699 | UINT8_C(0x01);
3700 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3701 pCodeBuf[off++] = 0x7f;
3702 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3703 RT_NOREF(iGprTmp);
3704
3705#elif defined(RT_ARCH_ARM64)
3706 Assert(!(iVecRegDst & 0x1));
3707 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3708 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3709 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3710 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3711#else
3712# error "port me"
3713#endif
3714 return off;
3715}
3716
3717
3718/**
3719 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3720 */
3721DECL_INLINE_THROW(uint32_t)
3722iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3723{
3724#ifdef RT_ARCH_AMD64
3725 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3726 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3727
3728#elif defined(RT_ARCH_ARM64)
3729 Assert(!(iVecRegDst & 0x1));
3730 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3731 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3732 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3733 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3734
3735#else
3736# error "port me"
3737#endif
3738 return off;
3739}
3740
3741
3742
3743/*********************************************************************************************************************************
3744* Subtraction and Additions *
3745*********************************************************************************************************************************/
3746
3747/**
3748 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3749 * @note The AMD64 version sets flags.
3750 */
3751DECL_INLINE_THROW(uint32_t)
3752iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3753{
3754#if defined(RT_ARCH_AMD64)
3755 /* sub Gv,Ev */
3756 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3757 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3758 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3759 pbCodeBuf[off++] = 0x2b;
3760 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3761
3762#elif defined(RT_ARCH_ARM64)
3763 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3764 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3765
3766#else
3767# error "Port me"
3768#endif
3769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3770 return off;
3771}
3772
3773
3774/**
3775 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3776 * @note The AMD64 version sets flags.
3777 */
3778DECL_FORCE_INLINE(uint32_t)
3779iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3780{
3781#if defined(RT_ARCH_AMD64)
3782 /* sub Gv,Ev */
3783 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3784 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3785 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3786 pCodeBuf[off++] = 0x2b;
3787 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3788
3789#elif defined(RT_ARCH_ARM64)
3790 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3791
3792#else
3793# error "Port me"
3794#endif
3795 return off;
3796}
3797
3798
3799/**
3800 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3801 * @note The AMD64 version sets flags.
3802 */
3803DECL_INLINE_THROW(uint32_t)
3804iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3805{
3806#if defined(RT_ARCH_AMD64)
3807 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3808#elif defined(RT_ARCH_ARM64)
3809 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3810#else
3811# error "Port me"
3812#endif
3813 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3814 return off;
3815}
3816
3817
3818/**
3819 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3820 *
3821 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3822 *
3823 * @note Larger constants will require a temporary register. Failing to specify
3824 * one when needed will trigger fatal assertion / throw.
3825 */
3826DECL_FORCE_INLINE_THROW(uint32_t)
3827iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3828 uint8_t iGprTmp = UINT8_MAX)
3829{
3830#ifdef RT_ARCH_AMD64
3831 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3832 if (iSubtrahend == 1)
3833 {
3834 /* dec r/m64 */
3835 pCodeBuf[off++] = 0xff;
3836 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3837 }
3838 else if (iSubtrahend == -1)
3839 {
3840 /* inc r/m64 */
3841 pCodeBuf[off++] = 0xff;
3842 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3843 }
3844 else if ((int8_t)iSubtrahend == iSubtrahend)
3845 {
3846 /* sub r/m64, imm8 */
3847 pCodeBuf[off++] = 0x83;
3848 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3849 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3850 }
3851 else if ((int32_t)iSubtrahend == iSubtrahend)
3852 {
3853 /* sub r/m64, imm32 */
3854 pCodeBuf[off++] = 0x81;
3855 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3856 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3857 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3858 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3859 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3860 }
3861 else if (iGprTmp != UINT8_MAX)
3862 {
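        /* Back up over the REX.W prefix emitted at the top of the function (hence the
           off - 1); the immediate load and the SUB below are complete instructions of
           their own. */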
3863 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3864 /* sub r/m64, r64 */
3865 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3866 pCodeBuf[off++] = 0x29;
3867 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3868 }
3869 else
3870# ifdef IEM_WITH_THROW_CATCH
3871 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3872# else
3873 AssertReleaseFailedStmt(off = UINT32_MAX);
3874# endif
3875
3876#elif defined(RT_ARCH_ARM64)
3877 uint64_t const uAbsSubtrahend = (uint64_t)RT_ABS(iSubtrahend);
3878 if (uAbsSubtrahend < 4096)
3879 {
3880 if (iSubtrahend >= 0)
3881 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3882 else
3883 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3884 }
3885 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3886 {
3887 if (iSubtrahend >= 0)
3888 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3889 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3890 else
3891 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3892 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3893 }
3894 else if (iGprTmp != UINT8_MAX)
3895 {
3896 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3897 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3898 }
3899 else
3900# ifdef IEM_WITH_THROW_CATCH
3901 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3902# else
3903 AssertReleaseFailedStmt(off = UINT32_MAX);
3904# endif
3905
3906#else
3907# error "Port me"
3908#endif
3909 return off;
3910}
3911
3912
3913/**
3914 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3915 *
3916 * @note Larger constants will require a temporary register. Failing to specify
3917 * one when needed will trigger fatal assertion / throw.
3918 */
3919DECL_INLINE_THROW(uint32_t)
3920iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3921 uint8_t iGprTmp = UINT8_MAX)
3922
3923{
3924#ifdef RT_ARCH_AMD64
3925 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3926#elif defined(RT_ARCH_ARM64)
3927 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3928#else
3929# error "Port me"
3930#endif
3931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3932 return off;
3933}
3934
3935
3936/**
3937 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3938 *
3939 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3940 *
3941 * @note ARM64: Larger constants will require a temporary register. Failing to
3942 * specify one when needed will trigger fatal assertion / throw.
3943 */
3944DECL_FORCE_INLINE_THROW(uint32_t)
3945iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3946 uint8_t iGprTmp = UINT8_MAX)
3947{
3948#ifdef RT_ARCH_AMD64
3949 if (iGprDst >= 8)
3950 pCodeBuf[off++] = X86_OP_REX_B;
3951 if (iSubtrahend == 1)
3952 {
3953 /* dec r/m32 */
3954 pCodeBuf[off++] = 0xff;
3955 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3956 }
3957 else if (iSubtrahend == -1)
3958 {
3959 /* inc r/m32 */
3960 pCodeBuf[off++] = 0xff;
3961 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3962 }
3963 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3964 {
3965 /* sub r/m32, imm8 */
3966 pCodeBuf[off++] = 0x83;
3967 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3968 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3969 }
3970 else
3971 {
3972 /* sub r/m32, imm32 */
3973 pCodeBuf[off++] = 0x81;
3974 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3975 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3976 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3977 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3978 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3979 }
3980 RT_NOREF(iGprTmp);
3981
3982#elif defined(RT_ARCH_ARM64)
3983 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3984 if (uAbsSubtrahend < 4096)
3985 {
3986 if (iSubtrahend >= 0)
3987 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3988 else
3989 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3990 }
3991 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3992 {
3993 if (iSubtrahend >= 0)
3994 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3995 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3996 else
3997 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3998 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3999 }
4000 else if (iGprTmp != UINT8_MAX)
4001 {
4002 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
4003 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4004 }
4005 else
4006# ifdef IEM_WITH_THROW_CATCH
4007 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4008# else
4009 AssertReleaseFailedStmt(off = UINT32_MAX);
4010# endif
4011
4012#else
4013# error "Port me"
4014#endif
4015 return off;
4016}
4017
4018
4019/**
4020 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
4021 *
4022 * @note ARM64: Larger constants will require a temporary register. Failing to
4023 * specify one when needed will trigger fatal assertion / throw.
4024 */
4025DECL_INLINE_THROW(uint32_t)
4026iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
4027 uint8_t iGprTmp = UINT8_MAX)
4028
4029{
4030#ifdef RT_ARCH_AMD64
4031 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
4032#elif defined(RT_ARCH_ARM64)
4033 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
4034#else
4035# error "Port me"
4036#endif
4037 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4038 return off;
4039}
4040
4041
4042/**
4043 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
4044 *
4045 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
4046 * so not suitable as a base for conditional jumps.
4047 *
4048 * @note AMD64: Will only update the lower 16 bits of the register.
4049 * @note ARM64: Will update the entire register.
4050 * @note ARM64: Larger constants will require a temporary register. Failing to
4051 * specify one when needed will trigger fatal assertion / throw.
4052 */
4053DECL_FORCE_INLINE_THROW(uint32_t)
4054iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
4055 uint8_t iGprTmp = UINT8_MAX)
4056{
4057#ifdef RT_ARCH_AMD64
4058 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4059 if (iGprDst >= 8)
4060 pCodeBuf[off++] = X86_OP_REX_B;
4061 if (iSubtrahend == 1)
4062 {
4063 /* dec r/m16 */
4064 pCodeBuf[off++] = 0xff;
4065 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4066 }
4067 else if (iSubtrahend == -1)
4068 {
4069 /* inc r/m16 */
4070 pCodeBuf[off++] = 0xff;
4071 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4072 }
4073 else if ((int8_t)iSubtrahend == iSubtrahend)
4074 {
4075 /* sub r/m16, imm8 */
4076 pCodeBuf[off++] = 0x83;
4077 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4078 pCodeBuf[off++] = (uint8_t)iSubtrahend;
4079 }
4080 else
4081 {
4082 /* sub r/m16, imm16 */
4083 pCodeBuf[off++] = 0x81;
4084 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
4085 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
4086 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
4087 }
4088 RT_NOREF(iGprTmp);
4089
4090#elif defined(RT_ARCH_ARM64)
4091 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
4092 if (uAbsSubtrahend < 4096)
4093 {
4094 if (iSubtrahend >= 0)
4095 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4096 else
4097 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
4098 }
4099 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
4100 {
4101 if (iSubtrahend >= 0)
4102 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4103 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4104 else
4105 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
4106 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
4107 }
4108 else if (iGprTmp != UINT8_MAX)
4109 {
4110 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
4111 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4112 }
4113 else
4114# ifdef IEM_WITH_THROW_CATCH
4115 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4116# else
4117 AssertReleaseFailedStmt(off = UINT32_MAX);
4118# endif
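    /* Mask the 32-bit result down to 16 bits; being a 32-bit operation this also
       zeroes bits 63:32, which is why the note above says the entire register is
       updated. */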
4119 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4120
4121#else
4122# error "Port me"
4123#endif
4124 return off;
4125}
4126
4127
4128/**
4129 * Emits adding a 64-bit GPR to another, storing the result in the first.
4130 * @note The AMD64 version sets flags.
4131 */
4132DECL_FORCE_INLINE(uint32_t)
4133iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4134{
4135#if defined(RT_ARCH_AMD64)
4136 /* add Gv,Ev */
4137 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4138 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
4139 pCodeBuf[off++] = 0x03;
4140 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4141
4142#elif defined(RT_ARCH_ARM64)
4143 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
4144
4145#else
4146# error "Port me"
4147#endif
4148 return off;
4149}
4150
4151
4152/**
4153 * Emits adding a 64-bit GPR to another, storing the result in the first.
4154 * @note The AMD64 version sets flags.
4155 */
4156DECL_INLINE_THROW(uint32_t)
4157iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4158{
4159#if defined(RT_ARCH_AMD64)
4160 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4161#elif defined(RT_ARCH_ARM64)
4162 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4163#else
4164# error "Port me"
4165#endif
4166 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4167 return off;
4168}
4169
4170
4171/**
4172 * Emits adding a 32-bit GPR to another, storing the result in the first.
4173 * @note The AMD64 version sets flags.
4174 */
4175DECL_FORCE_INLINE(uint32_t)
4176iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4177{
4178#if defined(RT_ARCH_AMD64)
4179 /* add Gv,Ev */
4180 if (iGprDst >= 8 || iGprAddend >= 8)
4181 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
4182 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
4183 pCodeBuf[off++] = 0x03;
4184 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
4185
4186#elif defined(RT_ARCH_ARM64)
4187 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
4188
4189#else
4190# error "Port me"
4191#endif
4192 return off;
4193}
4194
4195
4196/**
4197 * Emits adding a 32-bit GPR to another, storing the result in the first.
4198 * @note The AMD64 version sets flags.
4199 */
4200DECL_INLINE_THROW(uint32_t)
4201iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
4202{
4203#if defined(RT_ARCH_AMD64)
4204 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
4205#elif defined(RT_ARCH_ARM64)
4206 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
4207#else
4208# error "Port me"
4209#endif
4210 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4211 return off;
4212}
4213
4214
4215/**
4216 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4217 */
4218DECL_INLINE_THROW(uint32_t)
4219iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4220{
4221#if defined(RT_ARCH_AMD64)
4222 /* add or inc */
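    /* (INC is one byte shorter than ADD r/m64, imm8, hence the special case for +1.) */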
4223 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4224 if (iImm8 != 1)
4225 {
4226 pCodeBuf[off++] = 0x83;
4227 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4228 pCodeBuf[off++] = (uint8_t)iImm8;
4229 }
4230 else
4231 {
4232 pCodeBuf[off++] = 0xff;
4233 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4234 }
4235
4236#elif defined(RT_ARCH_ARM64)
4237 if (iImm8 >= 0)
4238 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4239 else
4240 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4241
4242#else
4243# error "Port me"
4244#endif
4245 return off;
4246}
4247
4248
4249/**
4250 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4251 */
4252DECL_INLINE_THROW(uint32_t)
4253iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4254{
4255#if defined(RT_ARCH_AMD64)
4256 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4257#elif defined(RT_ARCH_ARM64)
4258 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4259#else
4260# error "Port me"
4261#endif
4262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4263 return off;
4264}
4265
4266
4267/**
4268 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4269 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4270 */
4271DECL_FORCE_INLINE(uint32_t)
4272iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4273{
4274#if defined(RT_ARCH_AMD64)
4275 /* add or inc */
4276 if (iGprDst >= 8)
4277 pCodeBuf[off++] = X86_OP_REX_B;
4278 if (iImm8 != 1)
4279 {
4280 pCodeBuf[off++] = 0x83;
4281 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4282 pCodeBuf[off++] = (uint8_t)iImm8;
4283 }
4284 else
4285 {
4286 pCodeBuf[off++] = 0xff;
4287 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4288 }
4289
4290#elif defined(RT_ARCH_ARM64)
4291 if (iImm8 >= 0)
4292 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4293 else
4294 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4295
4296#else
4297# error "Port me"
4298#endif
4299 return off;
4300}
4301
4302
4303/**
4304 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4305 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4306 */
4307DECL_INLINE_THROW(uint32_t)
4308iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4309{
4310#if defined(RT_ARCH_AMD64)
4311 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4312#elif defined(RT_ARCH_ARM64)
4313 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4314#else
4315# error "Port me"
4316#endif
4317 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4318 return off;
4319}
4320
4321
4322/**
4323 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4324 *
4325 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4326 */
4327DECL_FORCE_INLINE_THROW(uint32_t)
4328iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4329{
4330#if defined(RT_ARCH_AMD64)
4331 if ((int8_t)iAddend == iAddend)
4332 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4333
4334 if ((int32_t)iAddend == iAddend)
4335 {
4336 /* add grp, imm32 */
4337 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4338 pCodeBuf[off++] = 0x81;
4339 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4340 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4341 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4342 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4343 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4344 }
4345 else if (iGprTmp != UINT8_MAX)
4346 {
4347 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4348
4349 /* add dst, tmpreg */
4350 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4351 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4352 pCodeBuf[off++] = 0x03;
4353 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4354 }
4355 else
4356# ifdef IEM_WITH_THROW_CATCH
4357 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4358# else
4359 AssertReleaseFailedStmt(off = UINT32_MAX);
4360# endif
4361
4362#elif defined(RT_ARCH_ARM64)
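    /* Constants of up to 24 bits are emitted as up to two immediate instructions:
       an add/sub of bits 23:12 shifted left by 12, followed by an add/sub of bits
       11:0. For example, an addend of 0x12345 becomes ADD dst, dst, #0x12, LSL #12
       followed by ADD dst, dst, #0x345. */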
4363 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4364 if (uAbsAddend <= 0xffffffU)
4365 {
4366 bool const fSub = iAddend < 0;
4367 if (uAbsAddend > 0xfffU)
4368 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4369 false /*fSetFlags*/, true /*fShift12*/);
4370 if (uAbsAddend & 0xfffU)
4371 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4372 }
4373 else if (iGprTmp != UINT8_MAX)
4374 {
4375 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4376 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4377 }
4378 else
4379# ifdef IEM_WITH_THROW_CATCH
4380 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4381# else
4382 AssertReleaseFailedStmt(off = UINT32_MAX);
4383# endif
4384
4385#else
4386# error "Port me"
4387#endif
4388 return off;
4389}
4390
4391
4392/**
4393 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4394 */
4395DECL_INLINE_THROW(uint32_t)
4396iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4397{
4398#if defined(RT_ARCH_AMD64)
4399 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4400 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4401
4402 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4403 {
4404 /* add grp, imm32 */
4405 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4406 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4407 pbCodeBuf[off++] = 0x81;
4408 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4409 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4410 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4411 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4412 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4413 }
4414 else
4415 {
4416 /* Best to use a temporary register to deal with this in the simplest way: */
4417 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4418
4419 /* add dst, tmpreg */
4420 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4421 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4422 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4423 pbCodeBuf[off++] = 0x03;
4424 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4425
4426 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4427 }
4428
4429#elif defined(RT_ARCH_ARM64)
4430 bool const fSub = iAddend < 0;
4431 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4432 if (uAbsAddend <= 0xffffffU)
4433 {
4434 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4435 if (uAbsAddend > 0xfffU)
4436 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4437 false /*fSetFlags*/, true /*fShift12*/);
4438 if (uAbsAddend & 0xfffU)
4439 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4440 }
4441 else
4442 {
4443 /* Use temporary register for the immediate. */
4444 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4445
4446 /* add gprdst, gprdst, tmpreg */
4447 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4448 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4449
4450 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4451 }
4452
4453#else
4454# error "Port me"
4455#endif
4456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4457 return off;
4458}
4459
4460
4461/**
4462 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4463 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4464 * @note For ARM64 the iAddend value should be in the range -0xffffff..0xffffff
4465 * (negative values behave like a subtraction); constants outside that range
4466 * require a temporary register (@a iGprTmp) or this will assert / throw.
4467 */
4468DECL_FORCE_INLINE_THROW(uint32_t)
4469iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4470{
4471#if defined(RT_ARCH_AMD64)
4472 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4473 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4474
4475 /* add grp, imm32 */
4476 if (iGprDst >= 8)
4477 pCodeBuf[off++] = X86_OP_REX_B;
4478 pCodeBuf[off++] = 0x81;
4479 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4480 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4481 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4482 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4483 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4484 RT_NOREF(iGprTmp);
4485
4486#elif defined(RT_ARCH_ARM64)
4487 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4488 if (uAbsAddend <= 0xffffffU)
4489 {
4490 bool const fSub = iAddend < 0;
4491 if (uAbsAddend > 0xfffU)
4492 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4493 false /*fSetFlags*/, true /*fShift12*/);
4494 if (uAbsAddend & 0xfffU)
4495 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4496 }
4497 else if (iGprTmp != UINT8_MAX)
4498 {
4499 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, iAddend);
4500 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
4501 }
4502 else
4503# ifdef IEM_WITH_THROW_CATCH
4504 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4505# else
4506 AssertReleaseFailedStmt(off = UINT32_MAX);
4507# endif
4508
4509#else
4510# error "Port me"
4511#endif
4512 return off;
4513}
4514
4515
4516/**
4517 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4518 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4519 */
4520DECL_INLINE_THROW(uint32_t)
4521iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4522{
4523#if defined(RT_ARCH_AMD64)
4524 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4525
4526#elif defined(RT_ARCH_ARM64)
4527 bool const fSub = iAddend < 0;
4528 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4529 if (uAbsAddend <= 0xffffffU)
4530 {
4531 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4532 if (uAbsAddend > 0xfffU)
4533 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4534 false /*fSetFlags*/, true /*fShift12*/);
4535 if (uAbsAddend & 0xfffU)
4536 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4537 }
4538 else
4539 {
4540 /* Use temporary register for the immediate. */
4541 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4542
4543 /* add gprdst, gprdst, tmpreg */
4544 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4545 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4546
4547 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4548 }
4549
4550#else
4551# error "Port me"
4552#endif
4553 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4554 return off;
4555}
4556
4557
4558/**
4559 * Emits a 16-bit GPR add with a signed immediate addend.
4560 *
4561 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4562 * so not suitable as a base for conditional jumps.
4563 *
4564 * @note AMD64: Will only update the lower 16 bits of the register.
4565 * @note ARM64: Will update the entire register.
4566 * @sa iemNativeEmitSubGpr16ImmEx
4567 */
4568DECL_FORCE_INLINE(uint32_t)
4569iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4570{
4571#ifdef RT_ARCH_AMD64
4572 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4573 if (iGprDst >= 8)
4574 pCodeBuf[off++] = X86_OP_REX_B;
4575 if (iAddend == 1)
4576 {
4577 /* inc r/m16 */
4578 pCodeBuf[off++] = 0xff;
4579 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4580 }
4581 else if (iAddend == -1)
4582 {
4583 /* dec r/m16 */
4584 pCodeBuf[off++] = 0xff;
4585 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4586 }
4587 else if ((int8_t)iAddend == iAddend)
4588 {
4589 /* add r/m16, imm8 */
4590 pCodeBuf[off++] = 0x83;
4591 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4592 pCodeBuf[off++] = (uint8_t)iAddend;
4593 }
4594 else
4595 {
4596 /* add r/m16, imm16 */
4597 pCodeBuf[off++] = 0x81;
4598 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4599 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4600 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4601 }
4602
4603#elif defined(RT_ARCH_ARM64)
4604 bool const fSub = iAddend < 0;
4605 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4606 if (uAbsAddend > 0xfffU)
4607 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4608 false /*fSetFlags*/, true /*fShift12*/);
4609 if (uAbsAddend & 0xfffU)
4610 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4611 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4612
4613#else
4614# error "Port me"
4615#endif
4616 return off;
4617}
4618
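/*
 * Illustrative sketch (not part of the original API): a wrapper around the Ex
 * emitter above for when no instruction buffer has been ensured yet.  The
 * buffer sizes are worst-case guesses: 6 bytes on AMD64 (0x66 + REX + opcode +
 * ModRM + imm16) and 3 instructions on ARM64 (two add/sub plus the 16-bit mask).
 */
#if 0 /* sketch only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitAddGpr16ImmExample(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int16_t iAddend)
{
# ifdef RT_ARCH_AMD64
    off = iemNativeEmitAddGpr16ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iAddend);
# elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitAddGpr16ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iAddend);
# else
#  error "Port me"
# endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#endif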
4619
4620
4621/**
4622 * Adds two 64-bit GPRs together, storing the result in a third register.
4623 */
4624DECL_FORCE_INLINE(uint32_t)
4625iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4626{
4627#ifdef RT_ARCH_AMD64
4628 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4629 {
4630 /** @todo consider LEA */
4631 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4632 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4633 }
4634 else
4635 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4636
4637#elif defined(RT_ARCH_ARM64)
4638 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4639
4640#else
4641# error "Port me!"
4642#endif
4643 return off;
4644}
4645
4646
4647
4648/**
4649 * Adds two 32-bit GPRs together, storing the result in a third register.
4650 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4651 */
4652DECL_FORCE_INLINE(uint32_t)
4653iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4654{
4655#ifdef RT_ARCH_AMD64
4656 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4657 {
4658 /** @todo consider LEA */
4659 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4660 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4661 }
4662 else
4663 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4664
4665#elif defined(RT_ARCH_ARM64)
4666 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4667
4668#else
4669# error "Port me!"
4670#endif
4671 return off;
4672}
4673
4674
4675/**
4676 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4677 * third register.
4678 *
4679 * @note The ARM64 version does not work for non-trivial constants if the
4680 * two registers are the same. Will assert / throw exception.
4681 */
4682DECL_FORCE_INLINE_THROW(uint32_t)
4683iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4684{
4685#ifdef RT_ARCH_AMD64
4686 /** @todo consider LEA */
4687 if ((int8_t)iImmAddend == iImmAddend)
4688 {
4689 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4690 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4691 }
4692 else
4693 {
4694 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4695 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4696 }
4697
4698#elif defined(RT_ARCH_ARM64)
4699 bool const fSub = iImmAddend < 0;
4700 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4701 if (uAbsImmAddend <= 0xfffU)
4702 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4703 else if (uAbsImmAddend <= 0xffffffU)
4704 {
4705 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4706 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4707 if (uAbsImmAddend & 0xfffU)
4708 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4709 }
4710 else if (iGprDst != iGprAddend)
4711 {
4712 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4713 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4714 }
4715 else
4716# ifdef IEM_WITH_THROW_CATCH
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4718# else
4719 AssertReleaseFailedStmt(off = UINT32_MAX);
4720# endif
4721
4722#else
4723# error "Port me!"
4724#endif
4725 return off;
4726}
4727
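/*
 * Illustrative usage sketch (not from the original source): adding a large
 * constant to a source register into a distinct destination register, which
 * keeps the ARM64 fallback path (load immediate + register add) available as
 * per the note above.  The ensure size of 16 is a generous guess covering the
 * AMD64 worst case (~13 bytes) and the ARM64 worst case (~5 instructions).
 */
#if 0 /* sketch only; iGprDst, iGprSrc and offDisp are hypothetical caller values */
    Assert(iGprDst != iGprSrc);
    off = iemNativeEmitGprEqGprPlusImmEx(iemNativeInstrBufEnsure(pReNative, off, 16),
                                         off, iGprDst, iGprSrc, offDisp);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif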
4728
4729/**
4730 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4731 * third register.
4732 *
4733 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4734 *
4735 * @note The ARM64 version does not work for non-trivial constants if the
4736 * two registers are the same. Will assert / throw exception.
4737 */
4738DECL_FORCE_INLINE_THROW(uint32_t)
4739iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4740{
4741#ifdef RT_ARCH_AMD64
4742 /** @todo consider LEA */
4743 if ((int8_t)iImmAddend == iImmAddend)
4744 {
4745 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4746 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4747 }
4748 else
4749 {
4750 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4751 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4752 }
4753
4754#elif defined(RT_ARCH_ARM64)
4755 bool const fSub = iImmAddend < 0;
4756 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4757 if (uAbsImmAddend <= 0xfffU)
4758 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4759 else if (uAbsImmAddend <= 0xffffffU)
4760 {
4761 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4762 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4763 if (uAbsImmAddend & 0xfffU)
4764 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4765 }
4766 else if (iGprDst != iGprAddend)
4767 {
4768 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4769 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4770 }
4771 else
4772# ifdef IEM_WITH_THROW_CATCH
4773 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4774# else
4775 AssertReleaseFailedStmt(off = UINT32_MAX);
4776# endif
4777
4778#else
4779# error "Port me!"
4780#endif
4781 return off;
4782}
4783
4784
4785/*********************************************************************************************************************************
4786* Unary Operations *
4787*********************************************************************************************************************************/
4788
4789/**
4790 * Emits code for two's complement negation of a 64-bit GPR.
4791 */
4792DECL_FORCE_INLINE_THROW(uint32_t)
4793iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4794{
4795#if defined(RT_ARCH_AMD64)
4796 /* neg Ev */
4797 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4798 pCodeBuf[off++] = 0xf7;
4799 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4800
4801#elif defined(RT_ARCH_ARM64)
4802 /* sub dst, xzr, dst */
4803 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4804
4805#else
4806# error "Port me"
4807#endif
4808 return off;
4809}
4810
4811
4812/**
4813 * Emits code for two's complement negation of a 64-bit GPR.
4814 */
4815DECL_INLINE_THROW(uint32_t)
4816iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4817{
4818#if defined(RT_ARCH_AMD64)
4819 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4820#elif defined(RT_ARCH_ARM64)
4821 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4822#else
4823# error "Port me"
4824#endif
4825 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4826 return off;
4827}
4828
4829
4830/**
4831 * Emits code for two's complement negation of a 32-bit GPR.
4832 * @note Bits 32 thru 63 are set to zero.
4833 */
4834DECL_FORCE_INLINE_THROW(uint32_t)
4835iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4836{
4837#if defined(RT_ARCH_AMD64)
4838 /* neg Ev */
4839 if (iGprDst >= 8)
4840 pCodeBuf[off++] = X86_OP_REX_B;
4841 pCodeBuf[off++] = 0xf7;
4842 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4843
4844#elif defined(RT_ARCH_ARM64)
4845 /* sub dst, xzr, dst */
4846 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4847
4848#else
4849# error "Port me"
4850#endif
4851 return off;
4852}
4853
4854
4855/**
4856 * Emits code for two's complement negation of a 32-bit GPR.
4857 * @note Bits 32 thru 63 are set to zero.
4858 */
4859DECL_INLINE_THROW(uint32_t)
4860iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4861{
4862#if defined(RT_ARCH_AMD64)
4863 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4864#elif defined(RT_ARCH_ARM64)
4865 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4866#else
4867# error "Port me"
4868#endif
4869 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4870 return off;
4871}
4872
4873
4874
4875/*********************************************************************************************************************************
4876* Bit Operations *
4877*********************************************************************************************************************************/
4878
4879/**
4880 * Emits code for clearing bits 16 thru 63 in the GPR.
4881 */
4882DECL_INLINE_THROW(uint32_t)
4883iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4884{
4885#if defined(RT_ARCH_AMD64)
4886 /* movzx Gv,Ew */
4887 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
4888 if (iGprDst >= 8)
4889 pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
4890 pbCodeBuf[off++] = 0x0f;
4891 pbCodeBuf[off++] = 0xb7;
4892 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
4893
4894#elif defined(RT_ARCH_ARM64)
4895 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4896# if 1
4897 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
4898# else
4899 ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
4900 //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
4901# endif
4902#else
4903# error "Port me"
4904#endif
4905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4906 return off;
4907}
4908
4909
4910/**
4911 * Emits code for AND'ing two 64-bit GPRs.
4912 *
4913 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4914 * and ARM64 hosts.
4915 */
4916DECL_FORCE_INLINE(uint32_t)
4917iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4918{
4919#if defined(RT_ARCH_AMD64)
4920 /* and Gv, Ev */
4921 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4922 pCodeBuf[off++] = 0x23;
4923 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4924 RT_NOREF(fSetFlags);
4925
4926#elif defined(RT_ARCH_ARM64)
4927 if (!fSetFlags)
4928 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
4929 else
4930 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);
4931
4932#else
4933# error "Port me"
4934#endif
4935 return off;
4936}
4937
4938
4939/**
4940 * Emits code for AND'ing two 64-bit GPRs.
4941 *
4942 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
4943 * and ARM64 hosts.
4944 */
4945DECL_INLINE_THROW(uint32_t)
4946iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4947{
4948#if defined(RT_ARCH_AMD64)
4949 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4950#elif defined(RT_ARCH_ARM64)
4951 off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4952#else
4953# error "Port me"
4954#endif
4955 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4956 return off;
4957}
4958
4959
4960/**
4961 * Emits code for AND'ing two 32-bit GPRs.
4962 */
4963DECL_FORCE_INLINE(uint32_t)
4964iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4965{
4966#if defined(RT_ARCH_AMD64)
4967 /* and Gv, Ev */
4968 if (iGprDst >= 8 || iGprSrc >= 8)
4969 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
4970 pCodeBuf[off++] = 0x23;
4971 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
4972 RT_NOREF(fSetFlags);
4973
4974#elif defined(RT_ARCH_ARM64)
4975 if (!fSetFlags)
4976 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4977 else
4978 pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
4979
4980#else
4981# error "Port me"
4982#endif
4983 return off;
4984}
4985
4986
4987/**
4988 * Emits code for AND'ing two 32-bit GPRs.
4989 */
4990DECL_INLINE_THROW(uint32_t)
4991iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
4992{
4993#if defined(RT_ARCH_AMD64)
4994 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
4995#elif defined(RT_ARCH_ARM64)
4996 off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
4997#else
4998# error "Port me"
4999#endif
5000 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5001 return off;
5002}
5003
5004
5005/**
5006 * Emits code for AND'ing a 64-bit GPR with a constant.
5007 *
5008 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
5009 * and ARM64 hosts.
5010 */
5011DECL_INLINE_THROW(uint32_t)
5012iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
5013{
5014#if defined(RT_ARCH_AMD64)
5015 if ((int64_t)uImm == (int8_t)uImm)
5016 {
5017 /* and Ev, imm8 */
5018 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5019 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5020 pbCodeBuf[off++] = 0x83;
5021 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5022 pbCodeBuf[off++] = (uint8_t)uImm;
5023 }
5024 else if ((int64_t)uImm == (int32_t)uImm)
5025 {
5026 /* and Ev, imm32 */
5027 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5028 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5029 pbCodeBuf[off++] = 0x81;
5030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5031 pbCodeBuf[off++] = RT_BYTE1(uImm);
5032 pbCodeBuf[off++] = RT_BYTE2(uImm);
5033 pbCodeBuf[off++] = RT_BYTE3(uImm);
5034 pbCodeBuf[off++] = RT_BYTE4(uImm);
5035 }
5036 else
5037 {
5038 /* Use temporary register for the 64-bit immediate. */
5039 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5040 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
5041 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5042 }
5043 RT_NOREF(fSetFlags);
5044
5045#elif defined(RT_ARCH_ARM64)
5046 uint32_t uImmR = 0;
5047 uint32_t uImmNandS = 0;
5048 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5049 {
5050 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5051 if (!fSetFlags)
5052 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
5053 else
5054 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
5055 }
5056 else
5057 {
5058 /* Use temporary register for the 64-bit immediate. */
5059 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5060 off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5061 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5062 }
5063
5064#else
5065# error "Port me"
5066#endif
5067 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5068 return off;
5069}
5070
5071
5072/**
5073 * Emits code for AND'ing a 32-bit GPR with a constant.
5074 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5075 * @note For ARM64 this only supports @a uImm values that can be expressed using
5076 * the two 6-bit immediates of the AND/ANDS instructions. The caller must
5077 * make sure this is possible!
5078 */
5079DECL_FORCE_INLINE_THROW(uint32_t)
5080iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5081{
5082#if defined(RT_ARCH_AMD64)
5083 /* and Ev, imm */
5084 if (iGprDst >= 8)
5085 pCodeBuf[off++] = X86_OP_REX_B;
5086 if ((int32_t)uImm == (int8_t)uImm)
5087 {
5088 pCodeBuf[off++] = 0x83;
5089 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5090 pCodeBuf[off++] = (uint8_t)uImm;
5091 }
5092 else
5093 {
5094 pCodeBuf[off++] = 0x81;
5095 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5096 pCodeBuf[off++] = RT_BYTE1(uImm);
5097 pCodeBuf[off++] = RT_BYTE2(uImm);
5098 pCodeBuf[off++] = RT_BYTE3(uImm);
5099 pCodeBuf[off++] = RT_BYTE4(uImm);
5100 }
5101 RT_NOREF(fSetFlags);
5102
5103#elif defined(RT_ARCH_ARM64)
5104 uint32_t uImmR = 0;
5105 uint32_t uImmNandS = 0;
5106 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5107 {
5108 if (!fSetFlags)
5109 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5110 else
5111 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5112 }
5113 else
5114# ifdef IEM_WITH_THROW_CATCH
5115 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5116# else
5117 AssertReleaseFailedStmt(off = UINT32_MAX);
5118# endif
5119
5120#else
5121# error "Port me"
5122#endif
5123 return off;
5124}
5125
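/*
 * Illustrative sketch (not from the original source): the Ex variant above
 * requires an ARM64-encodable mask, so a caller would either verify this with
 * Armv8A64ConvertMask32ToImmRImmS up front or simply use the non-Ex wrapper
 * further down, which falls back to a temporary register automatically.
 */
#if 0 /* sketch only; fMask is a hypothetical caller-supplied constant */
# ifdef RT_ARCH_ARM64
    uint32_t uImmNandS = 0, uImmR = 0;
    Assert(Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmNandS, &uImmR)); /* must be encodable */
# endif
    off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, fMask);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif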
5126
5127/**
5128 * Emits code for AND'ing a 32-bit GPR with a constant.
5129 *
5130 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5131 */
5132DECL_INLINE_THROW(uint32_t)
5133iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
5134{
5135#if defined(RT_ARCH_AMD64)
5136 off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);
5137
5138#elif defined(RT_ARCH_ARM64)
5139 uint32_t uImmR = 0;
5140 uint32_t uImmNandS = 0;
5141 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5142 {
5143 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5144 if (!fSetFlags)
5145 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5146 else
5147 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5148 }
5149 else
5150 {
5151        /* Use temporary register for the immediate. */
5152 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5153 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
5154 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5155 }
5156
5157#else
5158# error "Port me"
5159#endif
5160 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5161 return off;
5162}
5163
5164
5165/**
5166 * Emits code for AND'ing a 64-bit GPR with a constant.
5167 *
5168 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
5169 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
5170 *       are the same.
5171 */
5172DECL_FORCE_INLINE_THROW(uint32_t)
5173iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
5174 bool fSetFlags = false)
5175{
5176#if defined(RT_ARCH_AMD64)
5177 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5178 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
5179 RT_NOREF(fSetFlags);
5180
5181#elif defined(RT_ARCH_ARM64)
5182 uint32_t uImmR = 0;
5183 uint32_t uImmNandS = 0;
5184 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5185 {
5186 if (!fSetFlags)
5187 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5188 else
5189 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
5190 }
5191 else if (iGprDst != iGprSrc)
5192 {
5193 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
5194 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5195 }
5196 else
5197# ifdef IEM_WITH_THROW_CATCH
5198 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5199# else
5200 AssertReleaseFailedStmt(off = UINT32_MAX);
5201# endif
5202
5203#else
5204# error "Port me"
5205#endif
5206 return off;
5207}
5208
5209/**
5210 * Emits code for AND'ing a 32-bit GPR with a constant.
5211 *
5212 * @note For ARM64, any complicated immediate without an AND/ANDS compatible
5213 *       encoding will assert / throw an exception if @a iGprDst and @a iGprSrc
5214 *       are the same.
5215 *
5216 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5217 */
5218DECL_FORCE_INLINE_THROW(uint32_t)
5219iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
5220 bool fSetFlags = false)
5221{
5222#if defined(RT_ARCH_AMD64)
5223 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5224 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5225 RT_NOREF(fSetFlags);
5226
5227#elif defined(RT_ARCH_ARM64)
5228 uint32_t uImmR = 0;
5229 uint32_t uImmNandS = 0;
5230 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5231 {
5232 if (!fSetFlags)
5233 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5234 else
5235 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
5236 }
5237 else if (iGprDst != iGprSrc)
5238 {
5239        /* If a value of 64K or larger has no more than 16 significant bits, we
5240           can use a shifted register operand to save an instruction.  We prefer
5241           the compiler builtin ctz over our own here, since the compiler can
5242           evaluate uImm at compile time when it is a constant (which is often
5243           the case).  This is useful for the TLB lookup code. */
5244 if (uImm > 0xffffU)
5245 {
5246# if defined(__GNUC__)
5247 unsigned cTrailingZeros = __builtin_ctz(uImm);
5248# else
5249 unsigned cTrailingZeros = ASMBitFirstSetU32(uImm) - 1;
5250# endif
5251 if ((uImm >> cTrailingZeros) <= 0xffffU)
5252 {
5253 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprDst, uImm >> cTrailingZeros);
5254 pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprSrc,
5255 iGprDst, true /*f64Bit*/, cTrailingZeros, kArmv8A64InstrShift_Lsl);
5256 return off;
5257 }
5258 }
5259 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
5260 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
5261 }
5262 else
5263# ifdef IEM_WITH_THROW_CATCH
5264 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5265# else
5266 AssertReleaseFailedStmt(off = UINT32_MAX);
5267# endif
5268
5269#else
5270# error "Port me"
5271#endif
5272 return off;
5273}
5274
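/*
 * Worked example for the shift optimization above (illustrative, not from the
 * original source): with uImm = 0x01230000, which has no AND-immediate encoding,
 * the ARM64 path emits roughly
 *      movz w<dst>, #0x123
 *      and  x<dst>, x<src>, x<dst>, lsl #16
 * instead of a two-instruction immediate load followed by a register AND.
 */
#if 0 /* sketch only; iGprDst and iGprSrc are hypothetical caller values */
    off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 10),
                                          off, iGprDst, iGprSrc, UINT32_C(0x01230000));
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
#endif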
5275
5276/**
5277 * Emits code for OR'ing two 64-bit GPRs.
5278 */
5279DECL_FORCE_INLINE(uint32_t)
5280iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5281{
5282#if defined(RT_ARCH_AMD64)
5283 /* or Gv, Ev */
5284 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5285 pCodeBuf[off++] = 0x0b;
5286 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5287
5288#elif defined(RT_ARCH_ARM64)
5289 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);
5290
5291#else
5292# error "Port me"
5293#endif
5294 return off;
5295}
5296
5297
5298/**
5299 * Emits code for OR'ing two 64-bit GPRs.
5300 */
5301DECL_INLINE_THROW(uint32_t)
5302iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5303{
5304#if defined(RT_ARCH_AMD64)
5305 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5306#elif defined(RT_ARCH_ARM64)
5307 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5308#else
5309# error "Port me"
5310#endif
5311 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5312 return off;
5313}
5314
5315
5316/**
5317 * Emits code for OR'ing two 32-bit GPRs.
5318 * @note Bits 63:32 of the destination GPR will be cleared.
5319 */
5320DECL_FORCE_INLINE(uint32_t)
5321iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5322{
5323#if defined(RT_ARCH_AMD64)
5324 /* or Gv, Ev */
5325 if (iGprDst >= 8 || iGprSrc >= 8)
5326 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5327 pCodeBuf[off++] = 0x0b;
5328 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5329
5330#elif defined(RT_ARCH_ARM64)
5331 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5332
5333#else
5334# error "Port me"
5335#endif
5336 return off;
5337}
5338
5339
5340/**
5341 * Emits code for OR'ing two 32-bit GPRs.
5342 * @note Bits 63:32 of the destination GPR will be cleared.
5343 */
5344DECL_INLINE_THROW(uint32_t)
5345iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5346{
5347#if defined(RT_ARCH_AMD64)
5348 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5349#elif defined(RT_ARCH_ARM64)
5350 off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5351#else
5352# error "Port me"
5353#endif
5354 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5355 return off;
5356}
5357
5358
5359/**
5360 * Emits code for OR'ing a 64-bit GPR with a constant.
5361 */
5362DECL_INLINE_THROW(uint32_t)
5363iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
5364{
5365#if defined(RT_ARCH_AMD64)
5366 if ((int64_t)uImm == (int8_t)uImm)
5367 {
5368 /* or Ev, imm8 */
5369 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
5370 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5371 pbCodeBuf[off++] = 0x83;
5372 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5373 pbCodeBuf[off++] = (uint8_t)uImm;
5374 }
5375 else if ((int64_t)uImm == (int32_t)uImm)
5376 {
5377 /* or Ev, imm32 */
5378 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5379 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
5380 pbCodeBuf[off++] = 0x81;
5381 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5382 pbCodeBuf[off++] = RT_BYTE1(uImm);
5383 pbCodeBuf[off++] = RT_BYTE2(uImm);
5384 pbCodeBuf[off++] = RT_BYTE3(uImm);
5385 pbCodeBuf[off++] = RT_BYTE4(uImm);
5386 }
5387 else
5388 {
5389 /* Use temporary register for the 64-bit immediate. */
5390 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5391 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
5392 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5393 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5394 }
5395
5396#elif defined(RT_ARCH_ARM64)
5397 uint32_t uImmR = 0;
5398 uint32_t uImmNandS = 0;
5399 if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
5400 {
5401 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5402 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
5403 }
5404 else
5405 {
5406 /* Use temporary register for the 64-bit immediate. */
5407 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5408 off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
5409 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5410 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5411 }
5412
5413#else
5414# error "Port me"
5415#endif
5416 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5417 return off;
5418}
5419
5420
5421/**
5422 * Emits code for OR'ing a 32-bit GPR with a constant.
5423 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5424 * @note For ARM64 this only supports @a uImm values that can be expressed using
5425 * the two 6-bit immediates of the OR instructions. The caller must make
5426 * sure this is possible!
5427 */
5428DECL_FORCE_INLINE_THROW(uint32_t)
5429iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5430{
5431#if defined(RT_ARCH_AMD64)
5432 /* or Ev, imm */
5433 if (iGprDst >= 8)
5434 pCodeBuf[off++] = X86_OP_REX_B;
5435 if ((int32_t)uImm == (int8_t)uImm)
5436 {
5437 pCodeBuf[off++] = 0x83;
5438 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5439 pCodeBuf[off++] = (uint8_t)uImm;
5440 }
5441 else
5442 {
5443 pCodeBuf[off++] = 0x81;
5444 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
5445 pCodeBuf[off++] = RT_BYTE1(uImm);
5446 pCodeBuf[off++] = RT_BYTE2(uImm);
5447 pCodeBuf[off++] = RT_BYTE3(uImm);
5448 pCodeBuf[off++] = RT_BYTE4(uImm);
5449 }
5450
5451#elif defined(RT_ARCH_ARM64)
5452 uint32_t uImmR = 0;
5453 uint32_t uImmNandS = 0;
5454 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5455 pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5456 else
5457# ifdef IEM_WITH_THROW_CATCH
5458 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5459# else
5460 AssertReleaseFailedStmt(off = UINT32_MAX);
5461# endif
5462
5463#else
5464# error "Port me"
5465#endif
5466 return off;
5467}
5468
5469
5470/**
5471 * Emits code for OR'ing a 32-bit GPR with a constant.
5472 *
5473 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5474 */
5475DECL_INLINE_THROW(uint32_t)
5476iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5477{
5478#if defined(RT_ARCH_AMD64)
5479 off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5480
5481#elif defined(RT_ARCH_ARM64)
5482 uint32_t uImmR = 0;
5483 uint32_t uImmNandS = 0;
5484 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5485 {
5486 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5487 pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5488 }
5489 else
5490 {
5491        /* Use temporary register for the immediate. */
5492 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
5493 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
5494 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
5495 }
5496
5497#else
5498# error "Port me"
5499#endif
5500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5501 return off;
5502}
5503
5504
5505
5506/**
5507 * ORs two 64-bit GPRs together, storing the result in a third register.
5508 */
5509DECL_FORCE_INLINE(uint32_t)
5510iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5511{
5512#ifdef RT_ARCH_AMD64
5513 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5514 {
5515 /** @todo consider LEA */
5516 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
5517 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5518 }
5519 else
5520 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5521
5522#elif defined(RT_ARCH_ARM64)
5523 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);
5524
5525#else
5526# error "Port me!"
5527#endif
5528 return off;
5529}
5530
5531
5532
5533/**
5534 * ORs two 32-bit GPRs together, storing the result in a third register.
5535 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
5536 */
5537DECL_FORCE_INLINE(uint32_t)
5538iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
5539{
5540#ifdef RT_ARCH_AMD64
5541 if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
5542 {
5543 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
5544 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
5545 }
5546 else
5547 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);
5548
5549#elif defined(RT_ARCH_ARM64)
5550 pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);
5551
5552#else
5553# error "Port me!"
5554#endif
5555 return off;
5556}
5557
5558
5559/**
5560 * Emits code for XOR'ing two 64-bit GPRs.
5561 */
5562DECL_INLINE_THROW(uint32_t)
5563iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5564{
5565#if defined(RT_ARCH_AMD64)
5566    /* xor Gv, Ev */
5567 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5568 pCodeBuf[off++] = 0x33;
5569 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5570
5571#elif defined(RT_ARCH_ARM64)
5572 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);
5573
5574#else
5575# error "Port me"
5576#endif
5577 return off;
5578}
5579
5580
5581/**
5582 * Emits code for XOR'ing two 64-bit GPRs.
5583 */
5584DECL_INLINE_THROW(uint32_t)
5585iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5586{
5587#if defined(RT_ARCH_AMD64)
5588 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5589#elif defined(RT_ARCH_ARM64)
5590 off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5591#else
5592# error "Port me"
5593#endif
5594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5595 return off;
5596}
5597
5598
5599/**
5600 * Emits code for XOR'ing two 32-bit GPRs.
5601 */
5602DECL_INLINE_THROW(uint32_t)
5603iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5604{
5605#if defined(RT_ARCH_AMD64)
5606    /* xor Gv, Ev */
5607 if (iGprDst >= 8 || iGprSrc >= 8)
5608 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
5609 pCodeBuf[off++] = 0x33;
5610 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
5611
5612#elif defined(RT_ARCH_ARM64)
5613 pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
5614
5615#else
5616# error "Port me"
5617#endif
5618 return off;
5619}
5620
5621
5622/**
5623 * Emits code for XOR'ing two 32-bit GPRs.
5624 */
5625DECL_INLINE_THROW(uint32_t)
5626iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
5627{
5628#if defined(RT_ARCH_AMD64)
5629 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
5630#elif defined(RT_ARCH_ARM64)
5631 off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
5632#else
5633# error "Port me"
5634#endif
5635 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5636 return off;
5637}
5638
5639
5640/**
5641 * Emits code for XOR'ing a 32-bit GPR with a constant.
5642 * @note Bits 32 thru 63 in the destination will be zero after the operation.
5643 * @note For ARM64 this only supports @a uImm values that can be expressed using
5644 * the two 6-bit immediates of the EOR instructions. The caller must make
5645 * sure this is possible!
5646 */
5647DECL_FORCE_INLINE_THROW(uint32_t)
5648iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5649{
5650#if defined(RT_ARCH_AMD64)
5651 /* xor Ev, imm */
5652 if (iGprDst >= 8)
5653 pCodeBuf[off++] = X86_OP_REX_B;
5654 if ((int32_t)uImm == (int8_t)uImm)
5655 {
5656 pCodeBuf[off++] = 0x83;
5657 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5658 pCodeBuf[off++] = (uint8_t)uImm;
5659 }
5660 else
5661 {
5662 pCodeBuf[off++] = 0x81;
5663 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
5664 pCodeBuf[off++] = RT_BYTE1(uImm);
5665 pCodeBuf[off++] = RT_BYTE2(uImm);
5666 pCodeBuf[off++] = RT_BYTE3(uImm);
5667 pCodeBuf[off++] = RT_BYTE4(uImm);
5668 }
5669
5670#elif defined(RT_ARCH_ARM64)
5671 uint32_t uImmR = 0;
5672 uint32_t uImmNandS = 0;
5673 if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
5674 pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
5675 else
5676# ifdef IEM_WITH_THROW_CATCH
5677 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
5678# else
5679 AssertReleaseFailedStmt(off = UINT32_MAX);
5680# endif
5681
5682#else
5683# error "Port me"
5684#endif
5685 return off;
5686}
5687
5688
5689/**
5690 * Emits code for XOR'ing a 32-bit GPR with a constant.
5691 */
5692DECL_INLINE_THROW(uint32_t)
5693iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
5694{
5695#if defined(RT_ARCH_AMD64)
5696 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
5697#elif defined(RT_ARCH_ARM64)
5698 off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
5699#else
5700# error "Port me"
5701#endif
5702 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5703 return off;
5704}
5705
5706
5707/*********************************************************************************************************************************
5708* Shifting *
5709*********************************************************************************************************************************/
5710
5711/**
5712 * Emits code for shifting a GPR a fixed number of bits to the left.
5713 */
5714DECL_FORCE_INLINE(uint32_t)
5715iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5716{
5717 Assert(cShift > 0 && cShift < 64);
5718
5719#if defined(RT_ARCH_AMD64)
5720 /* shl dst, cShift */
5721 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5722 if (cShift != 1)
5723 {
5724 pCodeBuf[off++] = 0xc1;
5725 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5726 pCodeBuf[off++] = cShift;
5727 }
5728 else
5729 {
5730 pCodeBuf[off++] = 0xd1;
5731 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5732 }
5733
5734#elif defined(RT_ARCH_ARM64)
5735 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);
5736
5737#else
5738# error "Port me"
5739#endif
5740 return off;
5741}
5742
5743
5744/**
5745 * Emits code for shifting a GPR a fixed number of bits to the left.
5746 */
5747DECL_INLINE_THROW(uint32_t)
5748iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5749{
5750#if defined(RT_ARCH_AMD64)
5751 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5752#elif defined(RT_ARCH_ARM64)
5753 off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5754#else
5755# error "Port me"
5756#endif
5757 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5758 return off;
5759}
5760
5761
5762/**
5763 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5764 */
5765DECL_FORCE_INLINE(uint32_t)
5766iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5767{
5768 Assert(cShift > 0 && cShift < 32);
5769
5770#if defined(RT_ARCH_AMD64)
5771 /* shl dst, cShift */
5772 if (iGprDst >= 8)
5773 pCodeBuf[off++] = X86_OP_REX_B;
5774 if (cShift != 1)
5775 {
5776 pCodeBuf[off++] = 0xc1;
5777 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5778 pCodeBuf[off++] = cShift;
5779 }
5780 else
5781 {
5782 pCodeBuf[off++] = 0xd1;
5783 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
5784 }
5785
5786#elif defined(RT_ARCH_ARM64)
5787 pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5788
5789#else
5790# error "Port me"
5791#endif
5792 return off;
5793}
5794
5795
5796/**
5797 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
5798 */
5799DECL_INLINE_THROW(uint32_t)
5800iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5801{
5802#if defined(RT_ARCH_AMD64)
5803 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5804#elif defined(RT_ARCH_ARM64)
5805 off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5806#else
5807# error "Port me"
5808#endif
5809 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5810 return off;
5811}
5812
5813
5814/**
5815 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5816 */
5817DECL_FORCE_INLINE(uint32_t)
5818iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5819{
5820 Assert(cShift > 0 && cShift < 64);
5821
5822#if defined(RT_ARCH_AMD64)
5823 /* shr dst, cShift */
5824 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5825 if (cShift != 1)
5826 {
5827 pCodeBuf[off++] = 0xc1;
5828 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5829 pCodeBuf[off++] = cShift;
5830 }
5831 else
5832 {
5833 pCodeBuf[off++] = 0xd1;
5834 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5835 }
5836
5837#elif defined(RT_ARCH_ARM64)
5838 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);
5839
5840#else
5841# error "Port me"
5842#endif
5843 return off;
5844}
5845
5846
5847/**
5848 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
5849 */
5850DECL_INLINE_THROW(uint32_t)
5851iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5852{
5853#if defined(RT_ARCH_AMD64)
5854 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5855#elif defined(RT_ARCH_ARM64)
5856 off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5857#else
5858# error "Port me"
5859#endif
5860 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5861 return off;
5862}
5863
5864
5865/**
5866 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5867 * right.
5868 */
5869DECL_FORCE_INLINE(uint32_t)
5870iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5871{
5872 Assert(cShift > 0 && cShift < 32);
5873
5874#if defined(RT_ARCH_AMD64)
5875 /* shr dst, cShift */
5876 if (iGprDst >= 8)
5877 pCodeBuf[off++] = X86_OP_REX_B;
5878 if (cShift != 1)
5879 {
5880 pCodeBuf[off++] = 0xc1;
5881 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5882 pCodeBuf[off++] = cShift;
5883 }
5884 else
5885 {
5886 pCodeBuf[off++] = 0xd1;
5887 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
5888 }
5889
5890#elif defined(RT_ARCH_ARM64)
5891 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);
5892
5893#else
5894# error "Port me"
5895#endif
5896 return off;
5897}
5898
5899
5900/**
5901 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5902 * right.
5903 */
5904DECL_INLINE_THROW(uint32_t)
5905iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5906{
5907#if defined(RT_ARCH_AMD64)
5908 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5909#elif defined(RT_ARCH_ARM64)
5910 off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5911#else
5912# error "Port me"
5913#endif
5914 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5915 return off;
5916}
5917
5918
5919/**
5920 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
5921 * right and assigning it to a different GPR.
5922 */
5923DECL_INLINE_THROW(uint32_t)
5924iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
5925{
5926 Assert(cShift > 0); Assert(cShift < 32);
5927#if defined(RT_ARCH_AMD64)
5928 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
5929 off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);
5930
5931#elif defined(RT_ARCH_ARM64)
5932 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);
5933
5934#else
5935# error "Port me"
5936#endif
5937 return off;
5938}
5939
5940
5941/**
5942 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5943 */
5944DECL_FORCE_INLINE(uint32_t)
5945iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5946{
5947 Assert(cShift > 0 && cShift < 64);
5948
5949#if defined(RT_ARCH_AMD64)
5950 /* sar dst, cShift */
5951 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
5952 if (cShift != 1)
5953 {
5954 pCodeBuf[off++] = 0xc1;
5955 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5956 pCodeBuf[off++] = cShift;
5957 }
5958 else
5959 {
5960 pCodeBuf[off++] = 0xd1;
5961 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
5962 }
5963
5964#elif defined(RT_ARCH_ARM64)
5965 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);
5966
5967#else
5968# error "Port me"
5969#endif
5970 return off;
5971}
5972
5973
5974/**
5975 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
5976 */
5977DECL_INLINE_THROW(uint32_t)
5978iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5979{
5980#if defined(RT_ARCH_AMD64)
5981 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
5982#elif defined(RT_ARCH_ARM64)
5983 off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
5984#else
5985# error "Port me"
5986#endif
5987 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5988 return off;
5989}
5990
5991
5992/**
5993 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
5994 */
5995DECL_FORCE_INLINE(uint32_t)
5996iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
5997{
5998    Assert(cShift > 0 && cShift < 32);
5999
6000#if defined(RT_ARCH_AMD64)
6001 /* sar dst, cShift */
6002 if (iGprDst >= 8)
6003 pCodeBuf[off++] = X86_OP_REX_B;
6004 if (cShift != 1)
6005 {
6006 pCodeBuf[off++] = 0xc1;
6007 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
6008 pCodeBuf[off++] = cShift;
6009 }
6010 else
6011 {
6012 pCodeBuf[off++] = 0xd1;
6013 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
6014 }
6015
6016#elif defined(RT_ARCH_ARM64)
6017 pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);
6018
6019#else
6020# error "Port me"
6021#endif
6022 return off;
6023}
6024
6025
6026/**
6027 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
6028 */
6029DECL_INLINE_THROW(uint32_t)
6030iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6031{
6032#if defined(RT_ARCH_AMD64)
6033 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
6034#elif defined(RT_ARCH_ARM64)
6035 off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
6036#else
6037# error "Port me"
6038#endif
6039 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6040 return off;
6041}
6042
6043
6044/**
6045 * Emits code for rotating a GPR a fixed number of bits to the left.
6046 */
6047DECL_FORCE_INLINE(uint32_t)
6048iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6049{
6050 Assert(cShift > 0 && cShift < 64);
6051
6052#if defined(RT_ARCH_AMD64)
6053 /* rol dst, cShift */
6054 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
6055 if (cShift != 1)
6056 {
6057 pCodeBuf[off++] = 0xc1;
6058 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
6059 pCodeBuf[off++] = cShift;
6060 }
6061 else
6062 {
6063 pCodeBuf[off++] = 0xd1;
6064 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
6065 }
6066
6067#elif defined(RT_ARCH_ARM64)
6068 pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);
6069
6070#else
6071# error "Port me"
6072#endif
6073 return off;
6074}
6075
6076
6077#if defined(RT_ARCH_AMD64)
6078/**
6079 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
6080 */
6081DECL_FORCE_INLINE(uint32_t)
6082iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
6083{
6084 Assert(cShift > 0 && cShift < 32);
6085
6086 /* rcl dst, cShift */
6087 if (iGprDst >= 8)
6088 pCodeBuf[off++] = X86_OP_REX_B;
6089 if (cShift != 1)
6090 {
6091 pCodeBuf[off++] = 0xc1;
6092 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6093 pCodeBuf[off++] = cShift;
6094 }
6095 else
6096 {
6097 pCodeBuf[off++] = 0xd1;
6098 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
6099 }
6100
6101 return off;
6102}
6103#endif /* RT_ARCH_AMD64 */
6104
6105
6106
6107/**
6108 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
6109 * @note Bits 63:32 of the destination GPR will be cleared.
6110 */
6111DECL_FORCE_INLINE(uint32_t)
6112iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6113{
6114#if defined(RT_ARCH_AMD64)
6115 /*
6116 * There is no bswap r16 on x86 (the encoding exists but does not work).
6117 * So just use a rol (gcc -O2 does that).
6118 *
6119 * rol r16, 0x8
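 *      e.g. 0x1234 in the low word becomes 0x3412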
6120 */
6121 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6122 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6123 if (iGpr >= 8)
6124 pbCodeBuf[off++] = X86_OP_REX_B;
6125 pbCodeBuf[off++] = 0xc1;
6126 pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
6127 pbCodeBuf[off++] = 0x08;
6128#elif defined(RT_ARCH_ARM64)
6129 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6130
6131 pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
6132#else
6133# error "Port me"
6134#endif
6135
6136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6137 return off;
6138}
6139
6140
6141/**
6142 * Emits code for reversing the byte order in a 32-bit GPR.
6143 * @note Bits 63:32 of the destination GPR will be cleared.
6144 */
6145DECL_FORCE_INLINE(uint32_t)
6146iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6147{
6148#if defined(RT_ARCH_AMD64)
6149 /* bswap r32 */
6150 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6151
6152 if (iGpr >= 8)
6153 pbCodeBuf[off++] = X86_OP_REX_B;
6154 pbCodeBuf[off++] = 0x0f;
6155 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6156#elif defined(RT_ARCH_ARM64)
6157 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6158
6159 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
6160#else
6161# error "Port me"
6162#endif
6163
6164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6165 return off;
6166}
6167
6168
6169/**
6170 * Emits code for reversing the byte order in a 64-bit GPR.
6171 */
6172DECL_FORCE_INLINE(uint32_t)
6173iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
6174{
6175#if defined(RT_ARCH_AMD64)
6176 /* bswap r64 */
6177 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6178
6179 if (iGpr >= 8)
6180 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
6181 else
6182 pbCodeBuf[off++] = X86_OP_REX_W;
6183 pbCodeBuf[off++] = 0x0f;
6184 pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
6185#elif defined(RT_ARCH_ARM64)
6186 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6187
6188 pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
6189#else
6190# error "Port me"
6191#endif
6192
6193 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6194 return off;
6195}
6196
6197
6198/*********************************************************************************************************************************
6199* Bitfield manipulation *
6200*********************************************************************************************************************************/
6201
6202/**
6203 * Emits code for clearing a bit in a 32-bit GPR.
6204 */
6205DECL_FORCE_INLINE(uint32_t)
6206iemNativeEmitBitClearInGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const iGpr, uint8_t iBit)
6207{
6208 Assert(iBit < 32);
6209
6210#if defined(RT_ARCH_AMD64)
6211 /* btr r32, imm8 */
6212 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6213
6214 if (iGpr >= 8)
6215 pbCodeBuf[off++] = X86_OP_REX_B;
6216 pbCodeBuf[off++] = 0x0f;
6217 pbCodeBuf[off++] = 0xba;
6218 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGpr & 7);
6219 pbCodeBuf[off++] = iBit;
6220#elif defined(RT_ARCH_ARM64)
6221 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6222
6223 pu32CodeBuf[off++] = Armv8A64MkInstrBfc(iGpr, iBit /*offFirstBit*/, 1 /*cBits*/, true /*f64Bit*/);
6224#else
6225# error "Port me"
6226#endif
6227
6228 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6229 return off;
6230}
6231
6232
6233/*********************************************************************************************************************************
6234* Compare and Testing *
6235*********************************************************************************************************************************/
6236
6237
6238#ifdef RT_ARCH_ARM64
6239/**
6240 * Emits an ARM64 compare instruction.
6241 */
6242DECL_INLINE_THROW(uint32_t)
6243iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
6244 bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
6245{
6246 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6247 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
6248 f64Bit, true /*fSetFlags*/, cShift, enmShift);
6249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6250 return off;
6251}
6252#endif
6253
6254
6255/**
6256 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6257 * with conditional instructions.
6258 */
6259DECL_FORCE_INLINE(uint32_t)
6260iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6261{
6262#ifdef RT_ARCH_AMD64
6263 /* cmp Gv, Ev */
6264 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6265 pCodeBuf[off++] = 0x3b;
6266 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6267
6268#elif defined(RT_ARCH_ARM64)
6269 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);
6270
6271#else
6272# error "Port me!"
6273#endif
6274 return off;
6275}
6276
6277
6278/**
6279 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
6280 * with conditional instructions.
6281 */
6282DECL_INLINE_THROW(uint32_t)
6283iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6284{
6285#ifdef RT_ARCH_AMD64
6286 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6287#elif defined(RT_ARCH_ARM64)
6288 off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6289#else
6290# error "Port me!"
6291#endif
6292 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6293 return off;
6294}
6295
6296
6297/**
6298 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
6299 * with conditional instructions.
6300 */
6301DECL_FORCE_INLINE(uint32_t)
6302iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6303{
6304#ifdef RT_ARCH_AMD64
6305 /* cmp Gv, Ev */
6306 if (iGprLeft >= 8 || iGprRight >= 8)
6307 pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
6308 pCodeBuf[off++] = 0x3b;
6309 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);
6310
6311#elif defined(RT_ARCH_ARM64)
6312 pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);
6313
6314#else
6315# error "Port me!"
6316#endif
6317 return off;
6318}
6319
6320
6321/**
6322 * Emits a compare of two 32-bit GPRs, setting the status flags for use
6323 * with a conditional instruction.
6324 */
6325DECL_INLINE_THROW(uint32_t)
6326iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
6327{
6328#ifdef RT_ARCH_AMD64
6329 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
6330#elif defined(RT_ARCH_ARM64)
6331 off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
6332#else
6333# error "Port me!"
6334#endif
6335 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6336 return off;
6337}
6338
6339
6340/**
6341 * Emits a compare of a 64-bit GPR with a constant value, setting the status
6342 * flags for use with a conditional instruction.
6343 */
6344DECL_INLINE_THROW(uint32_t)
6345iemNativeEmitCmpGprWithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft,
6346 uint64_t uImm, uint8_t idxTmpReg = UINT8_MAX)
6347{
6348#ifdef RT_ARCH_AMD64
6349 if ((int8_t)uImm == (int64_t)uImm)
6350 {
6351 /* cmp Ev, Ib */
6352 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6353 pCodeBuf[off++] = 0x83;
6354 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6355 pCodeBuf[off++] = (uint8_t)uImm;
6356 return off;
6357 }
6358 if ((int32_t)uImm == (int64_t)uImm)
6359 {
6360 /* cmp Ev, imm */
6361 pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6362 pCodeBuf[off++] = 0x81;
6363 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6364 pCodeBuf[off++] = RT_BYTE1(uImm);
6365 pCodeBuf[off++] = RT_BYTE2(uImm);
6366 pCodeBuf[off++] = RT_BYTE3(uImm);
6367 pCodeBuf[off++] = RT_BYTE4(uImm);
6368 return off;
6369 }
6370
6371#elif defined(RT_ARCH_ARM64)
6372 if (uImm < _4K)
6373 {
6374 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6375 true /*64Bit*/, true /*fSetFlags*/);
6376 return off;
6377 }
6378 if ((uImm & ~(uint64_t)0xfff000) == 0)
6379 {
6380 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6381 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6382 return off;
6383 }
6384
6385#else
6386# error "Port me!"
6387#endif
6388
6389 if (idxTmpReg != UINT8_MAX)
6390 {
6391 /* Use temporary register for the immediate. */
6392 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpReg, uImm);
6393 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, iGprLeft, idxTmpReg);
6394 }
6395 else
6396# ifdef IEM_WITH_THROW_CATCH
6397 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6398# else
6399 AssertReleaseFailedStmt(off = UINT32_MAX);
6400# endif
6401
6402 return off;
6403}
6404
6405
6406/**
6407 * Emits a compare of a 64-bit GPR with a constant value, setting the status
6408 * flags for use with a conditional instruction.
6409 */
6410DECL_INLINE_THROW(uint32_t)
6411iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
6412{
6413#ifdef RT_ARCH_AMD64
6414 if ((int8_t)uImm == (int64_t)uImm)
6415 {
6416 /* cmp Ev, Ib */
6417 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
6418 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6419 pbCodeBuf[off++] = 0x83;
6420 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6421 pbCodeBuf[off++] = (uint8_t)uImm;
6422 }
6423 else if ((int32_t)uImm == (int64_t)uImm)
6424 {
6425 /* cmp Ev, imm */
6426 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
6427 pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
6428 pbCodeBuf[off++] = 0x81;
6429 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6431 pbCodeBuf[off++] = RT_BYTE1(uImm);
6432 pbCodeBuf[off++] = RT_BYTE2(uImm);
6433 pbCodeBuf[off++] = RT_BYTE3(uImm);
6434 pbCodeBuf[off++] = RT_BYTE4(uImm);
6435 }
6436 else
6437 {
6438 /* Use temporary register for the immediate. */
6439 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6440 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6441 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6442 }
6443
6444#elif defined(RT_ARCH_ARM64)
6445 /** @todo guess there are cleverer things we can do here... */
6446 if (uImm < _4K)
6447 {
6448 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6449 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6450 true /*64Bit*/, true /*fSetFlags*/);
6451 }
6452 else if ((uImm & ~(uint64_t)0xfff000) == 0)
6453 {
6454 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6455 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6456 true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6457 }
6458 else
6459 {
6460 /* Use temporary register for the immediate. */
6461 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6462 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
6463 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6464 }
6465
6466#else
6467# error "Port me!"
6468#endif
6469
6470 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6471 return off;
6472}
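
/**
 * Usage sketch (illustrative only, not part of the emitter API): pairs the
 * 64-bit compare-with-immediate emitter above with one of the conditional
 * branch emitters from the Branching section further down.  The register
 * number, constant and label type are caller supplied placeholders.
 *
 * @code{.cpp}
 *  DECL_INLINE_THROW(uint32_t)
 *  iemNativeEmitExampleBranchIfGprNotEqualConst(PIEMRECOMPILERSTATE pReNative, uint32_t off,
 *                                               uint8_t iGprValue, uint64_t uConst,
 *                                               IEMNATIVELABELTYPE enmLabelType)
 *  {
 *      // cmp iGprValue, uConst  /  subs xzr, xValue, #uConst (or via a temporary immediate register)
 *      off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprValue, uConst);
 *      // jne rel32  /  b.ne imm19 to a new, not yet defined label of the given type
 *      return iemNativeEmitJnzToNewLabel(pReNative, off, enmLabelType);
 *  }
 * @endcode
 */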
6473
6474
6475/**
6476 * Emits a compare of a 32-bit GPR with a constant value, setting the status
6477 * flags for use with a conditional instruction.
6478 *
6479 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6480 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6481 * bits all zero). Will release assert or throw exception if the caller
6482 * violates this restriction.
6483 */
6484DECL_FORCE_INLINE_THROW(uint32_t)
6485iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6486{
6487#ifdef RT_ARCH_AMD64
6488 if (iGprLeft >= 8)
6489 pCodeBuf[off++] = X86_OP_REX_B;
6490 if (uImm <= UINT32_C(0x7f))
6491 {
6492 /* cmp Ev, Ib */
6493 pCodeBuf[off++] = 0x83;
6494 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6495 pCodeBuf[off++] = (uint8_t)uImm;
6496 }
6497 else
6498 {
6499 /* cmp Ev, imm */
6500 pCodeBuf[off++] = 0x81;
6501 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6502 pCodeBuf[off++] = RT_BYTE1(uImm);
6503 pCodeBuf[off++] = RT_BYTE2(uImm);
6504 pCodeBuf[off++] = RT_BYTE3(uImm);
6505 pCodeBuf[off++] = RT_BYTE4(uImm);
6506 }
6507
6508#elif defined(RT_ARCH_ARM64)
6509 /** @todo guess there are cleverer things we can do here... */
6510 if (uImm < _4K)
6511 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6512 false /*64Bit*/, true /*fSetFlags*/);
6513 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6514 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6515 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6516 else
6517# ifdef IEM_WITH_THROW_CATCH
6518 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6519# else
6520 AssertReleaseFailedStmt(off = UINT32_MAX);
6521# endif
6522
6523#else
6524# error "Port me!"
6525#endif
6526 return off;
6527}
6528
6529
6530/**
6531 * Emits a compare of a 32-bit GPR with a constant value, setting the status
6532 * flags for use with a conditional instruction.
6533 */
6534DECL_INLINE_THROW(uint32_t)
6535iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
6536{
6537#ifdef RT_ARCH_AMD64
6538 off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);
6539
6540#elif defined(RT_ARCH_ARM64)
6541 /** @todo guess there are cleverer things we can do here... */
6542 if (uImm < _4K)
6543 {
6544 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6545 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
6546 false /*64Bit*/, true /*fSetFlags*/);
6547 }
6548 else if ((uImm & ~(uint32_t)0xfff000) == 0)
6549 {
6550 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6551 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
6552 false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
6553 }
6554 else
6555 {
6556 /* Use temporary register for the immediate. */
6557 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
6558 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
6559 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
6560 }
6561
6562#else
6563# error "Port me!"
6564#endif
6565
6566 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6567 return off;
6568}
6569
6570
6571/**
6572 * Emits a compare of a 16-bit GPR with a constant value, setting the status
6573 * flags for use with a conditional instruction.
6574 *
6575 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
6576 * 16-bit value from @a iGprLeft.
6577 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
6578 * shifted 12 bits to the left (e.g. 0x1000..0xfff000 with the lower 12
6579 * bits all zero). Will release assert or throw exception if the caller
6580 * violates this restriction.
6581 */
6582DECL_FORCE_INLINE_THROW(uint32_t)
6583iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6584 uint8_t idxTmpReg = UINT8_MAX)
6585{
6586#ifdef RT_ARCH_AMD64
6587 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6588 if (iGprLeft >= 8)
6589 pCodeBuf[off++] = X86_OP_REX_B;
6590 if (uImm <= UINT32_C(0x7f))
6591 {
6592 /* cmp Ev, Ib */
6593 pCodeBuf[off++] = 0x83;
6594 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6595 pCodeBuf[off++] = (uint8_t)uImm;
6596 }
6597 else
6598 {
6599 /* cmp Ev, imm */
6600 pCodeBuf[off++] = 0x81;
6601 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
6602 pCodeBuf[off++] = RT_BYTE1(uImm);
6603 pCodeBuf[off++] = RT_BYTE2(uImm);
6604 }
6605 RT_NOREF(idxTmpReg);
6606
6607#elif defined(RT_ARCH_ARM64)
6608# ifdef IEM_WITH_THROW_CATCH
6609 AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
6610# else
6611 AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
6612# endif
6613 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6614 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
6615 off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);
6616
6617#else
6618# error "Port me!"
6619#endif
6620 return off;
6621}
6622
6623
6624/**
6625 * Emits a compare of a 16-bit GPR with a constant value, setting the status
6626 * flags for use with a conditional instruction.
6627 *
6628 * @note ARM64: Helper register is required (idxTmpReg).
6629 */
6630DECL_INLINE_THROW(uint32_t)
6631iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
6632 uint8_t idxTmpReg = UINT8_MAX)
6633{
6634#ifdef RT_ARCH_AMD64
6635 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
6636#elif defined(RT_ARCH_ARM64)
6637 off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
6638#else
6639# error "Port me!"
6640#endif
6641 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6642 return off;
6643}
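
/**
 * Usage sketch (illustrative only): a 16-bit compare followed by a conditional
 * branch.  On ARM64 the caller must supply an already allocated scratch
 * register for isolating the low 16 bits (it is ignored on AMD64), and the
 * immediate must respect the range restriction documented above.  All
 * register numbers and the label type are placeholders.
 *
 * @code{.cpp}
 *  DECL_INLINE_THROW(uint32_t)
 *  iemNativeEmitExampleBranchIfGpr16Above(PIEMRECOMPILERSTATE pReNative, uint32_t off,
 *                                         uint8_t iGprValue, uint16_t uConst,
 *                                         uint8_t idxTmpReg, IEMNATIVELABELTYPE enmLabelType)
 *  {
 *      // cmp ax-sized value, uConst  /  and tmp, value, #0xffff; subs xzr, tmp, #uConst
 *      off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprValue, uConst, idxTmpReg);
 *      // ja rel32  /  b.hi imm19 when the value is (unsigned) above the constant
 *      return iemNativeEmitJaToNewLabel(pReNative, off, enmLabelType);
 *  }
 * @endcode
 */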
6644
6645
6646
6647/*********************************************************************************************************************************
6648* Branching *
6649*********************************************************************************************************************************/
6650
6651/**
6652 * Emits a JMP rel32 / B imm26 to the given label.
6653 */
6654DECL_FORCE_INLINE_THROW(uint32_t)
6655iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6656{
6657 Assert(idxLabel < pReNative->cLabels);
6658
6659#ifdef RT_ARCH_AMD64
6660 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6661 {
6662 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6663 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6664 {
6665 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6666 pCodeBuf[off++] = (uint8_t)offRel;
6667 }
6668 else
6669 {
6670 offRel -= 3;
6671 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6672 pCodeBuf[off++] = RT_BYTE1(offRel);
6673 pCodeBuf[off++] = RT_BYTE2(offRel);
6674 pCodeBuf[off++] = RT_BYTE3(offRel);
6675 pCodeBuf[off++] = RT_BYTE4(offRel);
6676 }
6677 }
6678 else
6679 {
6680 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6681 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6682 pCodeBuf[off++] = 0xfe;
6683 pCodeBuf[off++] = 0xff;
6684 pCodeBuf[off++] = 0xff;
6685 pCodeBuf[off++] = 0xff;
6686 }
6687 pCodeBuf[off++] = 0xcc; /* int3 poison */
6688
6689#elif defined(RT_ARCH_ARM64)
6690 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6691 {
6692 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6693 off++;
6694 }
6695 else
6696 {
6697 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6698 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6699 }
6700
6701#else
6702# error "Port me!"
6703#endif
6704 return off;
6705}
6706
6707
6708/**
6709 * Emits a JMP rel32 / B imm26 to the given label.
6710 */
6711DECL_INLINE_THROW(uint32_t)
6712iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6713{
6714#ifdef RT_ARCH_AMD64
6715 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6716#elif defined(RT_ARCH_ARM64)
6717 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6718#else
6719# error "Port me!"
6720#endif
6721 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6722 return off;
6723}
6724
6725
6726/**
6727 * Emits a JMP rel32 / B imm26 to a new undefined label.
6728 */
6729DECL_INLINE_THROW(uint32_t)
6730iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6731{
6732 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6733 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6734}
6735
6736/** Condition type. */
6737#ifdef RT_ARCH_AMD64
6738typedef enum IEMNATIVEINSTRCOND : uint8_t
6739{
6740 kIemNativeInstrCond_o = 0,
6741 kIemNativeInstrCond_no,
6742 kIemNativeInstrCond_c,
6743 kIemNativeInstrCond_nc,
6744 kIemNativeInstrCond_e,
6745 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6746 kIemNativeInstrCond_ne,
6747 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6748 kIemNativeInstrCond_be,
6749 kIemNativeInstrCond_nbe,
6750 kIemNativeInstrCond_s,
6751 kIemNativeInstrCond_ns,
6752 kIemNativeInstrCond_p,
6753 kIemNativeInstrCond_np,
6754 kIemNativeInstrCond_l,
6755 kIemNativeInstrCond_nl,
6756 kIemNativeInstrCond_le,
6757 kIemNativeInstrCond_nle
6758} IEMNATIVEINSTRCOND;
6759#elif defined(RT_ARCH_ARM64)
6760typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6761# define kIemNativeInstrCond_o todo_conditional_codes
6762# define kIemNativeInstrCond_no todo_conditional_codes
6763# define kIemNativeInstrCond_c todo_conditional_codes
6764# define kIemNativeInstrCond_nc todo_conditional_codes
6765# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6766# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6767# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6768# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6769# define kIemNativeInstrCond_s todo_conditional_codes
6770# define kIemNativeInstrCond_ns todo_conditional_codes
6771# define kIemNativeInstrCond_p todo_conditional_codes
6772# define kIemNativeInstrCond_np todo_conditional_codes
6773# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6774# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6775# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6776# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6777#else
6778# error "Port me!"
6779#endif
6780
6781
6782/**
6783 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6784 */
6785DECL_FORCE_INLINE_THROW(uint32_t)
6786iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6787 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6788{
6789 Assert(idxLabel < pReNative->cLabels);
6790
6791 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6792#ifdef RT_ARCH_AMD64
6793 if (offLabel >= off)
6794 {
6795 /* jcc rel32 */
6796 pCodeBuf[off++] = 0x0f;
6797 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6798 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6799 pCodeBuf[off++] = 0x00;
6800 pCodeBuf[off++] = 0x00;
6801 pCodeBuf[off++] = 0x00;
6802 pCodeBuf[off++] = 0x00;
6803 }
6804 else
6805 {
6806 int32_t offDisp = offLabel - (off + 2);
6807 if ((int8_t)offDisp == offDisp)
6808 {
6809 /* jcc rel8 */
6810 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6811 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6812 }
6813 else
6814 {
6815 /* jcc rel32 */
6816 offDisp -= 4;
6817 pCodeBuf[off++] = 0x0f;
6818 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6819 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6820 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6821 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6822 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6823 }
6824 }
6825
6826#elif defined(RT_ARCH_ARM64)
6827 if (offLabel >= off)
6828 {
6829 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6830 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6831 }
6832 else
6833 {
6834 Assert(off - offLabel <= 0x3ffffU);
6835 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6836 off++;
6837 }
6838
6839#else
6840# error "Port me!"
6841#endif
6842 return off;
6843}
6844
6845
6846/**
6847 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6848 */
6849DECL_INLINE_THROW(uint32_t)
6850iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6851{
6852#ifdef RT_ARCH_AMD64
6853 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6854#elif defined(RT_ARCH_ARM64)
6855 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6856#else
6857# error "Port me!"
6858#endif
6859 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6860 return off;
6861}
6862
6863
6864/**
6865 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6866 */
6867DECL_INLINE_THROW(uint32_t)
6868iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6869 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6870{
6871 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6872 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6873}
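
/**
 * Usage sketch (illustrative only): the kIemNativeInstrCond_xxx values that
 * are defined for both hosts (e.g. _e, _ne, _be, _nbe, _l, _nl, _le, _nle)
 * can be passed straight to the generic Jcc emitters, so one call covers
 * AMD64 and ARM64.  The label index is assumed to come from an earlier
 * iemNativeLabelCreate() call.
 *
 * @code{.cpp}
 *  // Branch to idxLabel when the preceding compare found left <= right (signed).
 *  off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_le);
 * @endcode
 */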
6874
6875
6876/**
6877 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6878 */
6879DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6880{
6881#ifdef RT_ARCH_AMD64
6882 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6883#elif defined(RT_ARCH_ARM64)
6884 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6885#else
6886# error "Port me!"
6887#endif
6888}
6889
6890/**
6891 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6892 */
6893DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6894 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6895{
6896#ifdef RT_ARCH_AMD64
6897 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6898#elif defined(RT_ARCH_ARM64)
6899 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6900#else
6901# error "Port me!"
6902#endif
6903}
6904
6905
6906/**
6907 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6908 */
6909DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6910{
6911#ifdef RT_ARCH_AMD64
6912 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6913#elif defined(RT_ARCH_ARM64)
6914 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6915#else
6916# error "Port me!"
6917#endif
6918}
6919
6920/**
6921 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6922 */
6923DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6924 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6925{
6926#ifdef RT_ARCH_AMD64
6927 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6928#elif defined(RT_ARCH_ARM64)
6929 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6930#else
6931# error "Port me!"
6932#endif
6933}
6934
6935
6936/**
6937 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6938 */
6939DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6940{
6941#ifdef RT_ARCH_AMD64
6942 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6943#elif defined(RT_ARCH_ARM64)
6944 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6945#else
6946# error "Port me!"
6947#endif
6948}
6949
6950/**
6951 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6952 */
6953DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6954 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6955{
6956#ifdef RT_ARCH_AMD64
6957 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6958#elif defined(RT_ARCH_ARM64)
6959 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6960#else
6961# error "Port me!"
6962#endif
6963}
6964
6965
6966/**
6967 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6968 */
6969DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6970{
6971#ifdef RT_ARCH_AMD64
6972 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6973#elif defined(RT_ARCH_ARM64)
6974 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6975#else
6976# error "Port me!"
6977#endif
6978}
6979
6980/**
6981 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6982 */
6983DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6984 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6985{
6986#ifdef RT_ARCH_AMD64
6987 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6988#elif defined(RT_ARCH_ARM64)
6989 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6990#else
6991# error "Port me!"
6992#endif
6993}
6994
6995
6996/**
6997 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6998 */
6999DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
7000{
7001#ifdef RT_ARCH_AMD64
7002 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
7003#elif defined(RT_ARCH_ARM64)
7004 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
7005#else
7006# error "Port me!"
7007#endif
7008}
7009
7010/**
7011 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
7012 */
7013DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7014 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7015{
7016#ifdef RT_ARCH_AMD64
7017 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
7018#elif defined(RT_ARCH_ARM64)
7019 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
7020#else
7021# error "Port me!"
7022#endif
7023}
7024
7025
7026/**
7027 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
7028 *
7029 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
7030 *
7031 * Only use hardcoded jumps forward when emitting for exactly one
7032 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
7033 * the right target address on all platforms!
7034 *
7035 * Please also note that on x86 it is necessary to pass off + 256 or higher
7036 * for @a offTarget if one believes the intervening code is more than 127
7037 * bytes long.
7038 */
7039DECL_FORCE_INLINE(uint32_t)
7040iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
7041{
7042#ifdef RT_ARCH_AMD64
7043 /* jcc rel8 / rel32 */
7044 int32_t offDisp = (int32_t)(offTarget - (off + 2));
7045 if (offDisp < 128 && offDisp >= -128)
7046 {
7047 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
7048 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7049 }
7050 else
7051 {
7052 offDisp -= 4;
7053 pCodeBuf[off++] = 0x0f;
7054 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
7055 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7056 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7057 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7058 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7059 }
7060
7061#elif defined(RT_ARCH_ARM64)
7062 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
7063 off++;
7064#else
7065# error "Port me!"
7066#endif
7067 return off;
7068}
7069
7070
7071/**
7072 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
7073 *
7074 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
7075 *
7076 * Only use hardcoded jumps forward when emitting for exactly one
7077 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
7078 * the right target address on all platforms!
7079 *
7080 * Please also note that on x86 it is necessary to pass off + 256 or higher
7081 * for @a offTarget if one believes the intervening code is more than 127
7082 * bytes long.
7083 */
7084DECL_INLINE_THROW(uint32_t)
7085iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
7086{
7087#ifdef RT_ARCH_AMD64
7088 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
7089#elif defined(RT_ARCH_ARM64)
7090 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
7091#else
7092# error "Port me!"
7093#endif
7094 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7095 return off;
7096}
7097
7098
7099/**
7100 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
7101 *
7102 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7103 */
7104DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7105{
7106#ifdef RT_ARCH_AMD64
7107 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
7108#elif defined(RT_ARCH_ARM64)
7109 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
7110#else
7111# error "Port me!"
7112#endif
7113}
7114
7115
7116/**
7117 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
7118 *
7119 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7120 */
7121DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7122{
7123#ifdef RT_ARCH_AMD64
7124 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
7125#elif defined(RT_ARCH_ARM64)
7126 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
7127#else
7128# error "Port me!"
7129#endif
7130}
7131
7132
7133/**
7134 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
7135 *
7136 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7137 */
7138DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7139{
7140#ifdef RT_ARCH_AMD64
7141 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
7142#elif defined(RT_ARCH_ARM64)
7143 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
7144#else
7145# error "Port me!"
7146#endif
7147}
7148
7149
7150/**
7151 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
7152 *
7153 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7154 */
7155DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7156{
7157#ifdef RT_ARCH_AMD64
7158 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
7159#elif defined(RT_ARCH_ARM64)
7160 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
7161#else
7162# error "Port me!"
7163#endif
7164}
7165
7166
7167/**
7168 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7169 *
7170 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7171 */
7172DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
7173{
7174#ifdef RT_ARCH_AMD64
7175 /* jmp rel8 or rel32 */
7176 int32_t offDisp = offTarget - (off + 2);
7177 if (offDisp < 128 && offDisp >= -128)
7178 {
7179 pCodeBuf[off++] = 0xeb;
7180 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7181 }
7182 else
7183 {
7184 offDisp -= 3;
7185 pCodeBuf[off++] = 0xe9;
7186 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
7187 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
7188 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
7189 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
7190 }
7191
7192#elif defined(RT_ARCH_ARM64)
7193 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
7194 off++;
7195
7196#else
7197# error "Port me!"
7198#endif
7199 return off;
7200}
7201
7202
7203/**
7204 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
7205 *
7206 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
7207 */
7208DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
7209{
7210#ifdef RT_ARCH_AMD64
7211 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
7212#elif defined(RT_ARCH_ARM64)
7213 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
7214#else
7215# error "Port me!"
7216#endif
7217 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7218 return off;
7219}
7220
7221
7222/**
7223 * Fixes up a conditional jump to a fixed label.
7224 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
7225 * iemNativeEmitJzToFixed, ...
7226 */
7227DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
7228{
7229#ifdef RT_ARCH_AMD64
7230 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
7231 uint8_t const bOpcode = pbCodeBuf[offFixup];
7232 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
7233 {
7234 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
7235 AssertStmt((int8_t)pbCodeBuf[offFixup + 1] == (int32_t)(offTarget - (offFixup + 2)),
7236 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
7237 }
7238 else
7239 {
7240 if (bOpcode != 0x0f)
7241 Assert(bOpcode == 0xe9);
7242 else
7243 {
7244 offFixup += 1;
7245 Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < (uint8_t)0x10);
7246 }
7247 uint32_t const offRel32 = offTarget - (offFixup + 5);
7248 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
7249 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
7250 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
7251 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
7252 }
7253
7254#elif defined(RT_ARCH_ARM64)
7255 int32_t const offDisp = offTarget - offFixup;
7256 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
7257 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
7258 {
7259 /* B.COND + BC.COND */
7260 Assert(offDisp >= -262144 && offDisp < 262144);
7261 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7262 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
7263 }
7264 else if ((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000))
7265 {
7266 /* B imm26 */
7267 Assert(offDisp >= -33554432 && offDisp < 33554432);
7268 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
7269 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
7270 }
7271 else if ((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x34000000))
7272 {
7273 /* CBZ / CBNZ reg, imm19 */
7274 Assert((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x34000000));
7275 Assert(offDisp >= -1048576 && offDisp < 1048576);
7276 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
7277 | (((uint32_t)offDisp << 5) & UINT32_C(0x00ffffe0));
7278 }
7279 else
7280 {
7281 /* TBZ / TBNZ reg, bit5, imm14 */
7282 Assert((pu32CodeBuf[offFixup] & UINT32_C(0x7e000000)) == UINT32_C(0x36000000));
7283 Assert(offDisp >= -8192 && offDisp < 8192);
7284 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfff8001f))
7285 | (((uint32_t)offDisp << 5) & UINT32_C(0x0007ffe0));
7286 }
7287
7288#else
7289# error "Port me!"
7290#endif
7291}
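
/**
 * Usage sketch (illustrative only): the usual pattern for a forward jump with
 * an as yet unknown target is to remember the offset of the branch
 * instruction, emit it against a provisional target, and patch it once the
 * real target is known.  Using off + 256 as the provisional target keeps the
 * AMD64 encoding in its rel32 form so the later fixup always fits (see the
 * notes on iemNativeEmitJccToFixed()).  The condition is just an example.
 *
 * @code{.cpp}
 *  uint32_t const offFixupJne = off;
 *  off = iemNativeEmitJccToFixed(pReNative, off, off + 256, kIemNativeInstrCond_ne); // provisional target
 *  // ... emit the code that is jumped over when the values differ ...
 *  iemNativeFixupFixedJump(pReNative, offFixupJne, off); // patch the jne/b.ne to land here
 * @endcode
 */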
7292
7293
7294#ifdef RT_ARCH_AMD64
7295/**
7296 * For doing bt on a register.
7297 */
7298DECL_INLINE_THROW(uint32_t)
7299iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
7300{
7301 Assert(iBitNo < 64);
7302 /* bt Ev, imm8 */
7303 if (iBitNo >= 32)
7304 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7305 else if (iGprSrc >= 8)
7306 pCodeBuf[off++] = X86_OP_REX_B;
7307 pCodeBuf[off++] = 0x0f;
7308 pCodeBuf[off++] = 0xba;
7309 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7310 pCodeBuf[off++] = iBitNo;
7311 return off;
7312}
7313#endif /* RT_ARCH_AMD64 */
7314
7315
7316/**
7317 * Internal helper, don't call directly.
7318 */
7319DECL_INLINE_THROW(uint32_t)
7320iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7321 uint32_t offTarget, uint32_t *poffFixup, bool fJmpIfSet)
7322{
7323 Assert(iBitNo < 64);
7324#ifdef RT_ARCH_AMD64
7325 if (iBitNo < 8)
7326 {
7327 /* test Eb, imm8 */
7328 if (iGprSrc >= 4)
7329 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7330 pCodeBuf[off++] = 0xf6;
7331 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7332 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7333 if (poffFixup)
7334 *poffFixup = off;
7335 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7336 }
7337 else
7338 {
7339 /* bt Ev, imm8 */
7340 if (iBitNo >= 32)
7341 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7342 else if (iGprSrc >= 8)
7343 pCodeBuf[off++] = X86_OP_REX_B;
7344 pCodeBuf[off++] = 0x0f;
7345 pCodeBuf[off++] = 0xba;
7346 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7347 pCodeBuf[off++] = iBitNo;
7348 if (poffFixup)
7349 *poffFixup = off;
7350 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7351 }
7352
7353#elif defined(RT_ARCH_ARM64)
7354 /* Just use the TBNZ instruction here. */
7355 if (poffFixup)
7356 *poffFixup = off;
7357 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, (int32_t)(offTarget - off), iGprSrc, iBitNo);
7358
7359#else
7360# error "Port me!"
7361#endif
7362 return off;
7363}
7364
7365
7366/**
7367 * Emits a jump to @a idxTarget on the condition that bit @a iBitNo _is_ _set_
7368 * in @a iGprSrc.
7369 */
7370DECL_INLINE_THROW(uint32_t)
7371iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7372 uint32_t offTarget, uint32_t *poffFixup)
7373{
7374 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, true /*fJmpIfSet*/);
7375}
7376
7377
7378/**
7379 * Emits a jump to @a offTarget on the condition that bit @a iBitNo _is_ _not_
7380 * _set_ in @a iGprSrc.
7381 */
7382DECL_INLINE_THROW(uint32_t)
7383iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo,
7384 uint32_t offTarget, uint32_t *poffFixup)
7385{
7386 return iemNativeEmitTestBitInGprAndJmpToFixedIfCcEx(pCodeBuf, off, iGprSrc, iBitNo, offTarget, poffFixup, false /*fJmpIfSet*/);
7387}
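
/**
 * Usage sketch (illustrative only): a forward "skip if bit set" sequence using
 * the fixed-target test-bit emitter above together with
 * iemNativeFixupFixedJump().  The instruction buffer size is a rough worst
 * case guess (test/bt + jcc on AMD64, a single tbz/tbnz on ARM64); iGprFlags
 * and iBitNo are placeholders.
 *
 * @code{.cpp}
 *  uint32_t offFixup = 0;
 *  PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
 *  off = iemNativeEmitTestBitInGprAndJmpToFixedIfSetEx(pCodeBuf, off, iGprFlags, iBitNo,
 *                                                      off + 256, &offFixup); // provisional target
 *  IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 *  // ... emit the code that is jumped over when the bit is set ...
 *  iemNativeFixupFixedJump(pReNative, offFixup, off);
 * @endcode
 */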
7388
7389
7390
7391/**
7392 * Internal helper, don't call directly.
7393 */
7394DECL_INLINE_THROW(uint32_t)
7395iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7396 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7397{
7398 Assert(iBitNo < 64);
7399#ifdef RT_ARCH_AMD64
7400 if (iBitNo < 8)
7401 {
7402 /* test Eb, imm8 */
7403 if (iGprSrc >= 4)
7404 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7405 pCodeBuf[off++] = 0xf6;
7406 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7407 pCodeBuf[off++] = (uint8_t)1 << iBitNo;
7408 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7409 fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7410 }
7411 else
7412 {
7413 /* bt Ev, imm8 */
7414 if (iBitNo >= 32)
7415 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
7416 else if (iGprSrc >= 8)
7417 pCodeBuf[off++] = X86_OP_REX_B;
7418 pCodeBuf[off++] = 0x0f;
7419 pCodeBuf[off++] = 0xba;
7420 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7421 pCodeBuf[off++] = iBitNo;
7422 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7423 fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
7424 }
7425
7426#elif defined(RT_ARCH_ARM64)
7427 /* Use the TBNZ instruction here. */
7428 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
7429 {
7430 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
7431 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
7432 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
7433 //if (offLabel == UINT32_MAX)
7434 {
7435 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
7436 pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
7437 }
7438 //else
7439 //{
7440 // RT_BREAKPOINT();
7441 // Assert(off - offLabel <= 0x1fffU);
7442 // pCodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7443 //
7444 //}
7445 }
7446 else
7447 {
7448 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7449 pCodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7450 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7451 pCodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7452 }
7453
7454#else
7455# error "Port me!"
7456#endif
7457 return off;
7458}
7459
7460
7461/**
7462 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7463 * @a iGprSrc.
7464 */
7465DECL_INLINE_THROW(uint32_t)
7466iemNativeEmitTestBitInGprAndJmpToLabelIfSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7467 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7468{
7469 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7470}
7471
7472
7473/**
7474 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7475 * _set_ in @a iGprSrc.
7476 */
7477DECL_INLINE_THROW(uint32_t)
7478iemNativeEmitTestBitInGprAndJmpToLabelIfNotSetEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7479 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7480{
7481 return iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, pCodeBuf, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7482}
7483
7484
7485/**
7486 * Internal helper, don't call directly.
7487 */
7488DECL_INLINE_THROW(uint32_t)
7489iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7490 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
7491{
7492#ifdef RT_ARCH_AMD64
7493 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 5+6), off,
7494 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7495#elif defined(RT_ARCH_ARM64)
7496 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCcEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 2), off,
7497 iGprSrc, iBitNo, idxLabel, fJmpIfSet);
7498#else
7499# error "Port me!"
7500#endif
7501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7502 return off;
7503}
7504
7505
7506/**
7507 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7508 * @a iGprSrc.
7509 */
7510DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7511 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7512{
7513 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7514}
7515
7516
7517/**
7518 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7519 * _set_ in @a iGprSrc.
7520 */
7521DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7522 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7523{
7524 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7525}
7526
7527
7528/**
7529 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7530 * flags accordingly.
7531 */
7532DECL_INLINE_THROW(uint32_t)
7533iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7534{
7535 Assert(fBits != 0);
7536#ifdef RT_ARCH_AMD64
7537
7538 if (fBits >= UINT32_MAX)
7539 {
7540 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7541
7542 /* test Ev,Gv */
7543 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7544 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7545 pbCodeBuf[off++] = 0x85;
7546 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7547
7548 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7549 }
7550 else if (fBits <= UINT32_MAX)
7551 {
7552 /* test Eb, imm8 or test Ev, imm32 */
7553 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7554 if (fBits <= UINT8_MAX)
7555 {
7556 if (iGprSrc >= 4)
7557 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7558 pbCodeBuf[off++] = 0xf6;
7559 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7560 pbCodeBuf[off++] = (uint8_t)fBits;
7561 }
7562 else
7563 {
7564 if (iGprSrc >= 8)
7565 pbCodeBuf[off++] = X86_OP_REX_B;
7566 pbCodeBuf[off++] = 0xf7;
7567 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7568 pbCodeBuf[off++] = RT_BYTE1(fBits);
7569 pbCodeBuf[off++] = RT_BYTE2(fBits);
7570 pbCodeBuf[off++] = RT_BYTE3(fBits);
7571 pbCodeBuf[off++] = RT_BYTE4(fBits);
7572 }
7573 }
7574 /** @todo implement me. */
7575 else
7576 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7577
7578#elif defined(RT_ARCH_ARM64)
7579 uint32_t uImmR = 0;
7580 uint32_t uImmNandS = 0;
7581 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7582 {
7583 /* ands xzr, iGprSrc, #fBits */
7584 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7585 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7586 }
7587 else
7588 {
7589 /* ands xzr, iGprSrc, iTmpReg */
7590 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7591 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7592 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7593 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7594 }
7595
7596#else
7597# error "Port me!"
7598#endif
7599 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7600 return off;
7601}
7602
7603
7604/**
7605 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7606 * @a iGprSrc, setting CPU flags accordingly.
7607 *
7608 * @note For ARM64 this only supports @a fBits values that can be expressed
7609 * using the two 6-bit immediates of the ANDS instruction. The caller
7610 * must make sure this is possible!
7611 */
7612DECL_FORCE_INLINE_THROW(uint32_t)
7613iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits,
7614 uint8_t iTmpReg = UINT8_MAX)
7615{
7616 Assert(fBits != 0);
7617
7618#ifdef RT_ARCH_AMD64
7619 if (fBits <= UINT8_MAX)
7620 {
7621 /* test Eb, imm8 */
7622 if (iGprSrc >= 4)
7623 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7624 pCodeBuf[off++] = 0xf6;
7625 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7626 pCodeBuf[off++] = (uint8_t)fBits;
7627 }
7628 else
7629 {
7630 /* test Ev, imm32 */
7631 if (iGprSrc >= 8)
7632 pCodeBuf[off++] = X86_OP_REX_B;
7633 pCodeBuf[off++] = 0xf7;
7634 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7635 pCodeBuf[off++] = RT_BYTE1(fBits);
7636 pCodeBuf[off++] = RT_BYTE2(fBits);
7637 pCodeBuf[off++] = RT_BYTE3(fBits);
7638 pCodeBuf[off++] = RT_BYTE4(fBits);
7639 }
7640 RT_NOREF(iTmpReg);
7641
7642#elif defined(RT_ARCH_ARM64)
7643 /* ands xzr, src, #fBits */
7644 uint32_t uImmR = 0;
7645 uint32_t uImmNandS = 0;
7646 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7647 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7648 else if (iTmpReg != UINT8_MAX)
7649 {
7650 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iTmpReg, fBits);
7651 pCodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7652 }
7653 else
7654# ifdef IEM_WITH_THROW_CATCH
7655 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7656# else
7657 AssertReleaseFailedStmt(off = UINT32_MAX);
7658# endif
7659
7660#else
7661# error "Port me!"
7662#endif
7663 return off;
7664}
7665
7666
7667
7668/**
7669 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7670 * @a iGprSrc, setting CPU flags accordingly.
7671 *
7672 * @note For ARM64 this only supports @a fBits values that can be expressed
7673 * using the two 6-bit immediates of the ANDS instruction. The caller
7674 * must make sure this is possible!
7675 */
7676DECL_FORCE_INLINE_THROW(uint32_t)
7677iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7678{
7679 Assert(fBits != 0);
7680
7681#ifdef RT_ARCH_AMD64
7682 /* test Eb, imm8 */
7683 if (iGprSrc >= 4)
7684 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7685 pCodeBuf[off++] = 0xf6;
7686 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7687 pCodeBuf[off++] = fBits;
7688
7689#elif defined(RT_ARCH_ARM64)
7690 /* ands xzr, src, #fBits */
7691 uint32_t uImmR = 0;
7692 uint32_t uImmNandS = 0;
7693 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7694 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7695 else
7696# ifdef IEM_WITH_THROW_CATCH
7697 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7698# else
7699 AssertReleaseFailedStmt(off = UINT32_MAX);
7700# endif
7701
7702#else
7703# error "Port me!"
7704#endif
7705 return off;
7706}
7707
7708
7709/**
7710 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7711 * @a iGprSrc, setting CPU flags accordingly.
7712 */
7713DECL_INLINE_THROW(uint32_t)
7714iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7715{
7716 Assert(fBits != 0);
7717
7718#ifdef RT_ARCH_AMD64
7719 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7720
7721#elif defined(RT_ARCH_ARM64)
7722 /* ands xzr, src, [tmp|#imm] */
7723 uint32_t uImmR = 0;
7724 uint32_t uImmNandS = 0;
7725 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7726 {
7727 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7728 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7729 }
7730 else
7731 {
7732 /* Use a temporary register for the immediate. */
7733 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7734 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7735 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7736 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7737 }
7738
7739#else
7740# error "Port me!"
7741#endif
7742 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7743 return off;
7744}
7745
7746
7747/**
7748 * Emits a jump to @a idxLabel on the condition _any_ of the bits in @a fBits
7749 * are set in @a iGprSrc.
7750 */
7751DECL_INLINE_THROW(uint32_t)
7752iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7753 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7754{
7755 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7756
7757 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7758 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7759
7760 return off;
7761}
7762
7763
7764/**
7765 * Emits a jump to @a idxLabel on the condition _none_ of the bits in @a fBits
7766 * are set in @a iGprSrc.
7767 */
7768DECL_INLINE_THROW(uint32_t)
7769iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7770 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7771{
7772 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7773
7774 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7775 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7776
7777 return off;
7778}
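
/**
 * Usage sketch (illustrative only): guard a block of generated code so it is
 * skipped unless at least one of the bits in fFlagsMask is set in iGprFlags.
 * The skip label is created as undefined here and is assumed to be bound to
 * its final offset later by the caller; the mask must contain at least two
 * bits (see the assertion above), and all names are placeholders.
 *
 * @code{.cpp}
 *  uint32_t const idxLabelSkip = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0); // undefined label
 *  off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, iGprFlags, fFlagsMask, idxLabelSkip);
 *  // ... emit the code that only runs when one or more of the mask bits are set ...
 * @endcode
 */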
7779
7780
7781/**
7782 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as specified by @a fJmpIfNotZero.
7783 *
7784 * The operand size is given by @a f64Bit.
7785 */
7786DECL_FORCE_INLINE_THROW(uint32_t)
7787iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7788 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7789{
7790 Assert(idxLabel < pReNative->cLabels);
7791
7792#ifdef RT_ARCH_AMD64
7793 /* test reg32,reg32 / test reg64,reg64 */
7794 if (f64Bit)
7795 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7796 else if (iGprSrc >= 8)
7797 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7798 pCodeBuf[off++] = 0x85;
7799 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7800
7801 /* jnz idxLabel */
7802 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7803 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7804
7805#elif defined(RT_ARCH_ARM64)
7806 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7807 {
7808 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7809 iGprSrc, f64Bit);
7810 off++;
7811 }
7812 else
7813 {
7814 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7815 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7816 }
7817
7818#else
7819# error "Port me!"
7820#endif
7821 return off;
7822}
7823
7824
7825/**
7826 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero, as specified by @a fJmpIfNotZero.
7827 *
7828 * The operand size is given by @a f64Bit.
7829 */
7830DECL_FORCE_INLINE_THROW(uint32_t)
7831iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7832 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7833{
7834#ifdef RT_ARCH_AMD64
7835 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7836 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7837#elif defined(RT_ARCH_ARM64)
7838 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7839 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7840#else
7841# error "Port me!"
7842#endif
7843 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7844 return off;
7845}
7846
7847
7848/**
7849 * Emits code that jumps to @a offTarget if @a iGprSrc is zero or not zero, as specified by @a fJmpIfNotZero.
7850 *
7851 * The operand size is given by @a f64Bit.
7852 */
7853DECL_FORCE_INLINE_THROW(uint32_t)
7854iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7855 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7856{
7857#ifdef RT_ARCH_AMD64
7858 /* test reg32,reg32 / test reg64,reg64 */
7859 if (f64Bit)
7860 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7861 else if (iGprSrc >= 8)
7862 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7863 pCodeBuf[off++] = 0x85;
7864 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7865
7866 /* jnz idxLabel */
7867 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offTarget,
7868 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7869
7870#elif defined(RT_ARCH_ARM64)
7871 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(offTarget - off), iGprSrc, f64Bit);
7872 off++;
7873
7874#else
7875# error "Port me!"
7876#endif
7877 return off;
7878}
7879
7880
7881/**
7882 * Emits code that jumps to @a offTarget if @a iGprSrc is zero or not zero, as specified by @a fJmpIfNotZero.
7883 *
7884 * The operand size is given by @a f64Bit.
7885 */
7886DECL_FORCE_INLINE_THROW(uint32_t)
7887iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7888 bool f64Bit, bool fJmpIfNotZero, uint32_t offTarget)
7889{
7890#ifdef RT_ARCH_AMD64
7891 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7892 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7893#elif defined(RT_ARCH_ARM64)
7894 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1),
7895 off, iGprSrc, f64Bit, fJmpIfNotZero, offTarget);
7896#else
7897# error "Port me!"
7898#endif
7899 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7900 return off;
7901}
7902
7903
7904/* if (Gpr1 == 0) Jmp idxLabel; */
7905
7906/**
7907 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7908 *
7909 * The operand size is given by @a f64Bit.
7910 */
7911DECL_FORCE_INLINE_THROW(uint32_t)
7912iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7913 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7914{
7915 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7916 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7917}
7918
7919
7920/**
7921 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7922 *
7923 * The operand size is given by @a f64Bit.
7924 */
7925DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7926 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7927{
7928 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7929}
7930
7931
7932/**
7933 * Emits code that jumps to a new label if @a iGprSrc is zero.
7934 *
7935 * The operand size is given by @a f64Bit.
7936 */
7937DECL_INLINE_THROW(uint32_t)
7938iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7939 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7940{
7941 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7942 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7943}
7944
7945
7946/**
7947 * Emits code that jumps to @a offTarget if @a iGprSrc is zero.
7948 *
7949 * The operand size is given by @a f64Bit.
7950 */
7951DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7952 uint8_t iGprSrc, bool f64Bit, uint32_t offTarget)
7953{
7954 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixed(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, offTarget);
7955}
7956
7957
7958/* if (Gpr1 != 0) Jmp idxLabel; */
7959
7960/**
7961 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7962 *
7963 * The operand size is given by @a f64Bit.
7964 */
7965DECL_FORCE_INLINE_THROW(uint32_t)
7966iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7967 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7968{
7969 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7970 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7971}
7972
7973
7974/**
7975 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7976 *
7977 * The operand size is given by @a f64Bit.
7978 */
7979DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7980 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7981{
7982 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7983}
7984
7985
7986/**
7987 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7988 *
7989 * The operand size is given by @a f64Bit.
7990 */
7991DECL_INLINE_THROW(uint32_t)
7992iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7993 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7994{
7995 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7996 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7997}
7998
7999
8000/* if (Gpr1 != Gpr2) Jmp idxLabel; */
8001
8002/**
8003 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
8004 * differ.
8005 */
8006DECL_INLINE_THROW(uint32_t)
8007iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8008 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
8009{
8010 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8011 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8012 return off;
8013}
8014
8015
8016/**
8017 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differ.
8018 */
8019DECL_INLINE_THROW(uint32_t)
8020iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8021 uint8_t iGprLeft, uint8_t iGprRight,
8022 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8023{
8024 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8025 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
8026}
8027
8028
8029/* if (Gpr != Imm) Jmp idxLabel; */
8030
8031/**
8032 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
8033 */
8034DECL_INLINE_THROW(uint32_t)
8035iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8036 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8037{
8038 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8039 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8040 return off;
8041}
8042
8043
8044/**
8045 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
8046 */
8047DECL_INLINE_THROW(uint32_t)
8048iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8049 uint8_t iGprSrc, uint64_t uImm,
8050 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8051{
8052 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8053 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8054}
8055
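/*
 * Illustrative usage sketch (an addition, not part of the original source): the
 * *AndJmpToNewLabel variants combine the compare and the label creation, which is
 * handy for one-off guard checks.  The parameter names below are placeholders.
 */
#if 0 /* usage sketch */
static uint32_t iemNativeSketchEmitGuardValue(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegGuard,
                                              uint64_t uExpected, IEMNATIVELABELTYPE enmLabelType)
{
    /* Creates a new label of the given type and branches to it when the 64-bit value differs. */
    return iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(pReNative, off, idxRegGuard, uExpected, enmLabelType);
}
#endif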
8056
8057/**
8058 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
8059 * @a uImm.
8060 */
8061DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8062 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8063{
8064 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8065 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8066 return off;
8067}
8068
8069
8070/**
8071 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
8072 * @a uImm.
8073 */
8074DECL_INLINE_THROW(uint32_t)
8075iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8076 uint8_t iGprSrc, uint32_t uImm,
8077 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8078{
8079 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8080 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8081}
8082
8083
8084/**
8085 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
8086 * @a uImm.
8087 */
8088DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8089 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
8090{
8091 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
8092 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
8093 return off;
8094}
8095
8096
8097/**
8098 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
8099 * @a uImm.
8100 */
8101DECL_INLINE_THROW(uint32_t)
8102iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8103 uint8_t iGprSrc, uint16_t uImm,
8104 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8105{
8106 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8107 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8108}
8109
8110
8111/* if (Gpr == Imm) Jmp idxLabel; */
8112
8113/**
8114 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
8115 */
8116DECL_INLINE_THROW(uint32_t)
8117iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8118 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
8119{
8120 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8121 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8122 return off;
8123}
8124
8125
8126/**
8127 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
8128 */
8129DECL_INLINE_THROW(uint32_t)
8130iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
8131 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8132{
8133 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8134 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8135}
8136
8137
8138/**
8139 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
8140 */
8141DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8142 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
8143{
8144 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8145 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8146 return off;
8147}
8148
8149
8150/**
8151 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
8152 */
8153DECL_INLINE_THROW(uint32_t)
8154iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
8155 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
8156{
8157 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8158 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
8159}
8160
8161
8162/**
8163 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
8164 *
8165 * @note ARM64: Helper register is required (idxTmpReg).
8166 */
8167DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8168 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
8169 uint8_t idxTmpReg = UINT8_MAX)
8170{
8171 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
8172 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
8173 return off;
8174}
8175
8176
8177/**
8178 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
8179 *
8180 * @note ARM64: Helper register is required (idxTmpReg).
8181 */
8182DECL_INLINE_THROW(uint32_t)
8183iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
8184 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
8185 uint8_t idxTmpReg = UINT8_MAX)
8186{
8187 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
8188 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
8189}
8190
8191
8192
8193/*********************************************************************************************************************************
8194* Indirect Jumps. *
8195*********************************************************************************************************************************/
8196
8197/**
8198 * Emits an indirect jump to a 64-bit address in a GPR.
8199 */
8200DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
8201{
8202#ifdef RT_ARCH_AMD64
8203 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
8204 if (iGprSrc >= 8)
8205 pCodeBuf[off++] = X86_OP_REX_B;
8206 pCodeBuf[off++] = 0xff;
8207 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8208
8209#elif defined(RT_ARCH_ARM64)
8210 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8211 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
8212
8213#else
8214# error "port me"
8215#endif
8216 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8217 return off;
8218}
8219
8220
8221/**
8222 * Emits an indirect jump to an immediate 64-bit address (uses the temporary GPR).
8223 */
8224DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8225{
8226 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8227 return iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP0);
8228}
8229
8230
8231/*********************************************************************************************************************************
8232* Calls. *
8233*********************************************************************************************************************************/
8234
8235/**
8236 * Emits a call to a 64-bit address.
8237 */
8238DECL_FORCE_INLINE(uint32_t) iemNativeEmitCallImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uintptr_t uPfn,
8239#ifdef RT_ARCH_AMD64
8240 uint8_t idxRegTmp = X86_GREG_xAX
8241#elif defined(RT_ARCH_ARM64)
8242 uint8_t idxRegTmp = IEMNATIVE_REG_FIXED_TMP0
8243#else
8244# error "Port me"
8245#endif
8246 )
8247{
8248 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegTmp, uPfn);
8249
8250#ifdef RT_ARCH_AMD64
8251 /* call idxRegTmp */
8252 if (idxRegTmp >= 8)
8253 pCodeBuf[off++] = X86_OP_REX_B;
8254 pCodeBuf[off++] = 0xff;
8255 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, idxRegTmp & 7);
8256
8257#elif defined(RT_ARCH_ARM64)
8258 pCodeBuf[off++] = Armv8A64MkInstrBlr(idxRegTmp);
8259
8260#else
8261# error "port me"
8262#endif
8263 return off;
8264}
8265
8266
8267/**
8268 * Emits a call to a 64-bit address.
8269 */
8270template<bool const a_fSkipEflChecks = false>
8271DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
8272{
8273 if RT_CONSTEXPR_IF(!a_fSkipEflChecks)
8274 {
8275 IEMNATIVE_ASSERT_EFLAGS_POSTPONING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8276 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY( pReNative, X86_EFL_STATUS_BITS);
8277 }
8278
8279#ifdef RT_ARCH_AMD64
8280 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
8281
8282 /* call rax */
8283 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8284 pbCodeBuf[off++] = 0xff;
8285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
8286
8287#elif defined(RT_ARCH_ARM64)
8288 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
8289
8290 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8291 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
8292
8293#else
8294# error "port me"
8295#endif
8296 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8297 return off;
8298}
8299
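/*
 * Illustrative usage sketch (an addition, not part of the original source): calling a C
 * helper means loading its address and emitting the call; arguments must have been placed
 * in the calling-convention registers beforehand (see the argument loaders below).
 */
#if 0 /* usage sketch */
static uint32_t iemNativeSketchEmitHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnHelper)
{
    /* Loads pfnHelper into rax / TMP0 and emits 'call rax' / 'blr'. */
    return iemNativeEmitCallImm(pReNative, off, pfnHelper);
}
#endif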
8300
8301/**
8302 * Emits code to load a stack variable into an argument GPR.
8303 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8304 */
8305DECL_FORCE_INLINE_THROW(uint32_t)
8306iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8307 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
8308 bool fSpilledVarsInVolatileRegs = false)
8309{
8310 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8311 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8312 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8313
8314 uint8_t const idxRegVar = pVar->idxReg;
8315 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
8316 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
8317 || !fSpilledVarsInVolatileRegs ))
8318 {
8319 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
8320 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
8321 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
8322 if (!offAddend)
8323 {
8324 if (idxRegArg != idxRegVar)
8325 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
8326 }
8327 else
8328 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
8329 }
8330 else
8331 {
8332 uint8_t const idxStackSlot = pVar->idxStackSlot;
8333 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8334 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
8335 if (offAddend)
8336 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
8337 }
8338 return off;
8339}
8340
8341
8342/**
8343 * Emits code to load a stack or immediate variable value into an argument GPR,
8344 * optionally with an addend.
8345 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
8346 */
8347DECL_FORCE_INLINE_THROW(uint32_t)
8348iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8349 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
8350 bool fSpilledVarsInVolatileRegs = false)
8351{
8352 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8353 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8354 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8355 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
8356 else
8357 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
8358 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
8359 return off;
8360}
8361
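/*
 * Illustrative usage sketch (an addition, not part of the original source): loading a
 * variable into an argument register before a helper call.  IEMNATIVE_CALL_ARG1_GREG is
 * assumed to be the second calling-convention register defined in IEMN8veRecompiler.h.
 */
#if 0 /* usage sketch */
static uint32_t iemNativeSketchEmitCallWithVarArg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                  uint8_t idxVarArg, uintptr_t pfnHelper)
{
    /* Works for both immediate and stack variables; spilled variables are loaded from the stack frame. */
    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarArg);
    return iemNativeEmitCallImm(pReNative, off, pfnHelper);
}
#endif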
8362
8363/**
8364 * Emits code to load the variable address into an argument GPR.
8365 *
8366 * This only works for uninitialized and stack variables.
8367 */
8368DECL_FORCE_INLINE_THROW(uint32_t)
8369iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8370 bool fFlushShadows)
8371{
8372 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8373 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8374 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8375 || pVar->enmKind == kIemNativeVarKind_Stack,
8376 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8377 AssertStmt(!pVar->fSimdReg,
8378 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8379
8380 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8381 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8382
8383 uint8_t const idxRegVar = pVar->idxReg;
8384 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
8385 {
8386 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
8387 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
8388 Assert(pVar->idxReg == UINT8_MAX);
8389 }
8390 Assert( pVar->idxStackSlot != UINT8_MAX
8391 && pVar->idxReg == UINT8_MAX);
8392
8393 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
8394}
8395
8396
8397
8398/*********************************************************************************************************************************
8399* TB exiting helpers. *
8400*********************************************************************************************************************************/
8401
8402#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8403/* IEMAllN8veEmit-x86.h: */
8404template<uint32_t const a_bmInputRegs>
8405DECL_FORCE_INLINE_THROW(uint32_t)
8406iemNativeDoPostponedEFlagsAtTbExitEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVEINSTR pCodeBuf);
8407
8408template<uint32_t const a_bmInputRegs>
8409DECL_FORCE_INLINE_THROW(uint32_t)
8410iemNativeDoPostponedEFlagsAtTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off);
8411#endif
8412
8413
8414/**
8415 * Helper for marking the current conditional branch as exiting the TB.
8416 *
8417 * This simplifies the state consolidation later when we reach the IEM_MC_ENDIF.
8418 */
8419DECL_FORCE_INLINE(void) iemNativeMarkCurCondBranchAsExiting(PIEMRECOMPILERSTATE pReNative)
8420{
8421 uint8_t idxCondDepth = pReNative->cCondDepth;
8422 if (idxCondDepth)
8423 {
8424 idxCondDepth--;
8425 pReNative->aCondStack[idxCondDepth].afExitTb[pReNative->aCondStack[idxCondDepth].fInElse] = true;
8426 }
8427}
8428
8429
8430/**
8431 * Unconditionally exits the translation block via a branch instruction.
8432 *
8433 * @note In case a delayed EFLAGS calculation is pending, this may emit an
8434 * additional IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS instructions.
8435 */
8436template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fActuallyExitingTb = true, bool const a_fPostponedEfl = true>
8437DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off)
8438{
8439 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8440 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8441
8442 if RT_CONSTEXPR_IF(a_fActuallyExitingTb)
8443 iemNativeMarkCurCondBranchAsExiting(pReNative);
8444
8445#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8446 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8447 off = iemNativeDoPostponedEFlagsAtTbExitEx<IEMNATIVELABELTYPE_GET_INPUT_REG_MASK(a_enmExitReason)>(pReNative, off,
8448 pCodeBuf);
8449#endif
8450
8451#ifdef RT_ARCH_AMD64
8452 /* jmp rel32 */
8453 pCodeBuf[off++] = 0xe9;
8454 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8455 pCodeBuf[off++] = 0xfe;
8456 pCodeBuf[off++] = 0xff;
8457 pCodeBuf[off++] = 0xff;
8458 pCodeBuf[off++] = 0xff;
8459
8460#elif defined(RT_ARCH_ARM64)
8461 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8462 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8463
8464#else
8465# error "Port me!"
8466#endif
8467 return off;
8468}
8469
8470
8471/**
8472 * Unconditionally exits the translation block via a branch instruction.
8473 *
8474 * @note In case a delayed EFLAGS calculation is pending, this may emit an
8475 * additional IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS instructions.
8476 */
8477template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fActuallyExitingTb = true, bool const a_fPostponedEfl = true>
8478DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8479{
8480 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8481 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8482
8483 if RT_CONSTEXPR_IF(a_fActuallyExitingTb)
8484 iemNativeMarkCurCondBranchAsExiting(pReNative);
8485
8486#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8487 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8488 off = iemNativeDoPostponedEFlagsAtTbExit<IEMNATIVELABELTYPE_GET_INPUT_REG_MASK(a_enmExitReason)>(pReNative, off);
8489#endif
8490
8491#ifdef RT_ARCH_AMD64
8492 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
8493
8494 /* jmp rel32 */
8495 pCodeBuf[off++] = 0xe9;
8496 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8497 pCodeBuf[off++] = 0xfe;
8498 pCodeBuf[off++] = 0xff;
8499 pCodeBuf[off++] = 0xff;
8500 pCodeBuf[off++] = 0xff;
8501
8502#elif defined(RT_ARCH_ARM64)
8503 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
8504 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8505 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
8506
8507#else
8508# error "Port me!"
8509#endif
8510 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8511 return off;
8512}
8513
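/*
 * Illustrative usage sketch (an addition, not part of the original source): exiting the TB
 * unconditionally with a specific reason.  kIemNativeLabelType_ReturnBreak is assumed to be
 * one of the exit-reason label types declared in IEMN8veRecompiler.h.
 */
#if 0 /* usage sketch */
static uint32_t iemNativeSketchEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Emits the jmp rel32 / b imm26 to the per-chunk exit code, adding a fixup for the target. */
    return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreak>(pReNative, off);
}
#endif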
8514
8515/**
8516 * Emits a Jcc rel32 / B.cc imm19 to the TB exit routine with the given reason (ASSUMED to require a fixup).
8517 *
8518 * @note In case a delayed EFLAGS calculation is pending, this may emit an
8519 * additional IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS instructions.
8520 */
8521template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8522DECL_FORCE_INLINE_THROW(uint32_t)
8523iemNativeEmitTbExitJccEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, IEMNATIVEINSTRCOND enmCond)
8524{
8525 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8526 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8527
8528#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8529 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8530 if (pReNative->PostponedEfl.fEFlags)
8531 {
8532 /* Jcc l_NonPrimaryCodeStreamTarget */
8533 uint32_t const offFixup1 = off;
8534 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, enmCond);
8535
8536 /* JMP l_PrimaryCodeStreamResume */
8537 uint32_t const offFixup2 = off;
8538 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8539
8540 /* l_NonPrimaryCodeStreamTarget: */
8541 iemNativeFixupFixedJump(pReNative, offFixup1, off);
8542 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8543
8544 /* l_PrimaryCodeStreamResume: */
8545 iemNativeFixupFixedJump(pReNative, offFixup2, off);
8546 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8547 return off;
8548 }
8549#endif
8550
8551#if defined(RT_ARCH_AMD64)
8552 /* jcc rel32 */
8553 pCodeBuf[off++] = 0x0f;
8554 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
8555 iemNativeAddTbExitFixup(pReNative, off, a_enmExitReason);
8556 pCodeBuf[off++] = 0x00;
8557 pCodeBuf[off++] = 0x00;
8558 pCodeBuf[off++] = 0x00;
8559 pCodeBuf[off++] = 0x00;
8560
8561#else
8562 /* ARM64 doesn't have the necessary jump range, so we jump via local label
8563 just like when we keep everything local. */
8564 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8565 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel, enmCond);
8566#endif
8567 return off;
8568}
8569
8570
8571/**
8572 * Emits a Jcc rel32 / B.cc imm19 to the TB exit routine with the given reason.
8573 */
8574template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8575DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJcc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEINSTRCOND enmCond)
8576{
8577 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8578 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8579
8580#ifdef RT_ARCH_AMD64
8581 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS + 5);
8582#elif defined(RT_ARCH_ARM64)
8583 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS + 1);
8584#else
8585# error "Port me!"
8586#endif
8587 off = iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, enmCond);
8588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8589 return off;
8590}
8591
8592
8593/**
8594 * Emits a JNZ/JNE rel32 / B.NE imm19 to the TB exit routine with the given reason.
8595 */
8596template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8597DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJnz(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8598{
8599#ifdef RT_ARCH_AMD64
8600 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_ne);
8601#elif defined(RT_ARCH_ARM64)
8602 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Ne);
8603#else
8604# error "Port me!"
8605#endif
8606}
8607
8608
8609/**
8610 * Emits a JZ/JE rel32 / B.EQ imm19 to the TB exit routine with the given reason.
8611 */
8612template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8613DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJz(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8614{
8615#ifdef RT_ARCH_AMD64
8616 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_e);
8617#elif defined(RT_ARCH_ARM64)
8618 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Eq);
8619#else
8620# error "Port me!"
8621#endif
8622}
8623
8624
8625/**
8626 * Emits a JA/JNBE rel32 / B.HI imm19 to the TB exit.
8627 */
8628template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8629DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJa(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8630{
8631#ifdef RT_ARCH_AMD64
8632 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_nbe);
8633#elif defined(RT_ARCH_ARM64)
8634 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Hi);
8635#else
8636# error "Port me!"
8637#endif
8638}
8639
8640
8641/**
8642 * Emits a JL/JNGE rel32 / B.LT imm19 to the TB exit with the given reason.
8643 */
8644template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8645DECL_INLINE_THROW(uint32_t) iemNativeEmitTbExitJl(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8646{
8647#ifdef RT_ARCH_AMD64
8648 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_l);
8649#elif defined(RT_ARCH_ARM64)
8650 return iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kArmv8InstrCond_Lt);
8651#else
8652# error "Port me!"
8653#endif
8654}
8655
8656
8657/**
8658 * Emits a jump to the TB exit with @a a_enmExitReason on the condition _any_ of
8659 * the bits in @a fBits are set in @a iGprSrc.
8660 */
8661template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8662DECL_INLINE_THROW(uint32_t)
8663iemNativeEmitTbExitIfAnyBitsSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
8664{
8665 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8666
8667 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8668 return iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8669}
8670
8671
8672#if 0 /* unused */
8673/**
8674 * Emits a jump to the TB exit with @a a_enmExitReason on the condition _none_ of the bits in @a fBits
8675 * are set in @a iGprSrc.
8676 */
8677template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8678DECL_INLINE_THROW(uint32_t)
8679iemNativeEmitTbExitIfNoBitsSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
8680{
8681 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
8682
8683 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
8684 return iemNativeEmitTbExitJz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8685}
8686#endif
8687
8688
8689#if 0 /* unused */
8690/**
8691 * Emits code that exits the TB with the given reason if @a iGprLeft and @a iGprRight
8692 * differs.
8693 */
8694template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8695DECL_INLINE_THROW(uint32_t)
8696iemNativeEmitTbExitIfGprNotEqualGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
8697{
8698 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
8699 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8700 return off;
8701}
8702#endif
8703
8704
8705/**
8706 * Emits code that exits the current TB if 32-bit @a iGprSrc differs from
8707 * @a uImm.
8708 */
8709template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8710DECL_INLINE_THROW(uint32_t)
8711iemNativeEmitTbExitIfGpr32NotEqualImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm)
8712{
8713 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8714 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8715 return off;
8716}
8717
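/*
 * Illustrative usage sketch (an addition, not part of the original source): a typical guard
 * that leaves the TB when a 32-bit value is not the expected one.  The exit-reason label type
 * used here (kIemNativeLabelType_RaiseGp0) is an assumption.
 */
#if 0 /* usage sketch */
static uint32_t iemNativeSketchEmitCheckGuestValue(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                   uint8_t idxRegValue, uint32_t uExpected)
{
    /* cmp reg32, imm32 followed by a conditional TB exit on mismatch. */
    return iemNativeEmitTbExitIfGpr32NotEqualImm<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxRegValue, uExpected);
}
#endif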
8718
8719/**
8720 * Emits code that exits the current TB if @a iGprSrc differs from @a uImm.
8721 */
8722template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8723DECL_INLINE_THROW(uint32_t)
8724iemNativeEmitTbExitIfGprNotEqualImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm)
8725{
8726 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
8727 off = iemNativeEmitTbExitJnz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8728 return off;
8729}
8730
8731
8732/**
8733 * Emits code that exits the current TB with the given reason if 32-bit @a iGprSrc equals @a uImm.
8734 */
8735template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8736DECL_INLINE_THROW(uint32_t)
8737iemNativeEmitTbExitIfGpr32EqualsImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm)
8738{
8739 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
8740 off = iemNativeEmitTbExitJz<a_enmExitReason, a_fPostponedEfl>(pReNative, off);
8741 return off;
8742}
8743
8744
8745/**
8746 * Emits code to exit the current TB with the reason @a a_enmExitReason on the
8747 * condition that bit @a iBitNo _is_ _set_ in @a iGprSrc.
8748 *
8749 * @note On ARM64 the range is only +/-8191 instructions.
8750 */
8751template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8752DECL_INLINE_THROW(uint32_t)
8753iemNativeEmitTbExitIfBitSetInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
8754{
8755 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8756
8757#if defined(RT_ARCH_AMD64)
8758 Assert(iBitNo < 64);
8759 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8760 if (iBitNo < 8)
8761 {
8762 /* test Eb, imm8 */
8763 if (iGprSrc >= 4)
8764 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
8765 pbCodeBuf[off++] = 0xf6;
8766 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
8767 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
8768 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8769 off = iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_ne);
8770 }
8771 else
8772 {
8773 /* bt Ev, imm8 */
8774 if (iBitNo >= 32)
8775 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8776 else if (iGprSrc >= 8)
8777 pbCodeBuf[off++] = X86_OP_REX_B;
8778 pbCodeBuf[off++] = 0x0f;
8779 pbCodeBuf[off++] = 0xba;
8780 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
8781 pbCodeBuf[off++] = iBitNo;
8782 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8783 off = iemNativeEmitTbExitJcc<a_enmExitReason, a_fPostponedEfl>(pReNative, off, kIemNativeInstrCond_c);
8784 }
8785 return off;
8786
8787#elif defined(RT_ARCH_ARM64)
8788 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8789 /** @todo Perhaps we should always apply the PostponedEfl code pattern here,
8790 * it's the same number of instructions as the TST + B.CC stuff? */
8791# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8792 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8793 if (pReNative->PostponedEfl.fEFlags)
8794 {
8795 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
8796 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8797 pCodeBuf[off++] = Armv8A64MkInstrTbnz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, iBitNo);
8798 uint32_t const offFixup = off;
8799 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8800 /* l_NonPrimaryCodeStreamTarget: */
8801 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8802 /* l_PrimaryCodeStreamResume: */
8803 iemNativeFixupFixedJump(pReNative, offFixup, off);
8804 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8805 return off;
8806 }
8807# endif
8808 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8809 we go via a local trampoline. */
8810 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8811 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
8812#else
8813# error "port me"
8814#endif
8815}
8816
8817
8818/**
8819 * Emits code that exits the current TB with @a a_enmExitReason if @a iGprSrc is
8820 * not zero.
8821 *
8822 * The operand size is given by @a f64Bit.
8823 */
8824template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8825DECL_FORCE_INLINE_THROW(uint32_t)
8826iemNativeEmitTbExitIfGprIsNotZeroEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8827 uint8_t iGprSrc, bool f64Bit)
8828{
8829 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8830
8831#if defined(RT_ARCH_AMD64)
8832 /* test reg32,reg32 / test reg64,reg64 */
8833 if (f64Bit)
8834 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8835 else if (iGprSrc >= 8)
8836 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8837 pCodeBuf[off++] = 0x85;
8838 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8839
8840 /* jnz idxLabel */
8841 return iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
8842
8843#elif defined(RT_ARCH_ARM64)
8844 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8845# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8846 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8847 if (pReNative->PostponedEfl.fEFlags)
8848 {
8849 pCodeBuf[off++] = Armv8A64MkInstrCbnz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, f64Bit);
8850 uint32_t const offFixup = off;
8851 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8852 /* l_NonPrimaryCodeStreamTarget: */
8853 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8854 /* l_PrimaryCodeStreamResume: */
8855 iemNativeFixupFixedJump(pReNative, offFixup, off);
8856 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8857 return off;
8858 }
8859# endif
8860 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8861 we go via a local trampoline. */
8862 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8863 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8864 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
8865#else
8866# error "port me"
8867#endif
8868}
8869
8870
8871/**
8872 * Emits code to exit the current TB with the given reason @a a_enmExitReason if
8873 * @a iGprSrc is not zero.
8874 *
8875 * The operand size is given by @a f64Bit.
8876 */
8877template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8878DECL_INLINE_THROW(uint32_t)
8879iemNativeEmitTbExitIfGprIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit)
8880{
8881#if defined(RT_ARCH_AMD64)
8882 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8883
8884#else
8885 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8886#endif
8887 off = iemNativeEmitTbExitIfGprIsNotZeroEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, iGprSrc, f64Bit);
8888 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8889 return off;
8890}
8891
8892
8893/**
8894 * Emits code that exits the current TB with @a a_enmExitReason if @a iGprSrc is
8895 * zero.
8896 *
8897 * The operand size is given by @a f64Bit.
8898 */
8899template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8900DECL_FORCE_INLINE_THROW(uint32_t)
8901iemNativeEmitTbExitIfGprIsZeroEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
8902 uint8_t iGprSrc, bool f64Bit)
8903{
8904 AssertCompile(IEMNATIVELABELTYPE_IS_EXIT_REASON(a_enmExitReason));
8905
8906#if defined(RT_ARCH_AMD64)
8907 /* test reg32,reg32 / test reg64,reg64 */
8908 if (f64Bit)
8909 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
8910 else if (iGprSrc >= 8)
8911 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8912 pCodeBuf[off++] = 0x85;
8913 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
8914
8915 /* jz idxLabel */
8916 return iemNativeEmitTbExitJccEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, kIemNativeInstrCond_e);
8917
8918#elif defined(RT_ARCH_ARM64)
8919 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_ONLY(pReNative, X86_EFL_STATUS_BITS);
8920# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8921 if RT_CONSTEXPR_IF(a_fPostponedEfl)
8922 if (pReNative->PostponedEfl.fEFlags)
8923 {
8924 pCodeBuf[off++] = Armv8A64MkInstrCbz(1 /*l_NonPrimaryCodeStreamTarget*/, iGprSrc, f64Bit);
8925 uint32_t const offFixup = off;
8926 pCodeBuf[off++] = Armv8A64MkInstrB(0 /*l_PrimaryCodeStreamResume*/);
8927 /* l_NonPrimaryCodeStreamTarget: */
8928 off = iemNativeEmitTbExitEx<a_enmExitReason, false /*a_fActuallyExitingTb*/, true>(pReNative, pCodeBuf, off);
8929 /* l_PrimaryCodeStreamResume: */
8930 iemNativeFixupFixedJump(pReNative, offFixup, off);
8931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8932 return off;
8933 }
8934# endif
8935 /* ARM64 doesn't have the necessary range to reach the per-chunk code, so
8936 we go via a local trampoline. */
8937 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, a_enmExitReason, UINT32_MAX /*offWhere*/, 0 /*uData*/);
8938 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
8939 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
8940#else
8941# error "port me"
8942#endif
8943}
8944
8945
8946/**
8947 * Emits code to exit the current TB with the given reason @a a_enmExitReason if @a iGprSrc is zero.
8948 *
8949 * The operand size is given by @a f64Bit.
8950 */
8951template<IEMNATIVELABELTYPE const a_enmExitReason, bool const a_fPostponedEfl = true>
8952DECL_INLINE_THROW(uint32_t)
8953iemNativeEmitTbExitIfGprIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit)
8954{
8955#if defined(RT_ARCH_AMD64)
8956 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + 6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8957
8958#else
8959 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
8960#endif
8961 off = iemNativeEmitTbExitIfGprIsZeroEx<a_enmExitReason, a_fPostponedEfl>(pReNative, pCodeBuf, off, iGprSrc, f64Bit);
8962 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8963 return off;
8964}
8965
8966
8967
8968/*********************************************************************************************************************************
8969* SIMD helpers. *
8970*********************************************************************************************************************************/
8971
8972/**
8973 * Emits code to load the variable address into an argument GPR.
8974 *
8975 * This is a special variant intended for SIMD variables only and only called
8976 * by the TLB miss path in the memory fetch/store code because there we pass
8977 * the value by reference and need both the register and stack depending on which
8978 * path is taken (TLB hit vs. miss).
8979 */
8980DECL_FORCE_INLINE_THROW(uint32_t)
8981iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
8982 bool fSyncRegWithStack = true)
8983{
8984 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8985 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8986 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
8987 || pVar->enmKind == kIemNativeVarKind_Stack,
8988 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8989 AssertStmt(pVar->fSimdReg,
8990 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8991 Assert( pVar->idxStackSlot != UINT8_MAX
8992 && pVar->idxReg != UINT8_MAX);
8993
8994 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8995 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
8996
8997 uint8_t const idxRegVar = pVar->idxReg;
8998 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8999 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
9000
9001 if (fSyncRegWithStack)
9002 {
9003 if (pVar->cbVar == sizeof(RTUINT128U))
9004 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
9005 else
9006 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
9007 }
9008
9009 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
9010}
9011
9012
9013/**
9014 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
9015 *
9016 * This is a special helper, only called by the TLB miss path in the memory
9017 * fetch/store code.  There the value is passed by reference, so after a TLB
9018 * miss the value ends up on the stack and must be synced back into the
9019 * assigned host SIMD register.
9020 */
9021DECL_FORCE_INLINE_THROW(uint32_t)
9022iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
9023{
9024 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9025 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9026 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
9027 || pVar->enmKind == kIemNativeVarKind_Stack,
9028 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9029 AssertStmt(pVar->fSimdReg,
9030 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9031 Assert( pVar->idxStackSlot != UINT8_MAX
9032 && pVar->idxReg != UINT8_MAX);
9033
9034 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9035 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
9036
9037 uint8_t const idxRegVar = pVar->idxReg;
9038 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9039 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
9040
9041 if (pVar->cbVar == sizeof(RTUINT128U))
9042 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
9043 else
9044 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
9045
9046 return off;
9047}
9048
9049
9050/**
9051 * Emits a gprdst = ~gprsrc (bitwise NOT) operation.
9052 */
9053DECL_FORCE_INLINE_THROW(uint32_t)
9054iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
9055{
9056#ifdef RT_ARCH_AMD64
9057 if (iGprDst != iGprSrc)
9058 {
9059 /* mov gprdst, gprsrc. */
9060 if (f64Bit)
9061 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
9062 else
9063 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
9064 }
9065
9066 /* not gprdst */
9067 if (f64Bit || iGprDst >= 8)
9068 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
9069 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
9070 pCodeBuf[off++] = 0xf7;
9071 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
9072#elif defined(RT_ARCH_ARM64)
9073 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
9074#else
9075# error "port me"
9076#endif
9077 return off;
9078}
9079
9080
9081/**
9082 * Emits a gprdst = ~gprsrc (bitwise NOT) operation.
9083 */
9084DECL_INLINE_THROW(uint32_t)
9085iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
9086{
9087#ifdef RT_ARCH_AMD64
9088 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
9089#elif defined(RT_ARCH_ARM64)
9090 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
9091#else
9092# error "port me"
9093#endif
9094 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9095 return off;
9096}
9097
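/*
 * Illustrative usage sketch (an addition, not part of the original source): a 32-bit
 * bitwise NOT; like any 32-bit GPR operation it leaves bits 63:32 of the destination cleared.
 */
#if 0 /* usage sketch */
static uint32_t iemNativeSketchEmitNot32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
    return iemNativeEmitInvBitsGpr(pReNative, off, iGprDst, iGprSrc, false /*f64Bit*/);
}
#endif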
9098
9099/**
9100 * Emits a 128-bit vector register store to a VCpu value.
9101 */
9102DECL_FORCE_INLINE_THROW(uint32_t)
9103iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9104{
9105#ifdef RT_ARCH_AMD64
9106 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
9107 pCodeBuf[off++] = 0x66;
9108 if (iVecReg >= 8)
9109 pCodeBuf[off++] = X86_OP_REX_R;
9110 pCodeBuf[off++] = 0x0f;
9111 pCodeBuf[off++] = 0x7f;
9112 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9113#elif defined(RT_ARCH_ARM64)
9114 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
9115
9116#else
9117# error "port me"
9118#endif
9119 return off;
9120}
9121
9122
9123/**
9124 * Emits a 128-bit vector register store to a VCpu value.
9125 */
9126DECL_INLINE_THROW(uint32_t)
9127iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9128{
9129#ifdef RT_ARCH_AMD64
9130 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
9131#elif defined(RT_ARCH_ARM64)
9132 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
9133#else
9134# error "port me"
9135#endif
9136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9137 return off;
9138}
9139
9140
9141/**
9142 * Emits a high 128-bit vector register store to a VCpu value.
9143 */
9144DECL_FORCE_INLINE_THROW(uint32_t)
9145iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9146{
9147#ifdef RT_ARCH_AMD64
9148 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
9149 pCodeBuf[off++] = X86_OP_VEX3;
9150 if (iVecReg >= 8)
9151 pCodeBuf[off++] = 0x63;
9152 else
9153 pCodeBuf[off++] = 0xe3;
9154 pCodeBuf[off++] = 0x7d;
9155 pCodeBuf[off++] = 0x39;
9156 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9157 pCodeBuf[off++] = 0x01; /* Immediate */
9158#elif defined(RT_ARCH_ARM64)
9159 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
9160#else
9161# error "port me"
9162#endif
9163 return off;
9164}
9165
9166
9167/**
9168 * Emits a high 128-bit vector register store to a VCpu value.
9169 */
9170DECL_INLINE_THROW(uint32_t)
9171iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9172{
9173#ifdef RT_ARCH_AMD64
9174 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9175#elif defined(RT_ARCH_ARM64)
9176 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9177 Assert(!(iVecReg & 0x1));
9178 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9179#else
9180# error "port me"
9181#endif
9182 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9183 return off;
9184}
9185
9186
9187/**
9188 * Emits a 128-bit vector register load of a VCpu value.
9189 */
9190DECL_FORCE_INLINE_THROW(uint32_t)
9191iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9192{
9193#ifdef RT_ARCH_AMD64
9194 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
9195 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9196 if (iVecReg >= 8)
9197 pCodeBuf[off++] = X86_OP_REX_R;
9198 pCodeBuf[off++] = 0x0f;
9199 pCodeBuf[off++] = 0x6f;
9200 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9201#elif defined(RT_ARCH_ARM64)
9202 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9203
9204#else
9205# error "port me"
9206#endif
9207 return off;
9208}
9209
9210
9211/**
9212 * Emits a 128-bit vector register load of a VCpu value.
9213 */
9214DECL_INLINE_THROW(uint32_t)
9215iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9216{
9217#ifdef RT_ARCH_AMD64
9218 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
9219#elif defined(RT_ARCH_ARM64)
9220 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
9221#else
9222# error "port me"
9223#endif
9224 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9225 return off;
9226}
9227
9228
9229/**
9230 * Emits a high 128-bit vector register load of a VCpu value.
9231 */
9232DECL_FORCE_INLINE_THROW(uint32_t)
9233iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9234{
9235#ifdef RT_ARCH_AMD64
9236 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
9237 pCodeBuf[off++] = X86_OP_VEX3;
9238 if (iVecReg >= 8)
9239 pCodeBuf[off++] = 0x63;
9240 else
9241 pCodeBuf[off++] = 0xe3;
9242 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9243 pCodeBuf[off++] = 0x38;
9244 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
9245 pCodeBuf[off++] = 0x01; /* Immediate */
9246#elif defined(RT_ARCH_ARM64)
9247 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
9248#else
9249# error "port me"
9250#endif
9251 return off;
9252}
9253
9254
9255/**
9256 * Emits a high 128-bit vector register load of a VCpu value.
9257 */
9258DECL_INLINE_THROW(uint32_t)
9259iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
9260{
9261#ifdef RT_ARCH_AMD64
9262 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
9263#elif defined(RT_ARCH_ARM64)
9264 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9265 Assert(!(iVecReg & 0x1));
9266 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
9267#else
9268# error "port me"
9269#endif
9270 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9271 return off;
9272}
9273
9274
9275/**
9276 * Emits a vecdst = vecsrc load.
9277 */
9278DECL_FORCE_INLINE(uint32_t)
9279iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9280{
9281#ifdef RT_ARCH_AMD64
9282 /* movdqu vecdst, vecsrc */
9283 pCodeBuf[off++] = 0xf3;
9284
9285 if ((iVecRegDst | iVecRegSrc) >= 8)
9286 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
9287 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
9288 : X86_OP_REX_R;
9289 pCodeBuf[off++] = 0x0f;
9290 pCodeBuf[off++] = 0x6f;
9291 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9292
9293#elif defined(RT_ARCH_ARM64)
9294 /* mov dst, src; alias for: orr dst, src, src */
9295 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9296
9297#else
9298# error "port me"
9299#endif
9300 return off;
9301}
9302
9303
9304/**
9305 * Emits a vecdst = vecsrc load, 128-bit.
9306 */
9307DECL_INLINE_THROW(uint32_t)
9308iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9309{
9310#ifdef RT_ARCH_AMD64
9311 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9312#elif defined(RT_ARCH_ARM64)
9313 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9314#else
9315# error "port me"
9316#endif
9317 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9318 return off;
9319}
9320
9321
9322/**
9323 * Emits a vecdst[128:255] = vecsrc[128:255] load.
9324 */
9325DECL_FORCE_INLINE_THROW(uint32_t)
9326iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9327{
9328#ifdef RT_ARCH_AMD64
9329 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
9330 pCodeBuf[off++] = X86_OP_VEX3;
9331 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9332 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9333 pCodeBuf[off++] = 0x46;
9334 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9335 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
9336
9337#elif defined(RT_ARCH_ARM64)
9338 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9339
9340 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
9341# ifdef IEM_WITH_THROW_CATCH
9342 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9343# else
9344 AssertReleaseFailedStmt(off = UINT32_MAX);
9345# endif
9346#else
9347# error "port me"
9348#endif
9349 return off;
9350}
9351
9352
9353/**
9354 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
9355 */
9356DECL_INLINE_THROW(uint32_t)
9357iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9358{
9359#ifdef RT_ARCH_AMD64
9360 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
9361#elif defined(RT_ARCH_ARM64)
9362 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9363 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
9364#else
9365# error "port me"
9366#endif
9367 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9368 return off;
9369}
9370
9371
9372/**
9373 * Emits a vecdst[0:127] = vecsrc[128:255] load.
9374 */
9375DECL_FORCE_INLINE_THROW(uint32_t)
9376iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9377{
9378#ifdef RT_ARCH_AMD64
9379 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
9380 pCodeBuf[off++] = X86_OP_VEX3;
9381 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
9382 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9383 pCodeBuf[off++] = 0x39;
9384 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
9385 pCodeBuf[off++] = 0x1;
9386
9387#elif defined(RT_ARCH_ARM64)
9388 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
9389
9390 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
9391# ifdef IEM_WITH_THROW_CATCH
9392 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
9393# else
9394 AssertReleaseFailedStmt(off = UINT32_MAX);
9395# endif
9396#else
9397# error "port me"
9398#endif
9399 return off;
9400}
9401
9402
9403/**
9404 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
9405 */
9406DECL_INLINE_THROW(uint32_t)
9407iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9408{
9409#ifdef RT_ARCH_AMD64
9410 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
9411#elif defined(RT_ARCH_ARM64)
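    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */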
9412 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9413 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
9414#else
9415# error "port me"
9416#endif
9417 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9418 return off;
9419}
9420
9421
9422/**
9423 * Emits a vecdst = vecsrc load, 256-bit.
9424 */
9425DECL_INLINE_THROW(uint32_t)
9426iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9427{
9428#ifdef RT_ARCH_AMD64
9429 /* vmovdqa ymm, ymm */
9430 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
9431 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
9432 {
9433 pbCodeBuf[off++] = X86_OP_VEX3;
9434 pbCodeBuf[off++] = 0x41;
9435 pbCodeBuf[off++] = 0x7d;
9436 pbCodeBuf[off++] = 0x6f;
9437 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9438 }
9439 else
9440 {
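        /* The 2-byte VEX prefix can only supply the ModRM.reg extension bit (R), so when the
           source register needs extending we use the store form (opcode 0x7f) with swapped
           ModRM operands, putting the source in the reg field. */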
9441 pbCodeBuf[off++] = X86_OP_VEX2;
9442 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
9443 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
9444 pbCodeBuf[off++] = iVecRegSrc >= 8
9445 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
9446 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9447 }
9448#elif defined(RT_ARCH_ARM64)
9449 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9450 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
9451 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
9452 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
9453#else
9454# error "port me"
9455#endif
9456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9457 return off;
9458}
9459
9460
9461/**
9462 * Emits a vecdst[128:255] = vecsrc[0:127] load.
9463 */
9464DECL_FORCE_INLINE(uint32_t)
9465iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9466{
9467#ifdef RT_ARCH_AMD64
9468 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
9469 pCodeBuf[off++] = X86_OP_VEX3;
9470 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9471 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9472 pCodeBuf[off++] = 0x38;
9473 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9474 pCodeBuf[off++] = 0x01; /* Immediate */
9475
9476#elif defined(RT_ARCH_ARM64)
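    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */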
9477 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
9478 /* mov dst, src; alias for: orr dst, src, src */
9479 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9480
9481#else
9482# error "port me"
9483#endif
9484 return off;
9485}
9486
9487
9488/**
9489 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
9490 */
9491DECL_INLINE_THROW(uint32_t)
9492iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9493{
9494#ifdef RT_ARCH_AMD64
9495 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
9496#elif defined(RT_ARCH_ARM64)
9497 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
9498#else
9499# error "port me"
9500#endif
9501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9502 return off;
9503}
9504
9505
9506/**
9507 * Emits a gprdst = vecsrc[x] load, 64-bit.
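 *
 * @note On AMD64 this clobbers IEMNATIVE_SIMD_REG_FIXED_TMP0 when iQWord >= 2.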
9508 */
9509DECL_FORCE_INLINE(uint32_t)
9510iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9511{
9512#ifdef RT_ARCH_AMD64
9513 if (iQWord >= 2)
9514 {
9515 /*
9516 * vpextrq doesn't work on the upper 128-bits.
9517 * So we use the following sequence:
9518 * vextracti128 vectmp0, vecsrc, 1
9519 * pextrq gpr, vectmp0, #(iQWord - 2)
9520 */
9521 /* vextracti128 */
9522 pCodeBuf[off++] = X86_OP_VEX3;
9523 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9524 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9525 pCodeBuf[off++] = 0x39;
9526 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9527 pCodeBuf[off++] = 0x1;
9528
9529 /* pextrq */
9530 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9531 pCodeBuf[off++] = X86_OP_REX_W
9532 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9533 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9534 pCodeBuf[off++] = 0x0f;
9535 pCodeBuf[off++] = 0x3a;
9536 pCodeBuf[off++] = 0x16;
9537 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9538 pCodeBuf[off++] = iQWord - 2;
9539 }
9540 else
9541 {
9542 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
9543 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9544 pCodeBuf[off++] = X86_OP_REX_W
9545 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9546 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9547 pCodeBuf[off++] = 0x0f;
9548 pCodeBuf[off++] = 0x3a;
9549 pCodeBuf[off++] = 0x16;
9550 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9551 pCodeBuf[off++] = iQWord;
9552 }
9553#elif defined(RT_ARCH_ARM64)
9554 /* umov gprdst, vecsrc[iQWord] */
9555 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9556#else
9557# error "port me"
9558#endif
9559 return off;
9560}
9561
9562
9563/**
9564 * Emits a gprdst = vecsrc[x] load, 64-bit.
9565 */
9566DECL_INLINE_THROW(uint32_t)
9567iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
9568{
9569 Assert(iQWord <= 3);
9570
9571#ifdef RT_ARCH_AMD64
9572 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
9573#elif defined(RT_ARCH_ARM64)
9574 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9575 Assert(!(iVecRegSrc & 0x1));
9576 /* Need to access the "high" 128-bit vector register. */
9577 if (iQWord >= 2)
9578 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
9579 else
9580 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
9581#else
9582# error "port me"
9583#endif
9584 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9585 return off;
9586}
9587
9588
9589/**
9590 * Emits a gprdst = vecsrc[x] load, 32-bit.
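 *
 * @note On AMD64 this clobbers IEMNATIVE_SIMD_REG_FIXED_TMP0 when iDWord >= 4.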
9591 */
9592DECL_FORCE_INLINE(uint32_t)
9593iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9594{
9595#ifdef RT_ARCH_AMD64
9596 if (iDWord >= 4)
9597 {
9598 /*
9599 * vpextrd doesn't work on the upper 128-bits.
9600 * So we use the following sequence:
9601 * vextracti128 vectmp0, vecsrc, 1
9602 * pextrd gpr, vectmp0, #(iDWord - 4)
9603 */
9604 /* vextracti128 */
9605 pCodeBuf[off++] = X86_OP_VEX3;
9606 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
9607 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9608 pCodeBuf[off++] = 0x39;
9609 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9610 pCodeBuf[off++] = 0x1;
9611
9612        /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
9613 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9614 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
9615 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9616 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9617 pCodeBuf[off++] = 0x0f;
9618 pCodeBuf[off++] = 0x3a;
9619 pCodeBuf[off++] = 0x16;
9620 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
9621 pCodeBuf[off++] = iDWord - 4;
9622 }
9623 else
9624 {
9625 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
9626 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9627 if (iGprDst >= 8 || iVecRegSrc >= 8)
9628 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9629 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9630 pCodeBuf[off++] = 0x0f;
9631 pCodeBuf[off++] = 0x3a;
9632 pCodeBuf[off++] = 0x16;
9633 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9634 pCodeBuf[off++] = iDWord;
9635 }
9636#elif defined(RT_ARCH_ARM64)
9637 Assert(iDWord < 4);
9638
9639 /* umov gprdst, vecsrc[iDWord] */
9640 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
9641#else
9642# error "port me"
9643#endif
9644 return off;
9645}
9646
9647
9648/**
9649 * Emits a gprdst = vecsrc[x] load, 32-bit.
9650 */
9651DECL_INLINE_THROW(uint32_t)
9652iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
9653{
9654 Assert(iDWord <= 7);
9655
9656#ifdef RT_ARCH_AMD64
9657 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
9658#elif defined(RT_ARCH_ARM64)
9659 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9660 Assert(!(iVecRegSrc & 0x1));
9661 /* Need to access the "high" 128-bit vector register. */
9662 if (iDWord >= 4)
9663 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
9664 else
9665 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
9666#else
9667# error "port me"
9668#endif
9669 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9670 return off;
9671}
9672
9673
9674/**
9675 * Emits a gprdst = vecsrc[x] load, 16-bit.
9676 */
9677DECL_FORCE_INLINE(uint32_t)
9678iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9679{
9680#ifdef RT_ARCH_AMD64
9681 if (iWord >= 8)
9682 {
9683 /** @todo Currently not used. */
9684 AssertReleaseFailed();
9685 }
9686 else
9687 {
9688 /* pextrw gpr, vecsrc, #iWord */
9689 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9690 if (iGprDst >= 8 || iVecRegSrc >= 8)
9691 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
9692 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
9693 pCodeBuf[off++] = 0x0f;
9694 pCodeBuf[off++] = 0xc5;
9695 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
9696 pCodeBuf[off++] = iWord;
9697 }
9698#elif defined(RT_ARCH_ARM64)
9699 /* umov gprdst, vecsrc[iWord] */
9700 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
9701#else
9702# error "port me"
9703#endif
9704 return off;
9705}
9706
9707
9708/**
9709 * Emits a gprdst = vecsrc[x] load, 16-bit.
9710 */
9711DECL_INLINE_THROW(uint32_t)
9712iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
9713{
9714    Assert(iWord <= 15);
9715
9716#ifdef RT_ARCH_AMD64
9717 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
9718#elif defined(RT_ARCH_ARM64)
9719 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9720 Assert(!(iVecRegSrc & 0x1));
9721 /* Need to access the "high" 128-bit vector register. */
9722 if (iWord >= 8)
9723 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
9724 else
9725 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
9726#else
9727# error "port me"
9728#endif
9729 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9730 return off;
9731}
9732
9733
9734/**
9735 * Emits a gprdst = vecsrc[x] load, 8-bit.
9736 */
9737DECL_FORCE_INLINE(uint32_t)
9738iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9739{
9740#ifdef RT_ARCH_AMD64
9741 if (iByte >= 16)
9742 {
9743 /** @todo Currently not used. */
9744 AssertReleaseFailed();
9745 }
9746 else
9747 {
9748 /* pextrb gpr, vecsrc, #iByte */
9749 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9750 if (iGprDst >= 8 || iVecRegSrc >= 8)
9751 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
9752 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
9753 pCodeBuf[off++] = 0x0f;
9754 pCodeBuf[off++] = 0x3a;
9755 pCodeBuf[off++] = 0x14;
9756 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
9757 pCodeBuf[off++] = iByte;
9758 }
9759#elif defined(RT_ARCH_ARM64)
9760 /* umov gprdst, vecsrc[iByte] */
9761 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
9762#else
9763# error "port me"
9764#endif
9765 return off;
9766}
9767
9768
9769/**
9770 * Emits a gprdst = vecsrc[x] load, 8-bit.
9771 */
9772DECL_INLINE_THROW(uint32_t)
9773iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
9774{
9775    Assert(iByte <= 31);
9776
9777#ifdef RT_ARCH_AMD64
9778 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
9779#elif defined(RT_ARCH_ARM64)
9780 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9781 Assert(!(iVecRegSrc & 0x1));
9782 /* Need to access the "high" 128-bit vector register. */
9783 if (iByte >= 16)
9784 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
9785 else
9786 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
9787#else
9788# error "port me"
9789#endif
9790 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9791 return off;
9792}
9793
9794
9795/**
9796 * Emits a vecdst[x] = gprsrc store, 64-bit.
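 *
 * @note On AMD64 this clobbers IEMNATIVE_SIMD_REG_FIXED_TMP0 when iQWord >= 2.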
9797 */
9798DECL_FORCE_INLINE(uint32_t)
9799iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9800{
9801#ifdef RT_ARCH_AMD64
9802 if (iQWord >= 2)
9803 {
9804 /*
9805 * vpinsrq doesn't work on the upper 128-bits.
9806 * So we use the following sequence:
9807 * vextracti128 vectmp0, vecdst, 1
9808 * pinsrq vectmp0, gpr, #(iQWord - 2)
9809 * vinserti128 vecdst, vectmp0, 1
9810 */
9811 /* vextracti128 */
9812 pCodeBuf[off++] = X86_OP_VEX3;
9813 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9814 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9815 pCodeBuf[off++] = 0x39;
9816 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9817 pCodeBuf[off++] = 0x1;
9818
9819 /* pinsrq */
9820 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9821 pCodeBuf[off++] = X86_OP_REX_W
9822 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9823 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9824 pCodeBuf[off++] = 0x0f;
9825 pCodeBuf[off++] = 0x3a;
9826 pCodeBuf[off++] = 0x22;
9827 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9828 pCodeBuf[off++] = iQWord - 2;
9829
9830 /* vinserti128 */
9831 pCodeBuf[off++] = X86_OP_VEX3;
9832 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9833 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9834 pCodeBuf[off++] = 0x38;
9835 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9836 pCodeBuf[off++] = 0x01; /* Immediate */
9837 }
9838 else
9839 {
9840        /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
9841 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9842 pCodeBuf[off++] = X86_OP_REX_W
9843 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9844 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9845 pCodeBuf[off++] = 0x0f;
9846 pCodeBuf[off++] = 0x3a;
9847 pCodeBuf[off++] = 0x22;
9848 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9849 pCodeBuf[off++] = iQWord;
9850 }
9851#elif defined(RT_ARCH_ARM64)
9852    /* ins vecdst[iQWord], gpr */
9853 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
9854#else
9855# error "port me"
9856#endif
9857 return off;
9858}
9859
9860
9861/**
9862 * Emits a vecdst[x] = gprsrc store, 64-bit.
9863 */
9864DECL_INLINE_THROW(uint32_t)
9865iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
9866{
9867 Assert(iQWord <= 3);
9868
9869#ifdef RT_ARCH_AMD64
9870 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
9871#elif defined(RT_ARCH_ARM64)
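    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */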
9872 Assert(!(iVecRegDst & 0x1));
9873 if (iQWord >= 2)
9874 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
9875 else
9876 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
9877#else
9878# error "port me"
9879#endif
9880 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9881 return off;
9882}
9883
9884
9885/**
9886 * Emits a vecdst[x] = gprsrc store, 32-bit.
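 *
 * @note On AMD64 this clobbers IEMNATIVE_SIMD_REG_FIXED_TMP0 when iDWord >= 4.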
9887 */
9888DECL_FORCE_INLINE(uint32_t)
9889iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9890{
9891#ifdef RT_ARCH_AMD64
9892 if (iDWord >= 4)
9893 {
9894 /*
9895         * vpinsrd doesn't work on the upper 128-bits.
9896 * So we use the following sequence:
9897 * vextracti128 vectmp0, vecdst, 1
9898         *     pinsrd vectmp0, gpr, #(iDWord - 4)
9899 * vinserti128 vecdst, vectmp0, 1
9900 */
9901 /* vextracti128 */
9902 pCodeBuf[off++] = X86_OP_VEX3;
9903 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9904 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
9905 pCodeBuf[off++] = 0x39;
9906 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9907 pCodeBuf[off++] = 0x1;
9908
9909 /* pinsrd */
9910 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9911 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
9912 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
9913 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9914 pCodeBuf[off++] = 0x0f;
9915 pCodeBuf[off++] = 0x3a;
9916 pCodeBuf[off++] = 0x22;
9917 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
9918 pCodeBuf[off++] = iDWord - 4;
9919
9920 /* vinserti128 */
9921 pCodeBuf[off++] = X86_OP_VEX3;
9922 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
9923 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9924 pCodeBuf[off++] = 0x38;
9925 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
9926 pCodeBuf[off++] = 0x01; /* Immediate */
9927 }
9928 else
9929 {
9930        /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
9931 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9932 if (iVecRegDst >= 8 || iGprSrc >= 8)
9933 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9934 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9935 pCodeBuf[off++] = 0x0f;
9936 pCodeBuf[off++] = 0x3a;
9937 pCodeBuf[off++] = 0x22;
9938 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9939 pCodeBuf[off++] = iDWord;
9940 }
9941#elif defined(RT_ARCH_ARM64)
9942    /* ins vecdst[iDWord], gpr */
9943 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
9944#else
9945# error "port me"
9946#endif
9947 return off;
9948}
9949
9950
9951/**
9952 * Emits a vecdst[x] = gprsrc store, 32-bit.
9953 */
9954DECL_INLINE_THROW(uint32_t)
9955iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
9956{
9957 Assert(iDWord <= 7);
9958
9959#ifdef RT_ARCH_AMD64
9960 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
9961#elif defined(RT_ARCH_ARM64)
9962 Assert(!(iVecRegDst & 0x1));
9963 if (iDWord >= 4)
9964 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
9965 else
9966 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
9967#else
9968# error "port me"
9969#endif
9970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9971 return off;
9972}
9973
9974
9975/**
9976 * Emits a vecdst[x] = gprsrc store, 16-bit.
9977 */
9978DECL_FORCE_INLINE(uint32_t)
9979iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
9980{
9981#ifdef RT_ARCH_AMD64
9982    /* pinsrw vecdst, gpr, #iWord. */
9983 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9984 if (iVecRegDst >= 8 || iGprSrc >= 8)
9985 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9986 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9987 pCodeBuf[off++] = 0x0f;
9988 pCodeBuf[off++] = 0xc4;
9989 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9990 pCodeBuf[off++] = iWord;
9991#elif defined(RT_ARCH_ARM64)
9992    /* ins vecdst[iWord], gpr */
9993 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
9994#else
9995# error "port me"
9996#endif
9997 return off;
9998}
9999
10000
10001/**
10002 * Emits a vecdst[x] = gprsrc store, 16-bit.
10003 */
10004DECL_INLINE_THROW(uint32_t)
10005iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
10006{
10007    Assert(iWord <= 7);
10008
10009#ifdef RT_ARCH_AMD64
10010 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
10011#elif defined(RT_ARCH_ARM64)
10012 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
10013#else
10014# error "port me"
10015#endif
10016 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10017 return off;
10018}
10019
10020
10021/**
10022 * Emits a vecdst[x] = gprsrc store, 8-bit.
10023 */
10024DECL_FORCE_INLINE(uint32_t)
10025iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
10026{
10027#ifdef RT_ARCH_AMD64
10028    /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
10029 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10030 if (iVecRegDst >= 8 || iGprSrc >= 8)
10031 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10032 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10033 pCodeBuf[off++] = 0x0f;
10034 pCodeBuf[off++] = 0x3a;
10035 pCodeBuf[off++] = 0x20;
10036 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10037 pCodeBuf[off++] = iByte;
10038#elif defined(RT_ARCH_ARM64)
10039    /* ins vecdst[iByte], gpr */
10040 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
10041#else
10042# error "port me"
10043#endif
10044 return off;
10045}
10046
10047
10048/**
10049 * Emits a vecdst[x] = gprsrc store, 8-bit.
10050 */
10051DECL_INLINE_THROW(uint32_t)
10052iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
10053{
10054 Assert(iByte <= 15);
10055
10056#ifdef RT_ARCH_AMD64
10057 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
10058#elif defined(RT_ARCH_ARM64)
10059 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
10060#else
10061# error "port me"
10062#endif
10063 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10064 return off;
10065}
10066
10067
10068/**
10069 * Emits a vecdst.au32[iDWord] = 0 store.
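 *
 * @note On AMD64 this clobbers IEMNATIVE_REG_FIXED_TMP0 (zeroed), and also
 *       IEMNATIVE_SIMD_REG_FIXED_TMP0 when iDWord >= 4.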
10070 */
10071DECL_FORCE_INLINE(uint32_t)
10072iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
10073{
10074 Assert(iDWord <= 7);
10075
10076#ifdef RT_ARCH_AMD64
10077 /*
10078 * xor tmp0, tmp0
10079 * pinsrd xmm, tmp0, iDword
10080 */
10081 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
10082 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
10083 pCodeBuf[off++] = 0x33;
10084 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
10085 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
10086#elif defined(RT_ARCH_ARM64)
10087 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10088 Assert(!(iVecReg & 0x1));
10089 /* ins vecsrc[iDWord], wzr */
10090 if (iDWord >= 4)
10091 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
10092 else
10093 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
10094#else
10095# error "port me"
10096#endif
10097 return off;
10098}
10099
10100
10101/**
10102 * Emits a vecdst.au32[iDWord] = 0 store.
10103 */
10104DECL_INLINE_THROW(uint32_t)
10105iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
10106{
10107
10108#ifdef RT_ARCH_AMD64
10109 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
10110#elif defined(RT_ARCH_ARM64)
10111 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
10112#else
10113# error "port me"
10114#endif
10115 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10116 return off;
10117}
10118
10119
10120/**
10121 * Emits a vecdst[0:127] = 0 store.
10122 */
10123DECL_FORCE_INLINE(uint32_t)
10124iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10125{
10126#ifdef RT_ARCH_AMD64
10127 /* pxor xmm, xmm */
10128 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10129 if (iVecReg >= 8)
10130 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
10131 pCodeBuf[off++] = 0x0f;
10132 pCodeBuf[off++] = 0xef;
10133 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10134#elif defined(RT_ARCH_ARM64)
10135 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10136 Assert(!(iVecReg & 0x1));
10137 /* eor vecreg, vecreg, vecreg */
10138 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
10139#else
10140# error "port me"
10141#endif
10142 return off;
10143}
10144
10145
10146/**
10147 * Emits a vecdst[0:127] = 0 store.
10148 */
10149DECL_INLINE_THROW(uint32_t)
10150iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10151{
10152#ifdef RT_ARCH_AMD64
10153 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
10154#elif defined(RT_ARCH_ARM64)
10155 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
10156#else
10157# error "port me"
10158#endif
10159 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10160 return off;
10161}
10162
10163
10164/**
10165 * Emits a vecdst[128:255] = 0 store.
10166 */
10167DECL_FORCE_INLINE(uint32_t)
10168iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10169{
10170#ifdef RT_ARCH_AMD64
10171 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
10172 if (iVecReg < 8)
10173 {
10174 pCodeBuf[off++] = X86_OP_VEX2;
10175 pCodeBuf[off++] = 0xf9;
10176 }
10177 else
10178 {
10179 pCodeBuf[off++] = X86_OP_VEX3;
10180 pCodeBuf[off++] = 0x41;
10181 pCodeBuf[off++] = 0x79;
10182 }
10183 pCodeBuf[off++] = 0x6f;
10184 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10185#elif defined(RT_ARCH_ARM64)
10186 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10187 Assert(!(iVecReg & 0x1));
10188 /* eor vecreg, vecreg, vecreg */
10189 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
10190#else
10191# error "port me"
10192#endif
10193 return off;
10194}
10195
10196
10197/**
10198 * Emits a vecdst[128:255] = 0 store.
10199 */
10200DECL_INLINE_THROW(uint32_t)
10201iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10202{
10203#ifdef RT_ARCH_AMD64
10204 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
10205#elif defined(RT_ARCH_ARM64)
10206 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
10207#else
10208# error "port me"
10209#endif
10210 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10211 return off;
10212}
10213
10214
10215/**
10216 * Emits a vecdst[0:255] = 0 store.
10217 */
10218DECL_FORCE_INLINE(uint32_t)
10219iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
10220{
10221#ifdef RT_ARCH_AMD64
10222 /* vpxor ymm, ymm, ymm */
10223 if (iVecReg < 8)
10224 {
10225 pCodeBuf[off++] = X86_OP_VEX2;
10226 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
10227 }
10228 else
10229 {
10230 pCodeBuf[off++] = X86_OP_VEX3;
10231 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
10232 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
10233 }
10234 pCodeBuf[off++] = 0xef;
10235 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
10236#elif defined(RT_ARCH_ARM64)
10237 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10238 Assert(!(iVecReg & 0x1));
10239 /* eor vecreg, vecreg, vecreg */
10240 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
10241 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
10242#else
10243# error "port me"
10244#endif
10245 return off;
10246}
10247
10248
10249/**
10250 * Emits a vecdst[0:255] = 0 store.
10251 */
10252DECL_INLINE_THROW(uint32_t)
10253iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
10254{
10255#ifdef RT_ARCH_AMD64
10256 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
10257#elif defined(RT_ARCH_ARM64)
10258 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
10259#else
10260# error "port me"
10261#endif
10262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10263 return off;
10264}
10265
10266
10267/**
10268 * Emits a vecdst = gprsrc broadcast, 8-bit.
10269 */
10270DECL_FORCE_INLINE(uint32_t)
10271iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10272{
10273#ifdef RT_ARCH_AMD64
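    /* The AVX2 register form of vpbroadcastb only accepts an XMM (or memory) source,
       so move the GPR into the low byte of the destination first and broadcast from there. */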
10274 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
10275 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10276 if (iVecRegDst >= 8 || iGprSrc >= 8)
10277 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10278 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10279 pCodeBuf[off++] = 0x0f;
10280 pCodeBuf[off++] = 0x3a;
10281 pCodeBuf[off++] = 0x20;
10282 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10283 pCodeBuf[off++] = 0x00;
10284
10285 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
10286 pCodeBuf[off++] = X86_OP_VEX3;
10287 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10288 | 0x02 /* opcode map. */
10289 | ( iVecRegDst >= 8
10290 ? 0
10291 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10292 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10293 pCodeBuf[off++] = 0x78;
10294 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10295#elif defined(RT_ARCH_ARM64)
10296 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10297 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10298
10299 /* dup vecsrc, gpr */
10300 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
10301 if (f256Bit)
10302 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
10303#else
10304# error "port me"
10305#endif
10306 return off;
10307}
10308
10309
10310/**
10311 * Emits a vecdst = gprsrc broadcast, 8-bit.
10312 */
10313DECL_INLINE_THROW(uint32_t)
10314iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10315{
10316#ifdef RT_ARCH_AMD64
10317 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10318#elif defined(RT_ARCH_ARM64)
10319 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10320#else
10321# error "port me"
10322#endif
10323 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10324 return off;
10325}
10326
10327
10328/**
10329 * Emits a vecdst = gprsrc broadcast, 16-bit.
10330 */
10331DECL_FORCE_INLINE(uint32_t)
10332iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10333{
10334#ifdef RT_ARCH_AMD64
10335 /* pinsrw vecdst, gpr, #0 */
10336 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10337 if (iVecRegDst >= 8 || iGprSrc >= 8)
10338 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10339 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10340 pCodeBuf[off++] = 0x0f;
10341 pCodeBuf[off++] = 0xc4;
10342 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10343 pCodeBuf[off++] = 0x00;
10344
10345    /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
10346 pCodeBuf[off++] = X86_OP_VEX3;
10347 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10348 | 0x02 /* opcode map. */
10349 | ( iVecRegDst >= 8
10350 ? 0
10351 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10352 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10353 pCodeBuf[off++] = 0x79;
10354 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10355#elif defined(RT_ARCH_ARM64)
10356 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10357 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10358
10359 /* dup vecsrc, gpr */
10360 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
10361 if (f256Bit)
10362 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
10363#else
10364# error "port me"
10365#endif
10366 return off;
10367}
10368
10369
10370/**
10371 * Emits a vecdst = gprsrc broadcast, 16-bit.
10372 */
10373DECL_INLINE_THROW(uint32_t)
10374iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10375{
10376#ifdef RT_ARCH_AMD64
10377 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10378#elif defined(RT_ARCH_ARM64)
10379 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10380#else
10381# error "port me"
10382#endif
10383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10384 return off;
10385}
10386
10387
10388/**
10389 * Emits a vecdst = gprsrc broadcast, 32-bit.
10390 */
10391DECL_FORCE_INLINE(uint32_t)
10392iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10393{
10394#ifdef RT_ARCH_AMD64
10395 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
10396 * vbroadcast needs a memory operand or another xmm register to work... */
10397
10398 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
10399 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10400 if (iVecRegDst >= 8 || iGprSrc >= 8)
10401 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10402 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10403 pCodeBuf[off++] = 0x0f;
10404 pCodeBuf[off++] = 0x3a;
10405 pCodeBuf[off++] = 0x22;
10406 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10407 pCodeBuf[off++] = 0x00;
10408
10409 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
10410 pCodeBuf[off++] = X86_OP_VEX3;
10411 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10412 | 0x02 /* opcode map. */
10413 | ( iVecRegDst >= 8
10414 ? 0
10415 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10416 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10417 pCodeBuf[off++] = 0x58;
10418 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10419#elif defined(RT_ARCH_ARM64)
10420 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10421 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10422
10423 /* dup vecsrc, gpr */
10424 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
10425 if (f256Bit)
10426 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
10427#else
10428# error "port me"
10429#endif
10430 return off;
10431}
10432
10433
10434/**
10435 * Emits a vecdst = gprsrc broadcast, 32-bit.
10436 */
10437DECL_INLINE_THROW(uint32_t)
10438iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10439{
10440#ifdef RT_ARCH_AMD64
10441 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
10442#elif defined(RT_ARCH_ARM64)
10443 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10444#else
10445# error "port me"
10446#endif
10447 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10448 return off;
10449}
10450
10451
10452/**
10453 * Emits a vecdst = gprsrc broadcast, 64-bit.
10454 */
10455DECL_FORCE_INLINE(uint32_t)
10456iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10457{
10458#ifdef RT_ARCH_AMD64
10459 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
10460 * vbroadcast needs a memory operand or another xmm register to work... */
10461
10462 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
10463 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
10464 pCodeBuf[off++] = X86_OP_REX_W
10465 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
10466 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
10467 pCodeBuf[off++] = 0x0f;
10468 pCodeBuf[off++] = 0x3a;
10469 pCodeBuf[off++] = 0x22;
10470 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
10471 pCodeBuf[off++] = 0x00;
10472
10473 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
10474 pCodeBuf[off++] = X86_OP_VEX3;
10475 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
10476 | 0x02 /* opcode map. */
10477 | ( iVecRegDst >= 8
10478 ? 0
10479 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
10480 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
10481 pCodeBuf[off++] = 0x59;
10482 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
10483#elif defined(RT_ARCH_ARM64)
10484 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10485 Assert(!(iVecRegDst & 0x1) || !f256Bit);
10486
10487 /* dup vecsrc, gpr */
10488 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
10489 if (f256Bit)
10490 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
10491#else
10492# error "port me"
10493#endif
10494 return off;
10495}
10496
10497
10498/**
10499 * Emits a vecdst = gprsrc broadcast, 64-bit.
10500 */
10501DECL_INLINE_THROW(uint32_t)
10502iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
10503{
10504#ifdef RT_ARCH_AMD64
10505 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
10506#elif defined(RT_ARCH_ARM64)
10507 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
10508#else
10509# error "port me"
10510#endif
10511 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10512 return off;
10513}
10514
10515
10516/**
10517 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10518 */
10519DECL_FORCE_INLINE(uint32_t)
10520iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10521{
10522#ifdef RT_ARCH_AMD64
10523 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
10524
10525 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
10526 pCodeBuf[off++] = X86_OP_VEX3;
10527 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
10528 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
10529 pCodeBuf[off++] = 0x38;
10530 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
10531 pCodeBuf[off++] = 0x01; /* Immediate */
10532#elif defined(RT_ARCH_ARM64)
10533 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
10534 Assert(!(iVecRegDst & 0x1));
10535
10536 /* mov dst, src; alias for: orr dst, src, src */
10537 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
10538 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
10539#else
10540# error "port me"
10541#endif
10542 return off;
10543}
10544
10545
10546/**
10547 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
10548 */
10549DECL_INLINE_THROW(uint32_t)
10550iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
10551{
10552#ifdef RT_ARCH_AMD64
10553 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
10554#elif defined(RT_ARCH_ARM64)
10555 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
10556#else
10557# error "port me"
10558#endif
10559 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10560 return off;
10561}
10562
10563
10564/** @} */
10565
10566#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
10567