/* $Id: IEMN8veRecompilerEmit.h 104468 2024-05-01 00:43:28Z vboxsync $ */
/** @file
 * IEM - Interpreted Execution Manager - Native Recompiler Inlined Emitters.
 */

/*
 * Copyright (C) 2023 Oracle and/or its affiliates.
 *
 * This file is part of VirtualBox base platform packages, as
 * available from https://www.virtualbox.org.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, in version 3 of the
 * License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses>.
 *
 * SPDX-License-Identifier: GPL-3.0-only
 */

#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h
#ifndef RT_WITHOUT_PRAGMA_ONCE
# pragma once
#endif

#include "IEMN8veRecompiler.h"


/** @defgroup grp_iem_n8ve_re_inline Native Recompiler Inlined Emitters
 * @ingroup grp_iem_n8ve_re
 * @{
 */

/**
 * Emit a simple marker instruction to more easily tell where something starts
 * in the disassembly.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMarker(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (uInfo == 0)
    {
        /* nop */
        pbCodeBuf[off++] = 0x90;
    }
    else
    {
        /* nop [disp32] */
        pbCodeBuf[off++] = 0x0f;
        pbCodeBuf[off++] = 0x1f;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, 0, 5);
        pbCodeBuf[off++] = RT_BYTE1(uInfo);
        pbCodeBuf[off++] = RT_BYTE2(uInfo);
        pbCodeBuf[off++] = RT_BYTE3(uInfo);
        pbCodeBuf[off++] = RT_BYTE4(uInfo);
    }
#elif defined(RT_ARCH_ARM64)
    /* nop */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    if (uInfo == 0)
        pu32CodeBuf[off++] = ARMV8_A64_INSTR_NOP;
    else
        pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(ARMV8_A64_REG_XZR, (uint16_t)uInfo);

    RT_NOREF(uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

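/* Usage sketch (illustrative, not part of the original header): every
   emitter takes and returns the current instruction buffer offset, so a
   caller chains them like this (assuming a valid pReNative from the
   recompiler core):

        off = iemNativeEmitMarker(pReNative, off, UINT32_C(0x1234));

   On AMD64 a non-zero uInfo becomes the disp32 of the 7-byte
   'nop dword [rip+disp32]'; on ARM64 it becomes 'movz xzr, #0x1234'. */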
/**
 * Emit a breakpoint instruction.
 */
DECL_FORCE_INLINE(uint32_t) iemNativeEmitBrkEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0xcc;
    RT_NOREF(uInfo); /** @todo use multibyte nop for info? */

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrBrk(uInfo & UINT32_C(0xffff));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emit a breakpoint instruction.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitBrk(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uInfo)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitBrkEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, uInfo);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

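/* Convention sketch (illustrative, inferred from the pair above): the 'Ex'
   variants write into a caller-provided buffer that must already have room,
   while the plain variants call iemNativeInstrBufEnsure() themselves:

        PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        off = iemNativeEmitBrkEx(pCodeBuf, off, 0x1111);   // caller ensured space
        off = iemNativeEmitBrk(pReNative, off, 0x2222);    // ensures internally  */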
/*********************************************************************************************************************************
*   Loads, Stores and Related Stuff.                                                                                             *
*********************************************************************************************************************************/

#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprByGpr and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByGprDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp)
{
    if (offDisp == 0 && (iGprBase & 7) != X86_GREG_xBP) /* Can use encoding w/o displacement field. */
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
    }
    else if (offDisp == (int8_t)offDisp)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = (uint8_t)offDisp;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, iGprBase & 7);
        if ((iGprBase & 7) == X86_GREG_xSP) /* for RSP/R12 relative addressing we have to use a SIB byte. */
            pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_xSP, X86_GREG_xSP, 0); /* -> [RSP/R12] */
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
    }
    return off;
}
#endif /* RT_ARCH_AMD64 */

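/* Worked encoding example (illustrative): for iGprReg=RAX, iGprBase=RSP and
   offDisp=0x10 this emits modrm 0x44 (mod=01), SIB 0x24 ([RSP]) and disp8
   0x10 -- the addressing tail of 'mov rax, [rsp+10h]'.  The xBP exclusion in
   the first branch exists because mod=00 with rm=101 means RIP-relative in
   64-bit mode, so RBP/R13 bases always need at least a disp8. */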
/**
 * Emits setting a GPR to zero.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGprZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#ifdef RT_ARCH_AMD64
    /* xor gpr32, gpr32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
    pbCodeBuf[off++] = 0x33;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov gpr, #0x0 */
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

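/* Byte-level example (illustrative): zeroing r8 emits 45 33 c0, i.e.
   'xor r8d, r8d' -- the 32-bit form implicitly clears bits 63:32 -- while
   the ARM64 path for iGpr=8 produces 0xd2800008, i.e. 'movz x8, #0'. */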
/**
 * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 6 instruction bytes.
 *      - ARM64: 2 instruction words (8 bytes).
 *
 * @note The top 32 bits will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    if (uImm32 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm32);
        pCodeBuf[off++] = RT_BYTE2(uImm32);
        pCodeBuf[off++] = RT_BYTE3(uImm32);
        pCodeBuf[off++] = RT_BYTE4(uImm32);
    }

#elif defined(RT_ARCH_ARM64)
    if ((uImm32 >> 16) == 0)
        /* movz gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32, 0, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == 0)
        /* movz gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
        /* movn gpr, imm16, lsl #16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
    else if ((uImm32 >> 16) == UINT32_C(0xffff))
        /* movn gpr, imm16 */
        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32, 0, false /*f64Bit*/);
    else
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16, 1, false /*f64Bit*/);
    }

#else
# error "port me"
#endif
    return off;
}

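/* Selection example (illustrative): uImm32 = 0xffff1234 matches the last
   movn case because its top half is all ones; 'movn w<n>, #0xedcb' writes
   ~0x0000edcb = 0xffff1234 in one instruction.  A value like 0x12345678,
   with neither half-word trivial, falls through to the two-instruction
   movz+movk sequence. */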
/**
 * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
 * buffer space.
 *
 * Max buffer consumption:
 *      - AMD64: 10 instruction bytes.
 *      - ARM64: 4 instruction words (16 bytes).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    if (uImm64 == 0)
    {
        /* xor gpr, gpr */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
        pCodeBuf[off++] = 0x33;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
    }
    else if (uImm64 <= UINT32_MAX)
    {
        /* mov gpr, imm32 */
        if (iGpr >= 8)
            pCodeBuf[off++] = X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else if (uImm64 == (uint64_t)(int32_t)uImm64)
    {
        /* mov gpr, sx(imm32) */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xc7;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
    }
    else
    {
        /* mov gpr, imm64 */
        if (iGpr < 8)
            pCodeBuf[off++] = X86_OP_REX_W;
        else
            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm64);
        pCodeBuf[off++] = RT_BYTE2(uImm64);
        pCodeBuf[off++] = RT_BYTE3(uImm64);
        pCodeBuf[off++] = RT_BYTE4(uImm64);
        pCodeBuf[off++] = RT_BYTE5(uImm64);
        pCodeBuf[off++] = RT_BYTE6(uImm64);
        pCodeBuf[off++] = RT_BYTE7(uImm64);
        pCodeBuf[off++] = RT_BYTE8(uImm64);
    }

#elif defined(RT_ARCH_ARM64)
    /*
     * Quick simplification: Do 32-bit load if top half is zero.
     */
    if (uImm64 <= UINT32_MAX)
        return iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGpr, (uint32_t)uImm64);

    /*
     * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
     * supply remaining bits using 'movk gpr, imm16, lsl #x'.
     *
     * The mov instruction is encoded 0xd2800000 + shift + imm16 + gpr,
     * while the movk is 0xf2800000 + shift + imm16 + gpr, meaning the diff
     * is 0x20000000 (bit 29). So, we keep this bit in a variable and set it
     * after the first non-zero immediate component so we switch to movk for
     * the remainder.
     */
    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
                            + !((uImm64 >> 16) & UINT16_MAX)
                            + !((uImm64 >> 32) & UINT16_MAX)
                            + !((uImm64 >> 48) & UINT16_MAX);
    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
    if (cFfffHalfWords <= cZeroHalfWords)
    {
        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;

        /* movz gpr, imm16 */
        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
        if (uImmPart || cZeroHalfWords == 4)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #16 */
        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #32 */
        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
        if (uImmPart)
        {
            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
            fMovBase |= RT_BIT_32(29);
        }
        /* mov[z/k] gpr, imm16, lsl #48 */
        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
        if (uImmPart)
            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
    }
    else
    {
        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;

        /* find the first half-word that isn't UINT16_MAX. */
        uint32_t const iHwNotFfff = ( uImm64        & UINT16_MAX) != UINT16_MAX ? 0
                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;

        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
        /* movk gpr, imm16 */
        if (iHwNotFfff != 0)
        {
            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #16 */
        if (iHwNotFfff != 1)
        {
            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #32 */
        if (iHwNotFfff != 2)
        {
            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
        }
        /* movk gpr, imm16, lsl #48 */
        if (iHwNotFfff != 3)
        {
            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
            if (uImmPart != UINT32_C(0xffff))
                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
        }
    }

#else
# error "port me"
#endif
    return off;
}

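/* Worked example (illustrative): uImm64 = UINT64_C(0x0000000100004711) has
   two zero half-words, so the movz/movk branch wins and emits only
        movz x<n>, #0x4711
        movk x<n>, #0x0001, lsl #32
   whereas UINT64_C(0xffffffffffff0000) has three 0xffff half-words and is
   produced by the single instruction 'movn x<n>, #0xffff'. */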
/**
 * Emits loading a constant into a 64-bit GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into a 32-bit GPR.
 * @note The top 32 bits will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits loading a constant into an 8-bit GPR.
 * @note The AMD64 version does *NOT* clear any bits in the 8..63 range,
 *       only the ARM64 version does that.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGpr8Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint8_t uImm8)
{
#ifdef RT_ARCH_AMD64
    /* mov gpr, imm8 */
    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    else if (iGpr >= 4)
        pbCodeBuf[off++] = X86_OP_REX;
    pbCodeBuf[off++] = 0xb0 + (iGpr & 7);
    pbCodeBuf[off++] = RT_BYTE1(uImm8);

#elif defined(RT_ARCH_ARM64)
    /* movz gpr, imm16, lsl #0 */
    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | ((uint32_t)uImm8 << 5) | iGpr;

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


#ifdef RT_ARCH_AMD64
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
{
    if (offVCpu < 128)
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
    }
    else
    {
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
    }
    return off;
}

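/* Illustrative note: IEMNATIVE_REG_FIXED_PVMCPU is RBX on AMD64 (see the
   'lea gprdst, [rbx + offDisp]' comment further down), so a VMCPU field in
   the first 128 bytes costs just modrm+disp8: with iGprReg=RAX and
   offVCpu=0x40 the bytes are 0x43 0x40, i.e. '[rbx+40h]'. */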
#elif defined(RT_ARCH_ARM64)

/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64Ex and friends.
 *
 * @note Loads can use @a iGprReg for large offsets, stores require a
 *       temporary register (@a iGprTmp).
 * @note DON'T try this with prefetch.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu,
                             ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    else if (!ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation) || iGprTmp != UINT8_MAX)
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2? if it saves instructions? */
        if (iGprTmp == UINT8_MAX)
            iGprTmp = iGprReg;
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, iGprTmp);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

    return off;
}

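/* Range example (illustrative): the scaled unsigned-offset form reaches
   4095 * cbData bytes, so an aligned uint64_t field is addressable up to
   offVCpu = 32760 straight off pVCpu; fields beyond that but within the
   same distance of cpum.GstCtx still take a single instruction via the
   fixed PCPUMCTX base register. */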
/**
 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
                           uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
{
    /*
     * There are a couple of ldr variants that take an immediate offset, so
     * try to use those if we can, otherwise we have to use the temporary
     * register to help with the addressing.
     */
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        /** @todo reduce offVCpu by >> 3 or >> 2? if it saves instructions? */
        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU,
                                                       IEMNATIVE_REG_FIXED_TMP0);
    }
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

#endif /* RT_ARCH_ARM64 */


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg64, mem64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 64-bit GPR load of a VCpu value.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 32-bit GPR load of a VCpu value.
 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov reg32, mem32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8b;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a 16-bit GPR load of a VCpu value.
 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits an 8-bit GPR load of a VCpu value.
 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* movzx reg32, mem8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t offVCpu,
                                 uint8_t iGprTmp = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 */
    if (iGpr < 8)
        pCodeBuf[off++] = X86_OP_REX_W;
    else
        pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iGpr, offVCpu);
    RT_NOREF(iGprTmp);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 64-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGpr, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGpr, offVCpu,
                                           IEMNATIVE_REG_FIXED_TMP0);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a store of a GPR value to a 32-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, reg32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to a 16-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, reg16 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x89;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of a GPR value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGprToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, reg8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_R;
    pbCodeBuf[off++] = 0x88;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to a 64-bit VCpu field.
 *
 * @note Will allocate temporary registers on both ARM64 and AMD64.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem64, reg64 (via a temporary register holding the immediate) */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitStoreGprToVCpuU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxRegImm, offVCpu);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}

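/* Usage sketch (illustrative): storing zero needs no temporary register on
   ARM64 thanks to the XZR special case above:

        off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, 0, offSomeU64Field);

   'offSomeU64Field' is a placeholder; any suitably aligned 64-bit VMCPU
   member offset works the same way. */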
/**
 * Emits a store of an immediate value to a 32-bit VCpu field.
 *
 * @note ARM64: Will allocate temporary registers.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t uImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem32, imm32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    pCodeBuf[off++] = RT_BYTE3(uImm);
    pCodeBuf[off++] = RT_BYTE4(uImm);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    uint8_t const idxRegImm = uImm == 0 ? ARMV8_A64_REG_XZR : iemNativeRegAllocTmpImm(pReNative, &off, uImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
    if (idxRegImm != ARMV8_A64_REG_XZR)
        iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}



/**
 * Emits a store of an immediate value to a 16-bit VCpu field.
 *
 * @note ARM64: An @a idxTmp1 is always required! Whether @a idxTmp2 is needed
 *       depends on whether the offset can be encoded as an immediate or not.
 *       The @a offVCpu immediate range is 0..8190 bytes from VMCPU and the
 *       same from CPUMCPU.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint32_t offVCpu,
                                 uint8_t idxTmp1 = UINT8_MAX, uint8_t idxTmp2 = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    /* mov mem16, imm16 */
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    pCodeBuf[off++] = 0xc7;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    pCodeBuf[off++] = RT_BYTE1(uImm);
    pCodeBuf[off++] = RT_BYTE2(uImm);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    if (idxTmp1 != UINT8_MAX)
    {
        pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp1, uImm);
        off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, idxTmp1, offVCpu, kArmv8A64InstrLdStType_St_Half,
                                           sizeof(uint16_t), idxTmp2);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a store of an immediate value to an 8-bit VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreImmToVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bImm, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* mov mem8, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pbCodeBuf[off++] = 0xc6;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, offVCpu);
    pbCodeBuf[off++] = bImm;
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);

#elif defined(RT_ARCH_ARM64)
    /* Cannot use IEMNATIVE_REG_FIXED_TMP0 for the immediate as that's used by iemNativeEmitGprByVCpuLdSt. */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, bImm);
    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, idxRegImm, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a load effective address to a GPR of a VCpu field.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLeaGprByVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* lea gprdst, [rbx + offDisp] */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    if (iGprDst < 8)
        pbCodeBuf[off++] = X86_OP_REX_W;
    else
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x8d;
    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGprDst, offVCpu);

#elif defined(RT_ARCH_ARM64)
    if (offVCpu < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)_4K)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx));
    }
    else if (offVCpu <= 0xffffffU)
    {
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu >> 12,
                                                   true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
        if (offVCpu & 0xfffU)
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, offVCpu & 0xfff);
    }
    else
    {
        Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
        off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offVCpu);
        uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, iGprDst);
    }

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromStamCounterPtr(PVMCPU pVCpu, PSTAMCOUNTER pStamCounter)
{
    uintptr_t const off = (uintptr_t)pStamCounter - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/** This is just a typesafe alternative to RT_UOFFSETOF. */
DECL_FORCE_INLINE(uint32_t) iemNativeVCpuOffsetFromU64Ptr(PVMCPU pVCpu, uint64_t *pu64)
{
    uintptr_t const off = (uintptr_t)pu64 - (uintptr_t)pVCpu;
    Assert(off < sizeof(VMCPU));
    return off;
}


/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    /* inc qword [pVCpu + off] */
    pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(STAMCOUNTER);
    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
    }

#else
# error "port me"
#endif
    return off;
}

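/* Usage sketch (illustrative): the typesafe offset helpers above pair
   naturally with this emitter; assuming two scratch registers from
   iemNativeRegAllocTmp() and a hypothetical pVCpu->iem.s.StatSomething
   member:

        off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, idxTmp1, idxTmp2,
                  iemNativeVCpuOffsetFromStamCounterPtr(pVCpu, &pVCpu->iem.s.StatSomething));

   The helper's Assert catches pointers that do not actually live inside
   VMCPU. */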
/**
 * Emits code for incrementing a statistics counter (STAMCOUNTER/uint64_t) in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncStamCounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncStamCounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpuEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* inc dword [pVCpu + offVCpu] */
    pCodeBuf[off++] = 0xff;
    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
    RT_NOREF(idxTmp1, idxTmp2);

#elif defined(RT_ARCH_ARM64)
    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        if (offVCpu / cbData < (unsigned)UINT16_MAX)
        {
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmp2, offVCpu / cbData);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU,
                                                        idxTmp2, kArmv8A64InstrLdStExtend_Lsl, true /*fShifted(2)*/);
        }
        else
        {
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmp2, offVCpu);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxTmp1, idxTmp1, 1);
            pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmp1, IEMNATIVE_REG_FIXED_PVMCPU, idxTmp2);
        }
    }

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits code for incrementing an unsigned 32-bit statistics counter in VMCPU.
 *
 * @note The two temp registers are not required for AMD64. ARM64 always
 *       requires the first, and the 2nd is needed if the offset cannot be
 *       encoded as an immediate.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitIncU32CounterInVCpu(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxTmp1, uint8_t idxTmp2, uint32_t offVCpu)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, idxTmp1, idxTmp2, offVCpu);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitIncU32CounterInVCpuEx(iemNativeInstrBufEnsure(pReNative, off, 4+3), off, idxTmp1, idxTmp2, offVCpu);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* or dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const         idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const            fShifted    = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrOrrImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrOrr(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}

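/* Encodability example (illustrative): ARM64 logical immediates must be a
   (possibly rotated and replicated) run of contiguous ones, so a mask like
   0x00000070 converts via Armv8A64ConvertMask32ToImmRImmS() and the OR is a
   single immediate-form 'orr', while a scattered mask such as 0x00010005
   fails the conversion and takes the iemNativeRegAllocTmpImm() path. */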
/**
 * Emits code for AND'ing a bitmask into a 32-bit VMCPU member.
 *
 * @note May allocate temporary registers (not AMD64).
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndImmIntoVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fMask, uint32_t offVCpu)
{
    Assert(!(offVCpu & 3)); /* ASSUME correctly aligned member. */
#ifdef RT_ARCH_AMD64
    /* and dword [pVCpu + offVCpu], imm8/32 */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    if (fMask < 0x80)
    {
        pCodeBuf[off++] = 0x83;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = (uint8_t)fMask;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 4, offVCpu);
        pCodeBuf[off++] = RT_BYTE1(fMask);
        pCodeBuf[off++] = RT_BYTE2(fMask);
        pCodeBuf[off++] = RT_BYTE3(fMask);
        pCodeBuf[off++] = RT_BYTE4(fMask);
    }

#elif defined(RT_ARCH_ARM64)
    /* If the constant is unwieldy we'll need a register to hold it as well. */
    uint32_t uImmSizeLen, uImmRotate;
    uint8_t const idxTmpMask = Armv8A64ConvertMask32ToImmRImmS(fMask, &uImmSizeLen, &uImmRotate) ? UINT8_MAX
                             : iemNativeRegAllocTmpImm(pReNative, &off, fMask);

    /* We need a temp register for holding the member value we're modifying. */
    uint8_t const idxTmpValue = iemNativeRegAllocTmp(pReNative, &off);

    /* Determine how we're to access pVCpu first. */
    uint32_t const cbData = sizeof(uint32_t);
    if (offVCpu < (unsigned)(_4K * cbData))
    {
        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue,
                                                   IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
    }
    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData))
    {
        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);
        pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PCPUMCTX,
                                                   (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    }
    else
    {
        /* The offset is too large, so we must load it into a register and use
           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)].  We'll try to use the 'LSL, #2'
           feature of the instruction if that'll reduce the constant to 16-bits. */
        uint8_t const         idxTmpIndex = iemNativeRegAllocTmp(pReNative, &off);
        PIEMNATIVEINSTR const pCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 5);
        bool const            fShifted    = offVCpu / cbData < (unsigned)UINT16_MAX;
        if (fShifted)
            pCodeBuf[off++] = Armv8A64MkInstrMovZ(idxTmpIndex, offVCpu / cbData);
        else
            off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpIndex, offVCpu);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_Ld_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);

        if (idxTmpMask == UINT8_MAX)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpValue, idxTmpValue, uImmSizeLen, uImmRotate, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAnd(idxTmpValue, idxTmpValue, idxTmpMask, false /*f64Bit*/);

        pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Word, idxTmpValue, IEMNATIVE_REG_FIXED_PVMCPU,
                                                    idxTmpIndex, kArmv8A64InstrLdStExtend_Lsl, fShifted /*fShifted(2)*/);
        iemNativeRegFreeTmp(pReNative, idxTmpIndex);
    }
    iemNativeRegFreeTmp(pReNative, idxTmpValue);
    if (idxTmpMask != UINT8_MAX)
        iemNativeRegFreeTmp(pReNative, idxTmpMask);

#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_W | X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_W | X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_W | X86_OP_REX_R;
    else
        pCodeBuf[off++] = X86_OP_REX_W;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst, src;   alias for: orr dst, xzr, src */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_XZR, iGprSrc);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc load.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitLoadGprFromGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* mov gprdst, gprsrc */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x8b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* mov dst32, src32;   alias for: orr dst32, wzr, src32 */
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, ARMV8_A64_REG_WZR, iGprSrc, false /*f64bit*/);

#else
# error "port me"
#endif
    return off;
}


/**
 * Emits a gprdst = gprsrc[31:0] load.
 * @note Bits 63 thru 32 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitLoadGprFromGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a gprdst = gprsrc[15:0] load.
 * @note Bits 63 thru 16 are cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#ifdef RT_ARCH_AMD64
    /* movzx Gv,Ew */
    if ((iGprDst | iGprSrc) >= 8)
        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
                        :                X86_OP_REX_R;
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = 0xb7;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    /* and gprdst, gprsrc, #0xffff */
# if 1
    Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
# else
    Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
# endif

#else
# error "port me"
#endif
    return off;
}

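/* Encoding note (illustrative): in the '# if 1' branch above, immS=0x0f with
   immR=0 decodes as a 32-bit logical immediate of sixteen consecutive ones,
   i.e. 0x0000ffff, which is exactly what the Assert double-checks through
   Armv8A64ConvertImmRImmS2Mask32(). */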
1529/**
1530 * Emits a gprdst = gprsrc[15:0] load.
1531 * @note Bits 63 thru 15 are cleared.
1532 */
1533DECL_INLINE_THROW(uint32_t)
1534iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1535{
1536#ifdef RT_ARCH_AMD64
1537 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1538#elif defined(RT_ARCH_ARM64)
1539 off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1540#else
1541# error "port me"
1542#endif
1543 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1544 return off;
1545}
1546
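/*
 * Editorial sketch, not part of the original header: the ARM64 path encodes
 * the #0xffff mask as an A64 logical immediate (immS=0x0f, immR=0).  For the
 * simple subset used by these emitters (32-bit element, immR=0, run starting
 * at bit 0), the mask is just immS+1 consecutive one-bits; a hypothetical
 * decoder for that subset only:
 */
#if 0 /* illustrative only */
static uint32_t demoDecodeSimpleLogicalImm(uint8_t immS)
{
    unsigned const cBits = (immS & 0x1f) + 1; /* run length in a 32-bit element */
    return cBits >= 32 ? UINT32_MAX : (UINT32_C(1) << cBits) - 1;
}
/* demoDecodeSimpleLogicalImm(0x0f) == 0xffff and (0x07) == 0xff, matching the Asserts in these emitters. */
#endif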
1547
1548/**
1549 * Emits a gprdst = gprsrc[7:0] load.
1550 * @note Bits 63 thru 8 are cleared.
1551 */
1552DECL_FORCE_INLINE(uint32_t)
1553iemNativeEmitLoadGprFromGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1554{
1555#ifdef RT_ARCH_AMD64
1556 /* movzx Gv,Eb */
1557 if (iGprDst >= 8 || iGprSrc >= 8)
1558 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1559 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1560 : X86_OP_REX_R;
1561 else if (iGprSrc >= 4)
1562 pCodeBuf[off++] = X86_OP_REX;
1563 pCodeBuf[off++] = 0x0f;
1564 pCodeBuf[off++] = 0xb6;
1565 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1566
1567#elif defined(RT_ARCH_ARM64)
1568 /* and gprdst, gprsrc, #0xff */
1569 Assert(Armv8A64ConvertImmRImmS2Mask32(0x07, 0) == UINT8_MAX);
1570 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x07, 0, false /*f64Bit*/);
1571
1572#else
1573# error "port me"
1574#endif
1575 return off;
1576}
1577
1578
1579/**
1580 * Emits a gprdst = gprsrc[7:0] load.
1581 * @note Bits 63 thru 8 are cleared.
1582 */
1583DECL_INLINE_THROW(uint32_t)
1584iemNativeEmitLoadGprFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1585{
1586#ifdef RT_ARCH_AMD64
1587 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
1588#elif defined(RT_ARCH_ARM64)
1589 off = iemNativeEmitLoadGprFromGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
1590#else
1591# error "port me"
1592#endif
1593 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1594 return off;
1595}
1596
1597
1598/**
1599 * Emits a gprdst = gprsrc[15:8] load (ah, ch, dh, bh).
1600 * @note Bits 63 thru 8 are cleared.
1601 */
1602DECL_INLINE_THROW(uint32_t)
1603iemNativeEmitLoadGprFromGpr8Hi(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1604{
1605#ifdef RT_ARCH_AMD64
1606 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1607
1608 /* movzx Gv,Ew */
1609 if ((iGprDst | iGprSrc) >= 8)
1610 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_B
1611 : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
1612 : X86_OP_REX_R;
1613 pbCodeBuf[off++] = 0x0f;
1614 pbCodeBuf[off++] = 0xb7;
1615 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1616
1617 /* shr Ev,8 */
1618 if (iGprDst >= 8)
1619 pbCodeBuf[off++] = X86_OP_REX_B;
1620 pbCodeBuf[off++] = 0xc1;
1621 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
1622 pbCodeBuf[off++] = 8;
1623
1624#elif defined(RT_ARCH_ARM64)
1625 /* ubfx gprdst, gprsrc, #8, #8 - gprdst = gprsrc[15:8] */
1626 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1627 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(iGprDst, iGprSrc, 8, 8, false /*f64Bit*/);
1628
1629#else
1630# error "port me"
1631#endif
1632 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1633 return off;
1634}
1635
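/*
 * Editorial usage sketch, not part of the original header: this emitter is
 * what implements reads of the legacy high-byte registers.  The demo helper
 * is hypothetical:
 */
#if 0 /* illustrative only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitDemoReadAh(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* rcx = ah, i.e. rcx = (rax >> 8) & 0xff, upper bits cleared: */
    return iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, X86_GREG_xCX, X86_GREG_xAX);
}
#endif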
1636
1637/**
1638 * Sign-extends 32-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1639 */
1640DECL_INLINE_THROW(uint32_t)
1641iemNativeEmitLoadGprSignExtendedFromGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1642{
1643#ifdef RT_ARCH_AMD64
1644 /* movsxd r64, r/m32 */
1645 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
1646 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1647 pbCodeBuf[off++] = 0x63;
1648 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1649
1650#elif defined(RT_ARCH_ARM64)
1651 /* sxtw dst, src */
1652 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1653 pu32CodeBuf[off++] = Armv8A64MkInstrSxtw(iGprDst, iGprSrc);
1654
1655#else
1656# error "port me"
1657#endif
1658 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1659 return off;
1660}
1661
1662
1663/**
1664 * Sign-extends 16-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1665 */
1666DECL_INLINE_THROW(uint32_t)
1667iemNativeEmitLoadGprSignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1668{
1669#ifdef RT_ARCH_AMD64
1670 /* movsx r64, r/m16 */
1671 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1672 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1673 pbCodeBuf[off++] = 0x0f;
1674 pbCodeBuf[off++] = 0xbf;
1675 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1676
1677#elif defined(RT_ARCH_ARM64)
1678 /* sxth dst, src */
1679 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1680 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc);
1681
1682#else
1683# error "port me"
1684#endif
1685 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1686 return off;
1687}
1688
1689
1690/**
1691 * Sign-extends 16-bit value in @a iGprSrc into a 32-bit value in @a iGprDst (bits 63 thru 32 are cleared).
1692 */
1693DECL_INLINE_THROW(uint32_t)
1694iemNativeEmitLoadGpr32SignExtendedFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1695{
1696#ifdef RT_ARCH_AMD64
1697 /* movsx r64, r/m16 */
1698 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1699 if (iGprDst >= 8 || iGprSrc >= 8)
1700 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1701 pbCodeBuf[off++] = 0x0f;
1702 pbCodeBuf[off++] = 0xbf;
1703 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1704
1705#elif defined(RT_ARCH_ARM64)
1706 /* sxth dst32, src */
1707 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1708 pu32CodeBuf[off++] = Armv8A64MkInstrSxth(iGprDst, iGprSrc, false /*f64Bit*/);
1709
1710#else
1711# error "port me"
1712#endif
1713 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1714 return off;
1715}
1716
1717
1718/**
1719 * Sign-extends 8-bit value in @a iGprSrc into a 64-bit value in @a iGprDst.
1720 */
1721DECL_INLINE_THROW(uint32_t)
1722iemNativeEmitLoadGprSignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1723{
1724#ifdef RT_ARCH_AMD64
1725 /* movsx r64, r/m8 */
1726 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1727 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1728 pbCodeBuf[off++] = 0x0f;
1729 pbCodeBuf[off++] = 0xbe;
1730 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1731
1732#elif defined(RT_ARCH_ARM64)
1733 /* sxtb dst, src */
1734 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1735 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc);
1736
1737#else
1738# error "port me"
1739#endif
1740 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1741 return off;
1742}
1743
1744
1745/**
1746 * Sign-extends 8-bit value in @a iGprSrc into a 32-bit value in @a iGprDst.
1747 * @note Bits 63 thru 32 are cleared.
1748 */
1749DECL_INLINE_THROW(uint32_t)
1750iemNativeEmitLoadGpr32SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1751{
1752#ifdef RT_ARCH_AMD64
1753 /* movsx r32, r/m8 */
1754 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
1755 if (iGprDst >= 8 || iGprSrc >= 8)
1756 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1757 else if (iGprSrc >= 4)
1758 pbCodeBuf[off++] = X86_OP_REX;
1759 pbCodeBuf[off++] = 0x0f;
1760 pbCodeBuf[off++] = 0xbe;
1761 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1762
1763#elif defined(RT_ARCH_ARM64)
1764 /* sxtb dst32, src32 */
1765 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1766 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1767
1768#else
1769# error "port me"
1770#endif
1771 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1772 return off;
1773}
1774
1775
1776/**
1777 * Sign-extends 8-bit value in @a iGprSrc into a 16-bit value in @a iGprDst.
1778 * @note Bits 63 thru 16 are cleared.
1779 */
1780DECL_INLINE_THROW(uint32_t)
1781iemNativeEmitLoadGpr16SignExtendedFromGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
1782{
1783#ifdef RT_ARCH_AMD64
1784 /* movsx r16, r/m8 */
1785 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1786 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1787 if (iGprDst >= 8 || iGprSrc >= 8)
1788 pbCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
1789 else if (iGprSrc >= 4)
1790 pbCodeBuf[off++] = X86_OP_REX;
1791 pbCodeBuf[off++] = 0x0f;
1792 pbCodeBuf[off++] = 0xbe;
1793 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
1794
1795 /* movzx r32, r/m16 */
1796 if (iGprDst >= 8)
1797 pbCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
1798 pbCodeBuf[off++] = 0x0f;
1799 pbCodeBuf[off++] = 0xb7;
1800 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
1801
1802#elif defined(RT_ARCH_ARM64)
1803 /* sxtb dst32, src32; and dst32, dst32, #0xffff */
1804 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1805 pu32CodeBuf[off++] = Armv8A64MkInstrSxtb(iGprDst, iGprSrc, false /*f64Bit*/);
1806 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1807 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
1808
1809#else
1810# error "port me"
1811#endif
1812 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1813 return off;
1814}
1815
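/*
 * Editorial sketch, not part of the original header: what the three 8-bit
 * sign-extension emitters above compute, expressed in plain C (hypothetical
 * demo helpers):
 */
#if 0 /* illustrative only */
static uint64_t demoSx8To64(uint8_t u8) { return (uint64_t)(int64_t)(int8_t)u8; } /* movsx r64, r/m8 */
static uint64_t demoSx8To32(uint8_t u8) { return (uint32_t)(int32_t)(int8_t)u8; } /* movsx r32, r/m8 */
static uint64_t demoSx8To16(uint8_t u8) { return (uint16_t)(int16_t)(int8_t)u8; } /* movsx + 16-bit mask */
/* E.g. demoSx8To16(0x80) == 0xff80, demoSx8To32(0x80) == 0xffffff80,
   demoSx8To64(0x80) == UINT64_C(0xffffffffffffff80). */
#endif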
1816
1817/**
1818 * Emits a gprdst = gprsrc + addend load.
1819 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1820 */
1821#ifdef RT_ARCH_AMD64
1822DECL_INLINE_THROW(uint32_t)
1823iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1824 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1825{
1826 Assert(iAddend != 0);
1827
1828 /* lea gprdst, [gprsrc + iAddend] */
1829 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
1830 pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1831 pbCodeBuf[off++] = 0x8d;
1832 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1833 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1834 return off;
1835}
1836
1837#elif defined(RT_ARCH_ARM64)
1838DECL_INLINE_THROW(uint32_t)
1839iemNativeEmitLoadGprFromGprWithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1840 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1841{
1842 if ((uint64_t)iAddend < 4096)
1843 {
1844 /* add dst, src, uimm12 */
1845 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1846 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend);
1847 }
1848 else if ((uint64_t)-iAddend < 4096)
1849 {
1850 /* sub dst, src, uimm12 */
1851 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1852 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend);
1853 }
1854 else
1855 {
1856 Assert(iGprSrc != iGprDst);
1857 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, iAddend);
1858 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1859 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst);
1860 }
1861 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1862 return off;
1863}
1864#else
1865# error "port me"
1866#endif
1867
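/*
 * Editorial sketch, not part of the original header: which encoding the
 * ARM64 variant above selects for a given addend (hypothetical helper):
 */
#if 0 /* illustrative only */
static const char *demoAddendStrategy(int64_t iAddend)
{
    if ((uint64_t)iAddend < 4096)
        return "add Xd, Xn, #uimm12";        /* 0..4095 */
    if ((uint64_t)-iAddend < 4096)
        return "sub Xd, Xn, #uimm12";        /* -4095..-1 */
    return "mov Xd, #imm64; add Xd, Xn, Xd"; /* everything else; needs iGprDst != iGprSrc */
}
#endif
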
1868/**
1869 * Emits a gprdst = gprsrc + addend load, accepting iAddend == 0.
1870 * @note The addend is 32-bit for AMD64 and 64-bit for ARM64.
1871 */
1872#ifdef RT_ARCH_AMD64
1873DECL_INLINE_THROW(uint32_t)
1874iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1875 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1876#else
1877DECL_INLINE_THROW(uint32_t)
1878iemNativeEmitLoadGprFromGprWithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1879 uint8_t iGprDst, uint8_t iGprSrc, int64_t iAddend)
1880#endif
1881{
1882 if (iAddend != 0)
1883 return iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1884 return iemNativeEmitLoadGprFromGpr(pReNative, off, iGprDst, iGprSrc);
1885}
1886
1887
1888/**
1889 * Emits a gprdst = gprsrc32 + addend load.
1890 * @note Bits 63 thru 32 are cleared.
1891 */
1892DECL_INLINE_THROW(uint32_t)
1893iemNativeEmitLoadGprFromGpr32WithAddend(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1894 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1895{
1896 Assert(iAddend != 0);
1897
1898#ifdef RT_ARCH_AMD64
1899 /* a32 o32 lea gprdst, [gprsrc + iAddend] */
1900 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
1901 pbCodeBuf[off++] = X86_OP_PRF_SIZE_ADDR;
1902 if ((iGprDst | iGprSrc) >= 8)
1903 pbCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0) | (iGprSrc >= 8 ? X86_OP_REX_B : 0);
1904 pbCodeBuf[off++] = 0x8d;
1905 off = iemNativeEmitGprByGprDisp(pbCodeBuf, off, iGprDst, iGprSrc, iAddend);
1906
1907#elif defined(RT_ARCH_ARM64)
1908 if ((uint32_t)iAddend < 4096)
1909 {
1910 /* add dst, src, uimm12 */
1911 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1912 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprSrc, (uint32_t)iAddend, false /*f64Bit*/);
1913 }
1914 else if ((uint32_t)-iAddend < 4096)
1915 {
1916 /* sub dst, src, uimm12 */
1917 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1918 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprSrc, (uint32_t)-iAddend, false /*f64Bit*/);
1919 }
1920 else
1921 {
1922 Assert(iGprSrc != iGprDst);
1923 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, (int64_t)iAddend);
1924 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1925 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprSrc, iGprDst, false /*f64Bit*/);
1926 }
1927
1928#else
1929# error "port me"
1930#endif
1931 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1932 return off;
1933}
1934
1935
1936/**
1937 * Emits a gprdst = gprsrc32 + addend load, accepting iAddend == 0.
1938 */
1939DECL_INLINE_THROW(uint32_t)
1940iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1941 uint8_t iGprDst, uint8_t iGprSrc, int32_t iAddend)
1942{
1943 if (iAddend != 0)
1944 return iemNativeEmitLoadGprFromGpr32WithAddend(pReNative, off, iGprDst, iGprSrc, iAddend);
1945 return iemNativeEmitLoadGprFromGpr32(pReNative, off, iGprDst, iGprSrc);
1946}
1947
1948
1949/**
1950 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1951 * destination.
1952 */
1953DECL_FORCE_INLINE(uint32_t)
1954iemNativeEmitGprMergeInGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1955{
1956#ifdef RT_ARCH_AMD64
1957 /* mov reg16, r/m16 */
1958 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
1959 if (idxDst >= 8 || idxSrc >= 8)
1960 pCodeBuf[off++] = (idxDst < 8 ? 0 : X86_OP_REX_R) | (idxSrc < 8 ? 0 : X86_OP_REX_B);
1961 pCodeBuf[off++] = 0x8b;
1962 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxDst & 7, idxSrc & 7);
1963
1964#elif defined(RT_ARCH_ARM64)
1965 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxSrc to idxDst bits 15:0. */
1966 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxDst, idxSrc, 0, 16);
1967
1968#else
1969# error "Port me!"
1970#endif
1971 return off;
1972}
1973
1974
1975/**
1976 * Emits a gprdst[15:0] = gprsrc[15:0], preserving all other bits in the
1977 * destination.
1978 */
1979DECL_INLINE_THROW(uint32_t)
1980iemNativeEmitGprMergeInGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDst, uint8_t idxSrc)
1981{
1982#ifdef RT_ARCH_AMD64
1983 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, idxDst, idxSrc);
1984#elif defined(RT_ARCH_ARM64)
1985 off = iemNativeEmitGprMergeInGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, idxDst, idxSrc);
1986#else
1987# error "Port me!"
1988#endif
1989 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1990 return off;
1991}
1992
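/*
 * Editorial sketch, not part of the original header: the 16-bit merge above
 * computes the following, expressed in plain C (hypothetical helper):
 */
#if 0 /* illustrative only */
static uint64_t demoMergeU16(uint64_t uDst, uint64_t uSrc)
{
    /* Low 16 bits come from the source, everything else is preserved: */
    return (uDst & ~(uint64_t)0xffff) | (uSrc & 0xffff);
}
#endif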
1993
1994#ifdef RT_ARCH_AMD64
1995/**
1996 * Common bit of iemNativeEmitLoadGprByBp and friends.
1997 */
1998DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByBpDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, int32_t offDisp,
1999 PIEMRECOMPILERSTATE pReNativeAssert)
2000{
2001 if (offDisp < 128 && offDisp >= -128)
2002 {
2003 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, X86_GREG_xBP);
2004 pbCodeBuf[off++] = (uint8_t)(int8_t)offDisp;
2005 }
2006 else
2007 {
2008 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, X86_GREG_xBP);
2009 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2010 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2011 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2012 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2013 }
2014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNativeAssert, off); RT_NOREF(pReNativeAssert);
2015 return off;
2016}
2017#elif defined(RT_ARCH_ARM64)
2018/**
2019 * Common bit of iemNativeEmitLoadGprByBp and friends.
2020 */
2021DECL_FORCE_INLINE_THROW(uint32_t)
2022iemNativeEmitGprByBpLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2023 int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2024{
2025 if ((uint32_t)offDisp < 4096U * cbData && !((uint32_t)offDisp & (cbData - 1)))
2026 {
2027 /* str w/ unsigned imm12 (scaled) */
2028 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2029 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, ARMV8_A64_REG_BP, (uint32_t)offDisp / cbData);
2030 }
2031 else if (offDisp >= -256 && offDisp <= 256)
2032 {
2033 /* stur w/ signed imm9 (unscaled) */
2034 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2035 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(enmOperation, iGprReg, ARMV8_A64_REG_BP, offDisp);
2036 }
2037 else
2038 {
2039 /* Use temporary indexing register. */
2040 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2041 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2042 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, ARMV8_A64_REG_BP,
2043 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2044 }
2045 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2046 return off;
2047}
2048#endif
2049
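/*
 * Editorial sketch, not part of the original header: how many displacement
 * bytes the AMD64 helper above emits after the ModRM byte (hypothetical
 * helper mirroring its range check):
 */
#if 0 /* illustrative only */
static unsigned demoBpDispBytes(int32_t offDisp)
{
    return offDisp >= -128 && offDisp < 128 ? 1 /* X86_MOD_MEM1, disp8  */
                                            : 4 /* X86_MOD_MEM4, disp32 */;
}
#endif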
2050
2051/**
2052 * Emits a 64-bit GPR load instruction with a BP relative source address.
2053 */
2054DECL_INLINE_THROW(uint32_t)
2055iemNativeEmitLoadGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2056{
2057#ifdef RT_ARCH_AMD64
2058 /* mov gprdst, qword [rbp + offDisp] */
2059 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2060 if (iGprDst < 8)
2061 pbCodeBuf[off++] = X86_OP_REX_W;
2062 else
2063 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2064 pbCodeBuf[off++] = 0x8b;
2065 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2066
2067#elif defined(RT_ARCH_ARM64)
2068 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2069
2070#else
2071# error "port me"
2072#endif
2073}
2074
2075
2076/**
2077 * Emits a 32-bit GPR load instruction with a BP relative source address.
2078 * @note Bits 63 thru 32 of the GPR will be cleared.
2079 */
2080DECL_INLINE_THROW(uint32_t)
2081iemNativeEmitLoadGprByBpU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2082{
2083#ifdef RT_ARCH_AMD64
2084 /* mov gprdst, dword [rbp + offDisp] */
2085 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2086 if (iGprDst >= 8)
2087 pbCodeBuf[off++] = X86_OP_REX_R;
2088 pbCodeBuf[off++] = 0x8b;
2089 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2090
2091#elif defined(RT_ARCH_ARM64)
2092 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2093
2094#else
2095# error "port me"
2096#endif
2097}
2098
2099
2100/**
2101 * Emits a 16-bit GPR load instruction with a BP relative source address.
2102 * @note Bits 63 thru 16 of the GPR will be cleared.
2103 */
2104DECL_INLINE_THROW(uint32_t)
2105iemNativeEmitLoadGprByBpU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2106{
2107#ifdef RT_ARCH_AMD64
2108 /* movzx gprdst, word [rbp + offDisp] */
2109 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2110 if (iGprDst >= 8)
2111 pbCodeBuf[off++] = X86_OP_REX_R;
2112 pbCodeBuf[off++] = 0x0f;
2113 pbCodeBuf[off++] = 0xb7;
2114 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2115
2116#elif defined(RT_ARCH_ARM64)
2117 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
2118
2119#else
2120# error "port me"
2121#endif
2122}
2123
2124
2125/**
2126 * Emits an 8-bit GPR load instruction with a BP relative source address.
2127 * @note Bits 63 thru 8 of the GPR will be cleared.
2128 */
2129DECL_INLINE_THROW(uint32_t)
2130iemNativeEmitLoadGprByBpU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2131{
2132#ifdef RT_ARCH_AMD64
2133 /* movzx gprdst, byte [rbp + offDisp] */
2134 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2135 if (iGprDst >= 8)
2136 pbCodeBuf[off++] = X86_OP_REX_R;
2137 pbCodeBuf[off++] = 0x0f;
2138 pbCodeBuf[off++] = 0xb6;
2139 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2140
2141#elif defined(RT_ARCH_ARM64)
2142 return iemNativeEmitGprByBpLdSt(pReNative, off, iGprDst, offDisp, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
2143
2144#else
2145# error "port me"
2146#endif
2147}
2148
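/*
 * Editorial usage sketch, not part of the original header: reloading spilled
 * values from the stack frame.  The slot offsets are hypothetical.
 */
#if 0 /* illustrative only */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitDemoReloadSpills(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeEmitLoadGprByBp(pReNative, off, X86_GREG_xAX, -16);   /* 64-bit slot */
    off = iemNativeEmitLoadGprByBpU8(pReNative, off, X86_GREG_xCX, -24); /* byte flag, zero-extended */
    return off;
}
#endif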
2149
2150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2151/**
2152 * Emits a 128-bit vector register load instruction with a BP relative source address.
2153 */
2154DECL_FORCE_INLINE_THROW(uint32_t)
2155iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2156{
2157#ifdef RT_ARCH_AMD64
2158 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2159
2160 /* movdqu reg128, mem128 */
2161 pbCodeBuf[off++] = 0xf3;
2162 if (iVecRegDst >= 8)
2163 pbCodeBuf[off++] = X86_OP_REX_R;
2164 pbCodeBuf[off++] = 0x0f;
2165 pbCodeBuf[off++] = 0x6f;
2166 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2167#elif defined(RT_ARCH_ARM64)
2168 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2169#else
2170# error "port me"
2171#endif
2172}
2173
2174
2175/**
2176 * Emits a 256-bit vector register load instruction with a BP relative source address.
2177 */
2178DECL_FORCE_INLINE_THROW(uint32_t)
2179iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
2180{
2181#ifdef RT_ARCH_AMD64
2182 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2183
2184 /* vmovdqu reg256, mem256 */
2185 pbCodeBuf[off++] = X86_OP_VEX2;
2186 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2187 pbCodeBuf[off++] = 0x6f;
2188 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
2189#elif defined(RT_ARCH_ARM64)
2190 /* ASSUMES two consecutive vector registers for the 256-bit value. */
2191 Assert(!(iVecRegDst & 0x1));
2192 off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2193 return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
2194#else
2195# error "port me"
2196#endif
2197}
2198
2199#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2200
2201
2202/**
2203 * Emits a load effective address to a GPR with a BP relative source address.
2204 */
2205DECL_INLINE_THROW(uint32_t)
2206iemNativeEmitLeaGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t offDisp)
2207{
2208#ifdef RT_ARCH_AMD64
2209 /* lea gprdst, [rbp + offDisp] */
2210 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2211 if (iGprDst < 8)
2212 pbCodeBuf[off++] = X86_OP_REX_W;
2213 else
2214 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2215 pbCodeBuf[off++] = 0x8d;
2216 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprDst, offDisp, pReNative);
2217
2218#elif defined(RT_ARCH_ARM64)
2219 bool const fSub = offDisp < 0;
2220 uint32_t const offAbsDisp = (uint32_t)RT_ABS(offDisp);
2221 if (offAbsDisp <= 0xffffffU)
2222 {
2223 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2224 if (offAbsDisp <= 0xfffU)
2225 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp);
2226 else
2227 {
2228 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, ARMV8_A64_REG_BP, offAbsDisp >> 12,
2229 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2230 if (offAbsDisp & 0xfffU)
2231 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, offAbsDisp & 0xfff);
2232 }
2233 }
2234 else
2235 {
2236 Assert(iGprDst != IEMNATIVE_REG_FIXED_PVMCPU);
2237 off = iemNativeEmitLoadGprImm64(pReNative, off, iGprDst, offAbsDisp);
2238 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2239 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, ARMV8_A64_REG_BP, iGprDst);
2240 }
2241
2242#else
2243# error "port me"
2244#endif
2245
2246 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2247 return off;
2248}
2249
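/*
 * Editorial sketch, not part of the original header: the two-instruction
 * ARM64 path above splits a displacement of up to 24 bits into a shifted
 * and an unshifted uimm12 (hypothetical helper):
 */
#if 0 /* illustrative only */
static void demoSplitUImm24(uint32_t offAbsDisp, uint32_t *puHi, uint32_t *puLo)
{
    *puHi = offAbsDisp >> 12;   /* add/sub Xd, Xbp, #uHi, LSL #12 */
    *puLo = offAbsDisp & 0xfff; /* add/sub Xd, Xd,  #uLo          */
}
#endif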
2250
2251/**
2252 * Emits a 64-bit GPR store with a BP relative destination address.
2253 *
2254 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2255 */
2256DECL_INLINE_THROW(uint32_t)
2257iemNativeEmitStoreGprByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iGprSrc)
2258{
2259#ifdef RT_ARCH_AMD64
2260 /* mov qword [rbp + offDisp], gprsrc */
2261 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
2262 if (iGprSrc < 8)
2263 pbCodeBuf[off++] = X86_OP_REX_W;
2264 else
2265 pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
2266 pbCodeBuf[off++] = 0x89;
2267 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iGprSrc, offDisp, pReNative);
2268
2269#elif defined(RT_ARCH_ARM64)
2270 if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
2271 {
2272 /* str w/ unsigned imm12 (scaled) */
2273 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2274 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc,
2275 ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
2276 }
2277 else if (offDisp >= -256 && offDisp <= 256)
2278 {
2279 /* stur w/ signed imm9 (unscaled) */
2280 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2281 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP, offDisp);
2282 }
2283 else if ((uint32_t)-offDisp < (unsigned)_4K)
2284 {
2285 /* Use temporary indexing register w/ sub uimm12. */
2286 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2287 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2288 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2289 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Dword, iGprSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2290 }
2291 else
2292 {
2293 /* Use temporary indexing register. */
2294 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2295 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2296 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Dword, iGprSrc, ARMV8_A64_REG_BP,
2297 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2298 }
2299 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2300 return off;
2301
2302#else
2303# error "Port me!"
2304#endif
2305}
2306
2307
2308/**
2309 * Emits a 64-bit immediate store with a BP relative destination address.
2310 *
2311 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2312 */
2313DECL_INLINE_THROW(uint32_t)
2314iemNativeEmitStoreImm64ByBp(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint64_t uImm64)
2315{
2316#ifdef RT_ARCH_AMD64
2317 if ((int64_t)uImm64 == (int32_t)uImm64)
2318 {
2319 /* mov qword [rbp + offDisp], imm32 - sign extended */
2320 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 11);
2321 pbCodeBuf[off++] = X86_OP_REX_W;
2322 pbCodeBuf[off++] = 0xc7;
2323 if (offDisp < 128 && offDisp >= -128)
2324 {
2325 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 0, X86_GREG_xBP);
2326 pbCodeBuf[off++] = (uint8_t)offDisp;
2327 }
2328 else
2329 {
2330 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 0, X86_GREG_xBP);
2331 pbCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
2332 pbCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
2333 pbCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
2334 pbCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
2335 }
2336 pbCodeBuf[off++] = RT_BYTE1(uImm64);
2337 pbCodeBuf[off++] = RT_BYTE2(uImm64);
2338 pbCodeBuf[off++] = RT_BYTE3(uImm64);
2339 pbCodeBuf[off++] = RT_BYTE4(uImm64);
2340 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2341 return off;
2342 }
2343#endif
2344
2345 /* Load tmp0, imm64; Store tmp to bp+disp. */
2346 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uImm64);
2347 return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
2348}
2349
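/*
 * Editorial sketch, not part of the original header: when the short AMD64
 * 'mov qword [rbp+disp], imm32' form above applies (hypothetical helper):
 */
#if 0 /* illustrative only */
static bool demoFitsSignExtImm32(uint64_t uImm64)
{
    return (int64_t)uImm64 == (int32_t)uImm64;
    /* true for UINT64_C(0x000000007fffffff), false for UINT64_C(0x0000000080000000) */
}
#endif
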
2350#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2351
2352/**
2353 * Emits a 128-bit vector register store with a BP relative destination address.
2354 *
2355 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2356 */
2357DECL_INLINE_THROW(uint32_t)
2358iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2359{
2360#ifdef RT_ARCH_AMD64
2361 /* movdqu [rbp + offDisp], vecsrc */
2362 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
2363 pbCodeBuf[off++] = 0xf3;
2364 if (iVecRegSrc >= 8)
2365 pbCodeBuf[off++] = X86_OP_REX_R;
2366 pbCodeBuf[off++] = 0x0f;
2367 pbCodeBuf[off++] = 0x7f;
2368 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2369
2370#elif defined(RT_ARCH_ARM64)
2371 if (offDisp >= 0 && offDisp < 4096 * 16 && !((uint32_t)offDisp & 15))
2372 {
2373 /* str w/ unsigned imm12 (scaled) */
2374 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2375 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
2376 ARMV8_A64_REG_BP, (uint32_t)offDisp / 16);
2377 }
2378 else if (offDisp >= -256 && offDisp <= 256)
2379 {
2380 /* stur w/ signed imm9 (unscaled) */
2381 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2382 pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
2383 }
2384 else if ((uint32_t)-offDisp < (unsigned)_4K)
2385 {
2386 /* Use temporary indexing register w/ sub uimm12. */
2387 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2388 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
2389 ARMV8_A64_REG_BP, (uint32_t)-offDisp);
2390 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
2391 }
2392 else
2393 {
2394 /* Use temporary indexing register. */
2395 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
2396 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2397 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
2398 IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
2399 }
2400 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2401 return off;
2402
2403#else
2404# error "Port me!"
2405#endif
2406}
2407
2408
2409/**
2410 * Emits a 256-bit vector register store with a BP relative destination address.
2411 *
2412 * @note May trash IEMNATIVE_REG_FIXED_TMP0.
2413 */
2414DECL_INLINE_THROW(uint32_t)
2415iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
2416{
2417#ifdef RT_ARCH_AMD64
2418 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
2419
2420 /* vmovdqu mem256, reg256 */
2421 pbCodeBuf[off++] = X86_OP_VEX2;
2422 pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
2423 pbCodeBuf[off++] = 0x7f;
2424 return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
2425#elif defined(RT_ARCH_ARM64)
2426 Assert(!(iVecRegSrc & 0x1));
2427 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
2428 return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
2429#else
2430# error "Port me!"
2431#endif
2432}
2433
2434#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
2435#if defined(RT_ARCH_ARM64)
2436
2437/**
2438 * Common bit of iemNativeEmitLoadGprByGprU64Ex and friends.
2439 *
2440 * @note Odd and large @a offDisp values require a temporary, unless it's a
2441 * load and @a iGprReg differs from @a iGprBase. Will assert / throw if
2442 * caller does not heed this.
2443 *
2444 * @note DON'T try this with prefetch.
2445 */
2446DECL_FORCE_INLINE_THROW(uint32_t)
2447iemNativeEmitGprByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprReg, uint8_t iGprBase, int32_t offDisp,
2448 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2449{
2450 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2451 {
2452 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2453 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2454 }
2455 else if ( ( !ARMV8A64INSTRLDSTTYPE_IS_STORE(enmOperation)
2456 && iGprReg != iGprBase)
2457 || iGprTmp != UINT8_MAX)
2458 {
2459 /* The offset is too large, so we must load it into a register and use
2460 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2461 /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2462 if (iGprTmp == UINT8_MAX)
2463 iGprTmp = iGprReg;
2464 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2465 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, iGprTmp);
2466 }
2467 else
2468# ifdef IEM_WITH_THROW_CATCH
2469 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2470# else
2471 AssertReleaseFailedStmt(off = UINT32_MAX);
2472# endif
2473 return off;
2474}
2475
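/*
 * Editorial sketch, not part of the original header: the eligibility check
 * for the scaled-uimm12 form used above, as a standalone predicate
 * (hypothetical helper):
 */
#if 0 /* illustrative only */
static bool demoFitsScaledUImm12(int32_t offDisp, unsigned cbData)
{
    return (uint32_t)offDisp < 4096U * cbData   /* non-negative and in range */
        && !((uint32_t)offDisp & (cbData - 1)); /* naturally aligned */
}
/* demoFitsScaledUImm12(0x7ff8, 8) -> true; demoFitsScaledUImm12(12, 8) -> false. */
#endif
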
2476/**
2477 * Common bit of iemNativeEmitLoadGprByGprU64 and friends.
2478 */
2479DECL_FORCE_INLINE_THROW(uint32_t)
2480iemNativeEmitGprByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
2481 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2482{
2483 /*
2484 * There are a couple of ldr variants that take an immediate offset, so
2485 * try to use those if we can; otherwise we have to use a temporary register
2486 * to help with the addressing.
2487 */
2488 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2489 {
2490 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2491 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2492 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, iGprBase, (uint32_t)offDisp / cbData);
2493 }
2494 else
2495 {
2496 /* The offset is too large, so we must load it into a register and use
2497 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2498 /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2499 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2500
2501 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2502 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, iGprBase, idxTmpReg);
2503
2504 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2505 }
2506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2507 return off;
2508}
2509
2510# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2511/**
2512 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2513 *
2514 * @note Odd and large @a offDisp values require a temporary (@a iGprTmp),
2515 * since the vector register cannot double as the address temporary.
2516 * Will assert / throw if the caller does not heed this.
2517 *
2518 * @note DON'T try this with prefetch.
2519 */
2520DECL_FORCE_INLINE_THROW(uint32_t)
2521iemNativeEmitVecRegByGprLdStEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iGprBase, int32_t offDisp,
2522 ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData, uint8_t iGprTmp = UINT8_MAX)
2523{
2524 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2525 {
2526 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2527 pCodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2528 }
2529 else if (iGprTmp != UINT8_MAX)
2531 {
2532 /* The offset is too large, so we must load it into a register and use
2533 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2534 /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2535 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (int64_t)offDisp);
2536 pCodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, iGprTmp);
2537 }
2538 else
2539# ifdef IEM_WITH_THROW_CATCH
2540 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
2541# else
2542 AssertReleaseFailedStmt(off = UINT32_MAX);
2543# endif
2544 return off;
2545}
2546# endif
2547
2548
2549/**
2550 * Common bit of iemNativeEmitLoadVecRegByGprU128 and friends.
2551 */
2552DECL_FORCE_INLINE_THROW(uint32_t)
2553iemNativeEmitVecRegByGprLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg,
2554 uint8_t iGprBase, int32_t offDisp, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
2555{
2556 /*
2557 * There are a couple of ldr variants that take an immediate offset, so
2558 * try to use those if we can; otherwise we have to use a temporary register
2559 * to help with the addressing.
2560 */
2561 if ((uint32_t)offDisp < _4K * cbData && !((uint32_t)offDisp & (cbData - 1)))
2562 {
2563 /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
2564 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2565 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iVecReg, iGprBase, (uint32_t)offDisp / cbData);
2566 }
2567 else
2568 {
2569 /* The offset is too large, so we must load it into a register and use
2570 ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
2571 /** @todo reduce @a offDisp by >> 3 or >> 2 if it saves instructions? */
2572 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (int64_t)offDisp);
2573
2574 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2575 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iVecReg, iGprBase, idxTmpReg);
2576
2577 iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
2578 }
2579 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2580 return off;
2581}
2582#endif /* RT_ARCH_ARM64 */
2583
2584/**
2585 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2586 *
2587 * @note ARM64: Misaligned @a offDisp values and values not in the
2588 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2589 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2590 * does not heed this.
2591 */
2592DECL_FORCE_INLINE_THROW(uint32_t)
2593iemNativeEmitLoadGprByGprU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2594 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2595{
2596#ifdef RT_ARCH_AMD64
2597 /* mov reg64, mem64 */
2598 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2599 pCodeBuf[off++] = 0x8b;
2600 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2601 RT_NOREF(iGprTmp);
2602
2603#elif defined(RT_ARCH_ARM64)
2604 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2605 kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t), iGprTmp);
2606
2607#else
2608# error "port me"
2609#endif
2610 return off;
2611}
2612
2613
2614/**
2615 * Emits a 64-bit GPR load via a GPR base address with a displacement.
2616 */
2617DECL_INLINE_THROW(uint32_t)
2618iemNativeEmitLoadGprByGprU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2619{
2620#ifdef RT_ARCH_AMD64
2621 off = iemNativeEmitLoadGprByGprU64Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2622 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2623
2624#elif defined(RT_ARCH_ARM64)
2625 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
2626
2627#else
2628# error "port me"
2629#endif
2630 return off;
2631}
2632
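/*
 * Editorial usage sketch, not part of the original header: on ARM64 a load
 * where destination and base are the same register and the displacement does
 * not fit the scaled uimm12 form must supply @a iGprTmp.  Register numbers
 * and the displacement are hypothetical.
 */
#if 0 /* illustrative only */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitDemoLoadViaSameReg(PIEMNATIVEINSTR pCodeBuf, uint32_t off)
{
    return iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, 3 /*iGprDst*/, 3 /*iGprBase*/,
                                          0x12345 /*offDisp*/, IEMNATIVE_REG_FIXED_TMP0);
}
#endif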
2633
2634/**
2635 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2636 *
2637 * @note ARM64: Misaligned @a offDisp values and values not in the
2638 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2639 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2640 * caller does not heed this.
2641 *
2642 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2643 */
2644DECL_FORCE_INLINE_THROW(uint32_t)
2645iemNativeEmitLoadGprByGprU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2646 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2647{
2648#ifdef RT_ARCH_AMD64
2649 /* mov reg32, mem32 */
2650 if (iGprDst >= 8 || iGprBase >= 8)
2651 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2652 pCodeBuf[off++] = 0x8b;
2653 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2654 RT_NOREF(iGprTmp);
2655
2656#elif defined(RT_ARCH_ARM64)
2657 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2658 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t), iGprTmp);
2659
2660#else
2661# error "port me"
2662#endif
2663 return off;
2664}
2665
2666
2667/**
2668 * Emits a 32-bit GPR load via a GPR base address with a displacement.
2669 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2670 */
2671DECL_INLINE_THROW(uint32_t)
2672iemNativeEmitLoadGprByGprU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprBase, int32_t offDisp)
2673{
2674#ifdef RT_ARCH_AMD64
2675 off = iemNativeEmitLoadGprByGprU32Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iGprDst, iGprBase, offDisp);
2676 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2677
2678#elif defined(RT_ARCH_ARM64)
2679 off = iemNativeEmitGprByGprLdSt(pReNative, off, iGprDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
2680
2681#else
2682# error "port me"
2683#endif
2684 return off;
2685}
2686
2687
2688/**
2689 * Emits a 32-bit GPR load via a GPR base address with a displacement,
2690 * sign-extending the value to 64 bits.
2691 *
2692 * @note ARM64: Misaligned @a offDisp values and values not in the
2693 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp)
2694 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2695 * caller does not heed this.
2696 */
2697DECL_FORCE_INLINE_THROW(uint32_t)
2698iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2699 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2700{
2701#ifdef RT_ARCH_AMD64
2702 /* movsxd reg64, mem32 */
2703 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2704 pCodeBuf[off++] = 0x63;
2705 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2706 RT_NOREF(iGprTmp);
2707
2708#elif defined(RT_ARCH_ARM64)
2709 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2710 kArmv8A64InstrLdStType_Ld_SignWord64, sizeof(uint32_t), iGprTmp);
2711
2712#else
2713# error "port me"
2714#endif
2715 return off;
2716}
2717
2718
2719/**
2720 * Emits a 16-bit GPR load via a GPR base address with a displacement.
2721 *
2722 * @note ARM64: Misaligned @a offDisp values and values not in the
2723 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2724 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2725 * caller does not heed this.
2726 *
2727 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2728 */
2729DECL_FORCE_INLINE_THROW(uint32_t)
2730iemNativeEmitLoadGprByGprU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2731 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2732{
2733#ifdef RT_ARCH_AMD64
2734 /* movzx reg32, mem16 */
2735 if (iGprDst >= 8 || iGprBase >= 8)
2736 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2737 pCodeBuf[off++] = 0x0f;
2738 pCodeBuf[off++] = 0xb7;
2739 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2740 RT_NOREF(iGprTmp);
2741
2742#elif defined(RT_ARCH_ARM64)
2743 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2744 kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t), iGprTmp);
2745
2746#else
2747# error "port me"
2748#endif
2749 return off;
2750}
2751
2752
2753/**
2754 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2755 * sign-extending the value to 64 bits.
2756 *
2757 * @note ARM64: Misaligned @a offDisp values and values not in the
2758 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2759 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2760 * caller does not heed this.
2761 */
2762DECL_FORCE_INLINE_THROW(uint32_t)
2763iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2764 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2765{
2766#ifdef RT_ARCH_AMD64
2767 /* movsx reg64, mem16 */
2768 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2769 pCodeBuf[off++] = 0x0f;
2770 pCodeBuf[off++] = 0xbf;
2771 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2772 RT_NOREF(iGprTmp);
2773
2774#elif defined(RT_ARCH_ARM64)
2775 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2776 kArmv8A64InstrLdStType_Ld_SignHalf64, sizeof(uint16_t), iGprTmp);
2777
2778#else
2779# error "port me"
2780#endif
2781 return off;
2782}
2783
2784
2785/**
2786 * Emits a 16-bit GPR load via a GPR base address with a displacement,
2787 * sign-extending the value to 32 bits.
2788 *
2789 * @note ARM64: Misaligned @a offDisp values and values not in the
2790 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp)
2791 * if @a iGprReg and @a iGprBase are the same. Will assert / throw if
2792 * caller does not heed this.
2793 *
2794 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2795 */
2796DECL_FORCE_INLINE_THROW(uint32_t)
2797iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2798 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2799{
2800#ifdef RT_ARCH_AMD64
2801 /* movsx reg32, mem16 */
2802 if (iGprDst >= 8 || iGprBase >= 8)
2803 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2804 pCodeBuf[off++] = 0x0f;
2805 pCodeBuf[off++] = 0xbf;
2806 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2807 RT_NOREF(iGprTmp);
2808
2809#elif defined(RT_ARCH_ARM64)
2810 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2811 kArmv8A64InstrLdStType_Ld_SignHalf32, sizeof(uint16_t), iGprTmp);
2812
2813#else
2814# error "port me"
2815#endif
2816 return off;
2817}
2818
2819
2820/**
2821 * Emits an 8-bit GPR load via a GPR base address with a displacement.
2822 *
2823 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2824 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2825 * same. Will assert / throw if caller does not heed this.
2826 *
2827 * @note Bits 63 thru 8 in @a iGprDst will be cleared.
2828 */
2829DECL_FORCE_INLINE_THROW(uint32_t)
2830iemNativeEmitLoadGprByGprU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2831 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2832{
2833#ifdef RT_ARCH_AMD64
2834 /* movzx reg32, mem8 */
2835 if (iGprDst >= 8 || iGprBase >= 8)
2836 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2837 pCodeBuf[off++] = 0x0f;
2838 pCodeBuf[off++] = 0xb6;
2839 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2840 RT_NOREF(iGprTmp);
2841
2842#elif defined(RT_ARCH_ARM64)
2843 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2844 kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t), iGprTmp);
2845
2846#else
2847# error "port me"
2848#endif
2849 return off;
2850}
2851
2852
2853/**
2854 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2855 * sign-extending the value to 64 bits.
2856 *
2857 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2858 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2859 * same. Will assert / throw if caller does not heed this.
2860 */
2861DECL_FORCE_INLINE_THROW(uint32_t)
2862iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2863 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2864{
2865#ifdef RT_ARCH_AMD64
2866 /* movsx reg64, mem8 */
2867 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2868 pCodeBuf[off++] = 0x0f;
2869 pCodeBuf[off++] = 0xbe;
2870 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2871 RT_NOREF(iGprTmp);
2872
2873#elif defined(RT_ARCH_ARM64)
2874 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2875 kArmv8A64InstrLdStType_Ld_SignByte64, sizeof(uint8_t), iGprTmp);
2876
2877#else
2878# error "port me"
2879#endif
2880 return off;
2881}
2882
2883
2884/**
2885 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2886 * sign-extending the value to 32 bits.
2887 *
2888 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2889 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2890 * same. Will assert / throw if caller does not heed this.
2891 *
2892 * @note Bits 63 thru 32 in @a iGprDst will be cleared.
2893 */
2894DECL_FORCE_INLINE_THROW(uint32_t)
2895iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2896 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2897{
2898#ifdef RT_ARCH_AMD64
2899 /* movsx reg32, mem8 */
2900 if (iGprDst >= 8 || iGprBase >= 8)
2901 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2902 pCodeBuf[off++] = 0x0f;
2903 pCodeBuf[off++] = 0xbe;
2904 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2905 RT_NOREF(iGprTmp);
2906
2907#elif defined(RT_ARCH_ARM64)
2908 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2909 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2910
2911#else
2912# error "port me"
2913#endif
2914 return off;
2915}
2916
2917
2918/**
2919 * Emits an 8-bit GPR load via a GPR base address with a displacement,
2920 * sign-extending the value to 16 bits.
2921 *
2922 * @note ARM64: @a offDisp values not in the 0x000...0xfff range will require a
2923 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
2924 * same. Will assert / throw if caller does not heed this.
2925 *
2926 * @note Bits 63 thru 16 in @a iGprDst will be cleared.
2927 */
2928DECL_FORCE_INLINE_THROW(uint32_t)
2929iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprBase,
2930 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2931{
2932#ifdef RT_ARCH_AMD64
2933 /* movsx reg32, mem8 */
2934 if (iGprDst >= 8 || iGprBase >= 8)
2935 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2936 pCodeBuf[off++] = 0x0f;
2937 pCodeBuf[off++] = 0xbe;
2938 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprDst, iGprBase, offDisp);
2939# if 1 /** @todo use 'movzx reg32, reg16' instead of 'and reg32, 0ffffh' ? */
2940 /* and reg32, 0xffff */
2941 if (iGprDst >= 8)
2942 pCodeBuf[off++] = X86_OP_REX_B;
2943 pCodeBuf[off++] = 0x81;
2944 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
2945 pCodeBuf[off++] = 0xff;
2946 pCodeBuf[off++] = 0xff;
2947 pCodeBuf[off++] = 0;
2948 pCodeBuf[off++] = 0;
2949# else
2950 /* movzx reg32, reg16 */
2951 if (iGprDst >= 8)
2952 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
2953 pCodeBuf[off++] = 0x0f;
2954 pCodeBuf[off++] = 0xb7;
2955 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);
2956# endif
2957 RT_NOREF(iGprTmp);
2958
2959#elif defined(RT_ARCH_ARM64)
2960 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprDst, iGprBase, offDisp,
2961 kArmv8A64InstrLdStType_Ld_SignByte32, sizeof(uint8_t), iGprTmp);
2962 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2963 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
2964
2965#else
2966# error "port me"
2967#endif
2968 return off;
2969}
2970
2971
2972#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2973/**
2974 * Emits a 128-bit vector register load via a GPR base address with a displacement.
2975 *
2976 * @note ARM64: Misaligned @a offDisp values and values not in the
2977 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
2978 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
2979 * does not heed this.
2980 */
2981DECL_FORCE_INLINE_THROW(uint32_t)
2982iemNativeEmitLoadVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
2983 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
2984{
2985#ifdef RT_ARCH_AMD64
2986 /* movdqu reg128, mem128 */
2987 pCodeBuf[off++] = 0xf3;
2988 if (iVecRegDst >= 8 || iGprBase >= 8)
2989 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
2990 pCodeBuf[off++] = 0x0f;
2991 pCodeBuf[off++] = 0x6f;
2992 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
2993 RT_NOREF(iGprTmp);
2994
2995#elif defined(RT_ARCH_ARM64)
2996 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
2997 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
2998
2999#else
3000# error "port me"
3001#endif
3002 return off;
3003}
3004
3005
3006/**
3007 * Emits a 128-bit vector register load via a GPR base address with a displacement.
3008 */
3009DECL_INLINE_THROW(uint32_t)
3010iemNativeEmitLoadVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3011{
3012#ifdef RT_ARCH_AMD64
3013 off = iemNativeEmitLoadVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3015
3016#elif defined(RT_ARCH_ARM64)
3017 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3018
3019#else
3020# error "port me"
3021#endif
3022 return off;
3023}
3024
3025
3026/**
3027 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3028 *
3029 * @note ARM64: Misaligned @a offDisp values and values not in the
3030 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3031 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3032 * does not heed this.
3033 */
3034DECL_FORCE_INLINE_THROW(uint32_t)
3035iemNativeEmitLoadVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3036 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3037{
3038#ifdef RT_ARCH_AMD64
3039 /* vmovdqu reg256, mem256 */
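    /* Note: the VEX R/X/B bits are stored inverted; the low five bits of byte 1 (0x01) select the 0F opcode map. */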
3040 pCodeBuf[off++] = X86_OP_VEX3;
3041 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3042 | X86_OP_VEX3_BYTE1_X
3043 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3044 | UINT8_C(0x01);
3045 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3046 pCodeBuf[off++] = 0x6f;
3047 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3048 RT_NOREF(iGprTmp);
3049
3050#elif defined(RT_ARCH_ARM64)
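    /* ARM64 has no 256-bit vector load here, so emit two 128-bit loads using an even/odd register pair. */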
3051 Assert(!(iVecRegDst & 0x1));
3052 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3053 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3054 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3055 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U), iGprTmp);
3056#else
3057# error "port me"
3058#endif
3059 return off;
3060}
3061
3062
3063/**
3064 * Emits a 256-bit vector register load via a GPR base address with a displacement.
3065 */
3066DECL_INLINE_THROW(uint32_t)
3067iemNativeEmitLoadVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3068{
3069#ifdef RT_ARCH_AMD64
3070 off = iemNativeEmitLoadVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3071 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3072
3073#elif defined(RT_ARCH_ARM64)
3074 Assert(!(iVecRegDst & 0x1));
3075 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3076 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3077 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3078 kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
3079
3080#else
3081# error "port me"
3082#endif
3083 return off;
3084}
3085#endif
3086
3087
3088/**
3089 * Emits a 64-bit GPR store via a GPR base address with a displacement.
3090 *
3091 * @note ARM64: Misaligned @a offDisp values and values not in the
3092 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3093 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3094 * does not heed this.
3095 */
3096DECL_FORCE_INLINE_THROW(uint32_t)
3097iemNativeEmitStoreGpr64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3098 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3099{
3100#ifdef RT_ARCH_AMD64
3101 /* mov mem64, reg64 */
3102 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3103 pCodeBuf[off++] = 0x89;
3104 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3105 RT_NOREF(iGprTmp);
3106
3107#elif defined(RT_ARCH_ARM64)
3108 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3109 kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t), iGprTmp);
3110
3111#else
3112# error "port me"
3113#endif
3114 return off;
3115}
3116
3117
3118/**
3119 * Emits a 32-bit GPR store via a GPR base address with a displacement.
3120 *
3121 * @note ARM64: Misaligned @a offDisp values and values not in the
3122 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3123 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3124 * does not heed this.
3125 */
3126DECL_FORCE_INLINE_THROW(uint32_t)
3127iemNativeEmitStoreGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3128 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3129{
3130#ifdef RT_ARCH_AMD64
3131 /* mov mem32, reg32 */
3132 if (iGprSrc >= 8 || iGprBase >= 8)
3133 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3134 pCodeBuf[off++] = 0x89;
3135 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3136 RT_NOREF(iGprTmp);
3137
3138#elif defined(RT_ARCH_ARM64)
3139 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3140 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3141
3142#else
3143# error "port me"
3144#endif
3145 return off;
3146}
3147
3148
3149/**
3150 * Emits a 16-bit GPR store via a GPR base address with a displacement.
3151 *
3152 * @note ARM64: Misaligned @a offDisp values and values not in the
3153 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3154 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3155 * does not heed this.
3156 */
3157DECL_FORCE_INLINE_THROW(uint32_t)
3158iemNativeEmitStoreGpr16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3159 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3160{
3161#ifdef RT_ARCH_AMD64
3162 /* mov mem16, reg16 */
3163 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3164 if (iGprSrc >= 8 || iGprBase >= 8)
3165 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3166 pCodeBuf[off++] = 0x89;
3167 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3168 RT_NOREF(iGprTmp);
3169
3170#elif defined(RT_ARCH_ARM64)
3171 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3172 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3173
3174#else
3175# error "port me"
3176#endif
3177 return off;
3178}
3179
3180
3181/**
3182 * Emits an 8-bit GPR store via a GPR base address with a displacement.
3183 *
3184 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3185 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3186 * same. Will assert / throw if caller does not heed this.
3187 */
3188DECL_FORCE_INLINE_THROW(uint32_t)
3189iemNativeEmitStoreGpr8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iGprBase,
3190 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3191{
3192#ifdef RT_ARCH_AMD64
3193 /* mov mem8, reg8 */
3194 if (iGprSrc >= 8 || iGprBase >= 8)
3195 pCodeBuf[off++] = (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3196 else if (iGprSrc >= 4)
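    /* A plain REX prefix forces SPL/BPL/SIL/DIL encoding instead of AH/CH/DH/BH. */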
3197 pCodeBuf[off++] = X86_OP_REX;
3198 pCodeBuf[off++] = 0x88;
3199 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iGprSrc, iGprBase, offDisp);
3200 RT_NOREF(iGprTmp);
3201
3202#elif defined(RT_ARCH_ARM64)
3203 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprSrc, iGprBase, offDisp,
3204 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3205
3206#else
3207# error "port me"
3208#endif
3209 return off;
3210}
3211
3212
3213/**
3214 * Emits a 64-bit immediate store via a GPR base address with a displacement.
3215 *
3216 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0); on
3217 * AMD64 it depends on the immediate value.
3218 *
3219 * @note ARM64: Misaligned @a offDisp values and values not in the
3220 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3221 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3222 * does not heed this.
3223 */
3224DECL_FORCE_INLINE_THROW(uint32_t)
3225iemNativeEmitStoreImm64ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint64_t uImm, uint8_t iGprBase,
3226 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3227{
3228#ifdef RT_ARCH_AMD64
3229 if ((int32_t)uImm == (int64_t)uImm)
3230 {
3231 /* mov mem64, imm32 (sign-extended) */
3232 pCodeBuf[off++] = X86_OP_REX_W | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3233 pCodeBuf[off++] = 0xc7;
3234 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3235 pCodeBuf[off++] = RT_BYTE1(uImm);
3236 pCodeBuf[off++] = RT_BYTE2(uImm);
3237 pCodeBuf[off++] = RT_BYTE3(uImm);
3238 pCodeBuf[off++] = RT_BYTE4(uImm);
3239 }
3240 else if (iGprImmTmp != UINT8_MAX || iGprTmp != UINT8_MAX)
3241 {
3242 /* require temporary register. */
3243 if (iGprImmTmp == UINT8_MAX)
3244 iGprImmTmp = iGprTmp;
3245 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3246 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp);
3247 }
3248 else
3249# ifdef IEM_WITH_THROW_CATCH
3250 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3251# else
3252 AssertReleaseFailedStmt(off = UINT32_MAX);
3253# endif
3254
3255#elif defined(RT_ARCH_ARM64)
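    /* The zero register (XZR) reads as zero, so storing a zero immediate needs no temporary. */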
3256 if (uImm == 0)
3257 iGprImmTmp = ARMV8_A64_REG_XZR;
3258 else
3259 {
3260 Assert(iGprImmTmp < 31);
3261 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3262 }
3263 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp, iGprTmp);
3264
3265#else
3266# error "port me"
3267#endif
3268 return off;
3269}
3270
3271
3272/**
3273 * Emits a 32-bit immediate store via a GPR base address with a displacement.
3274 *
3275 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3276 *
3277 * @note ARM64: Misaligned @a offDisp values and values not in the
3278 * -0x3ffc...0x3ffc range will require a temporary register (@a iGprTmp) if
3279 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3280 * does not heed this.
3281 */
3282DECL_FORCE_INLINE_THROW(uint32_t)
3283iemNativeEmitStoreImm32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t uImm, uint8_t iGprBase,
3284 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3285{
3286#ifdef RT_ARCH_AMD64
3287 /* mov mem32, imm32 */
3288 if (iGprBase >= 8)
3289 pCodeBuf[off++] = X86_OP_REX_B;
3290 pCodeBuf[off++] = 0xc7;
3291 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3292 pCodeBuf[off++] = RT_BYTE1(uImm);
3293 pCodeBuf[off++] = RT_BYTE2(uImm);
3294 pCodeBuf[off++] = RT_BYTE3(uImm);
3295 pCodeBuf[off++] = RT_BYTE4(uImm);
3296 RT_NOREF(iGprImmTmp, iGprTmp);
3297
3298#elif defined(RT_ARCH_ARM64)
3300 if (uImm == 0)
3301 iGprImmTmp = ARMV8_A64_REG_XZR;
3302 else
3303 {
3304 Assert(iGprImmTmp < 31);
3305 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprImmTmp, uImm);
3306 }
3307 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3308 kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t), iGprTmp);
3309
3310#else
3311# error "port me"
3312#endif
3313 return off;
3314}
3315
3316
3317/**
3318 * Emits a 16-bit immediate store via a GPR base address with a displacement.
3319 *
3320 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3321 *
3322 * @note ARM64: Misaligned @a offDisp values and values not in the
3323 * -0x1ffe...0x1ffe range will require a temporary register (@a iGprTmp) if
3324 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3325 * does not heed this.
3326 */
3327DECL_FORCE_INLINE_THROW(uint32_t)
3328iemNativeEmitStoreImm16ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint16_t uImm, uint8_t iGprBase,
3329 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3330{
3331#ifdef RT_ARCH_AMD64
3332 /* mov mem16, imm16 */
3333 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3334 if (iGprBase >= 8)
3335 pCodeBuf[off++] = X86_OP_REX_B;
3336 pCodeBuf[off++] = 0xc7;
3337 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3338 pCodeBuf[off++] = RT_BYTE1(uImm);
3339 pCodeBuf[off++] = RT_BYTE2(uImm);
3340 RT_NOREF(iGprImmTmp, iGprTmp);
3341
3342#elif defined(RT_ARCH_ARM64)
3343 if (uImm == 0)
3344 iGprImmTmp = ARMV8_A64_REG_XZR;
3345 else
3346 {
3347 Assert(iGprImmTmp < 31);
3348 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3349 }
3350 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3351 kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t), iGprTmp);
3352
3353#else
3354# error "port me"
3355#endif
3356 return off;
3357}
3358
3359
3360/**
3361 * Emits an 8-bit immediate store via a GPR base address with a displacement.
3362 *
3363 * @note This will always require @a iGprImmTmp on ARM64 (except for uImm=0).
3364 *
3365 * @note ARM64: @a offDisp values not in the -0xfff...0xfff range will require a
3366 * temporary register (@a iGprTmp) if @a iGprReg and @a iGprBase are the
3367 * same. Will assert / throw if caller does not heed this.
3368 */
3369DECL_FORCE_INLINE_THROW(uint32_t)
3370iemNativeEmitStoreImm8ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t uImm, uint8_t iGprBase,
3371 uint8_t iGprImmTmp = UINT8_MAX, int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3372{
3373#ifdef RT_ARCH_AMD64
3374 /* mov mem8, imm8 */
3376 if (iGprBase >= 8)
3377 pCodeBuf[off++] = X86_OP_REX_B;
3378 pCodeBuf[off++] = 0xc6;
3379 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, 0, iGprBase, offDisp);
3380 pCodeBuf[off++] = uImm;
3381 RT_NOREF(iGprImmTmp, iGprTmp);
3382
3383#elif defined(RT_ARCH_ARM64)
3384 if (uImm == 0)
3385 iGprImmTmp = ARMV8_A64_REG_XZR;
3386 else
3387 {
3388 Assert(iGprImmTmp < 31);
3389 pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGprImmTmp, uImm);
3390 }
3391 off = iemNativeEmitGprByGprLdStEx(pCodeBuf, off, iGprImmTmp, iGprBase, offDisp,
3392 kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t), iGprTmp);
3393
3394#else
3395# error "port me"
3396#endif
3397 return off;
3398}
3399
3400
3401#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3402/**
3403 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3404 *
3405 * @note ARM64: Misaligned @a offDisp values and values not in the
3406 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3407 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3408 * does not heed this.
3409 */
3410DECL_FORCE_INLINE_THROW(uint32_t)
3411iemNativeEmitStoreVecRegByGprU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3412 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3413{
3414#ifdef RT_ARCH_AMD64
3415 /* movdqu mem128, reg128 */
3416 pCodeBuf[off++] = 0xf3;
3417 if (iVecRegDst >= 8 || iGprBase >= 8)
3418 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) | (iGprBase < 8 ? 0 : X86_OP_REX_B);
3419 pCodeBuf[off++] = 0x0f;
3420 pCodeBuf[off++] = 0x7f;
3421 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3422 RT_NOREF(iGprTmp);
3423
3424#elif defined(RT_ARCH_ARM64)
3425 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3426 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3427
3428#else
3429# error "port me"
3430#endif
3431 return off;
3432}
3433
3434
3435/**
3436 * Emits a 128-bit vector register store via a GPR base address with a displacement.
3437 */
3438DECL_INLINE_THROW(uint32_t)
3439iemNativeEmitStoreVecRegByGprU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3440{
3441#ifdef RT_ARCH_AMD64
3442 off = iemNativeEmitStoreVecRegByGprU128Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3443 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3444
3445#elif defined(RT_ARCH_ARM64)
3446 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3447
3448#else
3449# error "port me"
3450#endif
3451 return off;
3452}
3453
3454
3455/**
3456 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3457 *
3458 * @note ARM64: Misaligned @a offDisp values and values not in the
3459 * -0x7ff8...0x7ff8 range will require a temporary register (@a iGprTmp) if
3460 * @a iGprReg and @a iGprBase are the same. Will assert / throw if caller
3461 * does not heed this.
3462 */
3463DECL_FORCE_INLINE_THROW(uint32_t)
3464iemNativeEmitStoreVecRegByGprU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase,
3465 int32_t offDisp = 0, uint8_t iGprTmp = UINT8_MAX)
3466{
3467#ifdef RT_ARCH_AMD64
3468 /* vmovdqu mem256, reg256 */
3469 pCodeBuf[off++] = X86_OP_VEX3;
3470 pCodeBuf[off++] = (iVecRegDst < 8 ? X86_OP_VEX3_BYTE1_R : 0)
3471 | X86_OP_VEX3_BYTE1_X
3472 | (iGprBase < 8 ? X86_OP_VEX3_BYTE1_B : 0)
3473 | UINT8_C(0x01);
3474 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
3475 pCodeBuf[off++] = 0x7f;
3476 off = iemNativeEmitGprByGprDisp(pCodeBuf, off, iVecRegDst, iGprBase, offDisp);
3477 RT_NOREF(iGprTmp);
3478
3479#elif defined(RT_ARCH_ARM64)
3480 Assert(!(iVecRegDst & 0x1));
3481 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst, iGprBase, offDisp,
3482 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3483 off = iemNativeEmitVecRegByGprLdStEx(pCodeBuf, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3484 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U), iGprTmp);
3485#else
3486# error "port me"
3487#endif
3488 return off;
3489}
3490
3491
3492/**
3493 * Emits a 256-bit vector register store via a GPR base address with a displacement.
3494 */
3495DECL_INLINE_THROW(uint32_t)
3496iemNativeEmitStoreVecRegByGprU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprBase, int32_t offDisp)
3497{
3498#ifdef RT_ARCH_AMD64
3499 off = iemNativeEmitStoreVecRegByGprU256Ex(iemNativeInstrBufEnsure(pReNative, off, 8), off, iVecRegDst, iGprBase, offDisp);
3500 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3501
3502#elif defined(RT_ARCH_ARM64)
3503 Assert(!(iVecRegDst & 0x1));
3504 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst, iGprBase, offDisp,
3505 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3506 off = iemNativeEmitVecRegByGprLdSt(pReNative, off, iVecRegDst + 1, iGprBase, offDisp + sizeof(RTUINT128U),
3507 kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
3508
3509#else
3510# error "port me"
3511#endif
3512 return off;
3513}
3514#endif
3515
3516
3517
3518/*********************************************************************************************************************************
3519* Subtraction and Additions *
3520*********************************************************************************************************************************/
3521
3522/**
3523 * Emits subtracting a 64-bit GPR from another, storing the result in the first.
3524 * @note The AMD64 version sets flags.
3525 */
3526DECL_INLINE_THROW(uint32_t)
3527iemNativeEmitSubTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3528{
3529#if defined(RT_ARCH_AMD64)
3530 /* sub Gv,Ev */
3531 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
3532 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3533 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3534 pbCodeBuf[off++] = 0x2b;
3535 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3536
3537#elif defined(RT_ARCH_ARM64)
3538 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3539 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend);
3540
3541#else
3542# error "Port me"
3543#endif
3544 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3545 return off;
3546}
3547
3548
3549/**
3550 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3551 * @note The AMD64 version sets flags.
3552 */
3553DECL_FORCE_INLINE(uint32_t)
3554iemNativeEmitSubTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3555{
3556#if defined(RT_ARCH_AMD64)
3557 /* sub Gv,Ev */
3558 if (iGprDst >= 8 || iGprSubtrahend >= 8)
3559 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
3560 | (iGprSubtrahend < 8 ? 0 : X86_OP_REX_B);
3561 pCodeBuf[off++] = 0x2b;
3562 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSubtrahend & 7);
3563
3564#elif defined(RT_ARCH_ARM64)
3565 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprSubtrahend, false /*f64Bit*/);
3566
3567#else
3568# error "Port me"
3569#endif
3570 return off;
3571}
3572
3573
3574/**
3575 * Emits subtracting a 32-bit GPR from another, storing the result in the first.
3576 * @note The AMD64 version sets flags.
3577 */
3578DECL_INLINE_THROW(uint32_t)
3579iemNativeEmitSubTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSubtrahend)
3580{
3581#if defined(RT_ARCH_AMD64)
3582 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSubtrahend);
3583#elif defined(RT_ARCH_ARM64)
3584 off = iemNativeEmitSubTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSubtrahend);
3585#else
3586# error "Port me"
3587#endif
3588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3589 return off;
3590}
3591
3592
3593/**
3594 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3595 *
3596 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3597 *
3598 * @note Larger constants will require a temporary register. Failing to specify
3599 * one when needed will trigger fatal assertion / throw.
3600 */
3601DECL_FORCE_INLINE_THROW(uint32_t)
3602iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3603 uint8_t iGprTmp = UINT8_MAX)
3604{
3605#ifdef RT_ARCH_AMD64
3606 pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
3607 if (iSubtrahend == 1)
3608 {
3609 /* dec r/m64 */
3610 pCodeBuf[off++] = 0xff;
3611 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3612 }
3613 else if (iSubtrahend == -1)
3614 {
3615 /* inc r/m64 */
3616 pCodeBuf[off++] = 0xff;
3617 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3618 }
3619 else if ((int8_t)iSubtrahend == iSubtrahend)
3620 {
3621 /* sub r/m64, imm8 */
3622 pCodeBuf[off++] = 0x83;
3623 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3624 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3625 }
3626 else if ((int32_t)iSubtrahend == iSubtrahend)
3627 {
3628 /* sub r/m64, imm32 */
3629 pCodeBuf[off++] = 0x81;
3630 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3631 pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
3632 pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
3633 pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
3634 pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
3635 }
3636 else if (iGprTmp != UINT8_MAX)
3637 {
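    /* Overwrite the REX prefix emitted above (hence off - 1); the immediate load and the sub below emit their own. */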
3638 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
3639 /* sub r/m64, r64 */
3640 pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
3641 pCodeBuf[off++] = 0x29;
3642 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
3643 }
3644 else
3645# ifdef IEM_WITH_THROW_CATCH
3646 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3647# else
3648 AssertReleaseFailedStmt(off = UINT32_MAX);
3649# endif
3650
3651#elif defined(RT_ARCH_ARM64)
3652 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3653 if (uAbsSubtrahend < 4096)
3654 {
3655 if (iSubtrahend >= 0)
3656 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3657 else
3658 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
3659 }
3660 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3661 {
3662 if (iSubtrahend >= 0)
3663 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3664 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3665 else
3666 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3667 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3668 }
3669 else if (iGprTmp != UINT8_MAX)
3670 {
3671 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
3672 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
3673 }
3674 else
3675# ifdef IEM_WITH_THROW_CATCH
3676 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3677# else
3678 AssertReleaseFailedStmt(off = UINT32_MAX);
3679# endif
3680
3681#else
3682# error "Port me"
3683#endif
3684 return off;
3685}
3686
3687
3688/**
3689 * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
3690 *
3691 * @note Larger constants will require a temporary register. Failing to specify
3692 * one when needed will trigger fatal assertion / throw.
3693 */
3694DECL_INLINE_THROW(uint32_t)
3695iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
3696 uint8_t iGprTmp = UINT8_MAX)
3697
3698{
3699#ifdef RT_ARCH_AMD64
3700 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
3701#elif defined(RT_ARCH_ARM64)
3702 off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
3703#else
3704# error "Port me"
3705#endif
3706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3707 return off;
3708}
3709
3710
3711/**
3712 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3713 *
3714 * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
3715 *
3716 * @note ARM64: Larger constants will require a temporary register. Failing to
3717 * specify one when needed will trigger fatal assertion / throw.
3718 */
3719DECL_FORCE_INLINE_THROW(uint32_t)
3720iemNativeEmitSubGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3721 uint8_t iGprTmp = UINT8_MAX)
3722{
3723#ifdef RT_ARCH_AMD64
3724 if (iGprDst >= 8)
3725 pCodeBuf[off++] = X86_OP_REX_B;
3726 if (iSubtrahend == 1)
3727 {
3728 /* dec r/m32 */
3729 pCodeBuf[off++] = 0xff;
3730 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3731 }
3732 else if (iSubtrahend == -1)
3733 {
3734 /* inc r/m32 */
3735 pCodeBuf[off++] = 0xff;
3736 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3737 }
3738 else if (iSubtrahend < 128 && iSubtrahend >= -128)
3739 {
3740 /* sub r/m32, imm8 */
3741 pCodeBuf[off++] = 0x83;
3742 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3743 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3744 }
3745 else
3746 {
3747 /* sub r/m32, imm32 */
3748 pCodeBuf[off++] = 0x81;
3749 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3750 pCodeBuf[off++] = RT_BYTE1(iSubtrahend);
3751 pCodeBuf[off++] = RT_BYTE2(iSubtrahend);
3752 pCodeBuf[off++] = RT_BYTE3(iSubtrahend);
3753 pCodeBuf[off++] = RT_BYTE4(iSubtrahend);
3754 }
3755 RT_NOREF(iGprTmp);
3756
3757#elif defined(RT_ARCH_ARM64)
3758 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3759 if (uAbsSubtrahend < 4096)
3760 {
3761 if (iSubtrahend >= 0)
3762 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3763 else
3764 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3765 }
3766 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3767 {
3768 if (iSubtrahend >= 0)
3769 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3770 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3771 else
3772 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3773 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3774 }
3775 else if (iGprTmp != UINT8_MAX)
3776 {
3777 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3778 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3779 }
3780 else
3781# ifdef IEM_WITH_THROW_CATCH
3782 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3783# else
3784 AssertReleaseFailedStmt(off = UINT32_MAX);
3785# endif
3786
3787#else
3788# error "Port me"
3789#endif
3790 return off;
3791}
3792
3793
3794/**
3795 * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
3796 *
3797 * @note ARM64: Larger constants will require a temporary register. Failing to
3798 * specify one when needed will trigger fatal assertion / throw.
3799 */
3800DECL_INLINE_THROW(uint32_t)
3801iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
3802 uint8_t iGprTmp = UINT8_MAX)
3803
3804{
3805#ifdef RT_ARCH_AMD64
3806 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
3807#elif defined(RT_ARCH_ARM64)
3808 off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
3809#else
3810# error "Port me"
3811#endif
3812 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3813 return off;
3814}
3815
3816
3817/**
3818 * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
3819 *
3820 * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
3821 * so it is not suitable as a base for conditional jumps.
3822 *
3823 * @note AMD64: Will only update the lower 16 bits of the register.
3824 * @note ARM64: Will update the entire register.
3825 * @note ARM64: Larger constants will require a temporary register. Failing to
3826 * specify one when needed will trigger fatal assertion / throw.
3827 */
3828DECL_FORCE_INLINE_THROW(uint32_t)
3829iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
3830 uint8_t iGprTmp = UINT8_MAX)
3831{
3832#ifdef RT_ARCH_AMD64
3833 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3834 if (iGprDst >= 8)
3835 pCodeBuf[off++] = X86_OP_REX_B;
3836 if (iSubtrahend == 1)
3837 {
3838 /* dec r/m16 */
3839 pCodeBuf[off++] = 0xff;
3840 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
3841 }
3842 else if (iSubtrahend == -1)
3843 {
3844 /* inc r/m16 */
3845 pCodeBuf[off++] = 0xff;
3846 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
3847 }
3848 else if ((int8_t)iSubtrahend == iSubtrahend)
3849 {
3850 /* sub r/m16, imm8 */
3851 pCodeBuf[off++] = 0x83;
3852 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3853 pCodeBuf[off++] = (uint8_t)iSubtrahend;
3854 }
3855 else
3856 {
3857 /* sub r/m16, imm16 */
3858 pCodeBuf[off++] = 0x81;
3859 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
3860 pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
3861 pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
3862 }
3863 RT_NOREF(iGprTmp);
3864
3865#elif defined(RT_ARCH_ARM64)
3866 uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
3867 if (uAbsSubtrahend < 4096)
3868 {
3869 if (iSubtrahend >= 0)
3870 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3871 else
3872 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
3873 }
3874 else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
3875 {
3876 if (iSubtrahend >= 0)
3877 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3878 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3879 else
3880 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
3881 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
3882 }
3883 else if (iGprTmp != UINT8_MAX)
3884 {
3885 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
3886 pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
3887 }
3888 else
3889# ifdef IEM_WITH_THROW_CATCH
3890 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
3891# else
3892 AssertReleaseFailedStmt(off = UINT32_MAX);
3893# endif
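    /* Clear bits 16..63 so only the 16-bit result is left in the register. */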
3894 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
3895
3896#else
3897# error "Port me"
3898#endif
3899 return off;
3900}
3901
3902
3903/**
3904 * Emits adding a 64-bit GPR to another, storing the result in the first.
3905 * @note The AMD64 version sets flags.
3906 */
3907DECL_FORCE_INLINE(uint32_t)
3908iemNativeEmitAddTwoGprsEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3909{
3910#if defined(RT_ARCH_AMD64)
3911 /* add Gv,Ev */
3912 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
3913 | (iGprAddend < 8 ? 0 : X86_OP_REX_B);
3914 pCodeBuf[off++] = 0x03;
3915 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3916
3917#elif defined(RT_ARCH_ARM64)
3918 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend);
3919
3920#else
3921# error "Port me"
3922#endif
3923 return off;
3924}
3925
3926
3927/**
3928 * Emits adding a 64-bit GPR to another, storing the result in the first.
3929 * @note The AMD64 version sets flags.
3930 */
3931DECL_INLINE_THROW(uint32_t)
3932iemNativeEmitAddTwoGprs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3933{
3934#if defined(RT_ARCH_AMD64)
3935 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3936#elif defined(RT_ARCH_ARM64)
3937 off = iemNativeEmitAddTwoGprsEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3938#else
3939# error "Port me"
3940#endif
3941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3942 return off;
3943}
3944
3945
3946/**
3947 * Emits adding a 32-bit GPR to another, storing the result in the first.
3948 * @note The AMD64 version sets flags.
3949 */
3950DECL_FORCE_INLINE(uint32_t)
3951iemNativeEmitAddTwoGprs32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3952{
3953#if defined(RT_ARCH_AMD64)
3954 /* add Gv,Ev */
3955 if (iGprDst >= 8 || iGprAddend >= 8)
3956 pCodeBuf[off++] = (iGprDst >= 8 ? X86_OP_REX_R : 0)
3957 | (iGprAddend >= 8 ? X86_OP_REX_B : 0);
3958 pCodeBuf[off++] = 0x03;
3959 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprAddend & 7);
3960
3961#elif defined(RT_ARCH_ARM64)
3962 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, iGprDst, iGprDst, iGprAddend, false /*f64Bit*/);
3963
3964#else
3965# error "Port me"
3966#endif
3967 return off;
3968}
3969
3970
3971/**
3972 * Emits adding a 32-bit GPR to another, storing the result in the first.
3973 * @note The AMD64 version sets flags.
3974 */
3975DECL_INLINE_THROW(uint32_t)
3976iemNativeEmitAddTwoGprs32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend)
3977{
3978#if defined(RT_ARCH_AMD64)
3979 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprAddend);
3980#elif defined(RT_ARCH_ARM64)
3981 off = iemNativeEmitAddTwoGprs32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprAddend);
3982#else
3983# error "Port me"
3984#endif
3985 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3986 return off;
3987}
3988
3989
3990/**
3991 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
3992 */
3993DECL_INLINE_THROW(uint32_t)
3994iemNativeEmitAddGprImm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
3995{
3996#if defined(RT_ARCH_AMD64)
3997 /* add or inc */
3998 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
3999 if (iImm8 != 1)
4000 {
4001 pCodeBuf[off++] = 0x83;
4002 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4003 pCodeBuf[off++] = (uint8_t)iImm8;
4004 }
4005 else
4006 {
4007 pCodeBuf[off++] = 0xff;
4008 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4009 }
4010
4011#elif defined(RT_ARCH_ARM64)
4012 if (iImm8 >= 0)
4013 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, (uint8_t)iImm8);
4014 else
4015 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, (uint8_t)-iImm8);
4016
4017#else
4018# error "Port me"
4019#endif
4020 return off;
4021}
4022
4023
4024/**
4025 * Emits a 64-bit GPR addition with an 8-bit signed immediate.
4026 */
4027DECL_INLINE_THROW(uint32_t)
4028iemNativeEmitAddGprImm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4029{
4030#if defined(RT_ARCH_AMD64)
4031 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4032#elif defined(RT_ARCH_ARM64)
4033 off = iemNativeEmitAddGprImm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4034#else
4035# error "Port me"
4036#endif
4037 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4038 return off;
4039}
4040
4041
4042/**
4043 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4044 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4045 */
4046DECL_FORCE_INLINE(uint32_t)
4047iemNativeEmitAddGpr32Imm8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4048{
4049#if defined(RT_ARCH_AMD64)
4050 /* add or inc */
4051 if (iGprDst >= 8)
4052 pCodeBuf[off++] = X86_OP_REX_B;
4053 if (iImm8 != 1)
4054 {
4055 pCodeBuf[off++] = 0x83;
4056 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4057 pCodeBuf[off++] = (uint8_t)iImm8;
4058 }
4059 else
4060 {
4061 pCodeBuf[off++] = 0xff;
4062 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4063 }
4064
4065#elif defined(RT_ARCH_ARM64)
4066 if (iImm8 >= 0)
4067 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
4068 else
4069 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
4070
4071#else
4072# error "Port me"
4073#endif
4074 return off;
4075}
4076
4077
4078/**
4079 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
4080 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4081 */
4082DECL_INLINE_THROW(uint32_t)
4083iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
4084{
4085#if defined(RT_ARCH_AMD64)
4086 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iImm8);
4087#elif defined(RT_ARCH_ARM64)
4088 off = iemNativeEmitAddGpr32Imm8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iImm8);
4089#else
4090# error "Port me"
4091#endif
4092 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4093 return off;
4094}
4095
4096
4097/**
4098 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4099 *
4100 * @note Will assert / throw if @a iGprTmp is not specified when needed.
4101 */
4102DECL_FORCE_INLINE_THROW(uint32_t)
4103iemNativeEmitAddGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iAddend, uint8_t iGprTmp = UINT8_MAX)
4104{
4105#if defined(RT_ARCH_AMD64)
4106 if ((int8_t)iAddend == iAddend)
4107 return iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4108
4109 if ((int32_t)iAddend == iAddend)
4110 {
4111 /* add gpr, imm32 */
4112 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4113 pCodeBuf[off++] = 0x81;
4114 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4115 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4116 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4117 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4118 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4119 }
4120 else if (iGprTmp != UINT8_MAX)
4121 {
4122 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4123
4124 /* add dst, tmpreg */
4125 pCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4126 | (iGprTmp < 8 ? 0 : X86_OP_REX_B);
4127 pCodeBuf[off++] = 0x03;
4128 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprTmp & 7);
4129 }
4130 else
4131# ifdef IEM_WITH_THROW_CATCH
4132 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4133# else
4134 AssertReleaseFailedStmt(off = UINT32_MAX);
4135# endif
4136
4137#elif defined(RT_ARCH_ARM64)
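    /* ADD/SUB (immediate) takes a 12-bit value, optionally shifted left by 12, so split the addend into the two halves. */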
4138 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4139 if (uAbsAddend <= 0xffffffU)
4140 {
4141 bool const fSub = iAddend < 0;
4142 if (uAbsAddend > 0xfffU)
4143 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4144 false /*fSetFlags*/, true /*fShift12*/);
4145 if (uAbsAddend & 0xfffU)
4146 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4147 }
4148 else if (iGprTmp != UINT8_MAX)
4149 {
4150 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, iAddend);
4151 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprDst, iGprTmp);
4152 }
4153 else
4154# ifdef IEM_WITH_THROW_CATCH
4155 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4156# else
4157 AssertReleaseFailedStmt(off = UINT32_MAX);
4158# endif
4159
4160#else
4161# error "Port me"
4162#endif
4163 return off;
4164}
4165
4166
4167/**
4168 * Emits a 64-bit GPR addition with a 64-bit signed addend.
4169 */
4170DECL_INLINE_THROW(uint32_t)
4171iemNativeEmitAddGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iAddend)
4172{
4173#if defined(RT_ARCH_AMD64)
4174 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4175 return iemNativeEmitAddGprImm8(pReNative, off, iGprDst, (int8_t)iAddend);
4176
4177 if (iAddend <= INT32_MAX && iAddend >= INT32_MIN)
4178 {
4179 /* add gpr, imm32 */
4180 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4181 pbCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4182 pbCodeBuf[off++] = 0x81;
4183 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4184 pbCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4185 pbCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4186 pbCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4187 pbCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4188 }
4189 else
4190 {
4191 /* Best to use a temporary register to deal with this in the simplest way: */
4192 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, (uint64_t)iAddend);
4193
4194 /* add dst, tmpreg */
4195 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4196 pbCodeBuf[off++] = (iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R)
4197 | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
4198 pbCodeBuf[off++] = 0x03;
4199 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iTmpReg & 7);
4200
4201 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4202 }
4203
4204#elif defined(RT_ARCH_ARM64)
4205 bool const fSub = iAddend < 0;
4206 uint64_t const uAbsAddend = (uint64_t)RT_ABS(iAddend);
4207 if (uAbsAddend <= 0xffffffU)
4208 {
4209 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4210 if (uAbsAddend > 0xfffU)
4211 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, true /*f64Bit*/,
4212 false /*fSetFlags*/, true /*fShift12*/);
4213 if (uAbsAddend & 0xfffU)
4214 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & UINT32_C(0xfff));
4215 }
4216 else
4217 {
4218 /* Use temporary register for the immediate. */
4219 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4220
4221 /* add gprdst, gprdst, tmpreg */
4222 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4223 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg);
4224
4225 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4226 }
4227
4228#else
4229# error "Port me"
4230#endif
4231 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4232 return off;
4233}
4234
4235
4236/**
4237 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4238 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4239 * @note For ARM64 the iAddend value must be in the range -0xffffff..0xffffff;
4240 * negative values are emitted as a subtraction. If the constant does not
4241 * conform, this will assert / throw.
4242 */
4243DECL_FORCE_INLINE_THROW(uint32_t)
4244iemNativeEmitAddGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4245{
4246#if defined(RT_ARCH_AMD64)
4247 if (iAddend <= INT8_MAX && iAddend >= INT8_MIN)
4248 return iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iAddend);
4249
4250 /* add gpr, imm32 */
4251 if (iGprDst >= 8)
4252 pCodeBuf[off++] = X86_OP_REX_B;
4253 pCodeBuf[off++] = 0x81;
4254 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4255 pCodeBuf[off++] = RT_BYTE1((uint32_t)iAddend);
4256 pCodeBuf[off++] = RT_BYTE2((uint32_t)iAddend);
4257 pCodeBuf[off++] = RT_BYTE3((uint32_t)iAddend);
4258 pCodeBuf[off++] = RT_BYTE4((uint32_t)iAddend);
4259
4260#elif defined(RT_ARCH_ARM64)
4261 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4262 if (uAbsAddend <= 0xffffffU)
4263 {
4264 bool const fSub = iAddend < 0;
4265 if (uAbsAddend > 0xfffU)
4266 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4267 false /*fSetFlags*/, true /*fShift12*/);
4268 if (uAbsAddend & 0xfffU)
4269 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4270 }
4271 else
4272# ifdef IEM_WITH_THROW_CATCH
4273 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4274# else
4275 AssertReleaseFailedStmt(off = UINT32_MAX);
4276# endif
4277
4278#else
4279# error "Port me"
4280#endif
4281 return off;
4282}
4283
4284
4285/**
4286 * Emits a 32-bit GPR addition with a 32-bit signed immediate.
4287 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
4288 */
4289DECL_INLINE_THROW(uint32_t)
4290iemNativeEmitAddGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iAddend)
4291{
4292#if defined(RT_ARCH_AMD64)
4293 off = iemNativeEmitAddGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iAddend);
4294
4295#elif defined(RT_ARCH_ARM64)
4296 bool const fSub = iAddend < 0;
4297 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4298 if (uAbsAddend <= 0xffffffU)
4299 {
4300 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4301 if (uAbsAddend > 0xfffU)
4302 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4303 false /*fSetFlags*/, true /*fShift12*/);
4304 if (uAbsAddend & 0xfffU)
4305 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
4306 }
4307 else
4308 {
4309 /* Use temporary register for the immediate. */
4310 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uAbsAddend);
4311
4312 /* add gprdst, gprdst, tmpreg */
4313 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4314 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(fSub, iGprDst, iGprDst, iTmpReg, false /*f64Bit*/);
4315
4316 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
4317 }
4318
4319#else
4320# error "Port me"
4321#endif
4322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4323 return off;
4324}
4325
4326
4327/**
4328 * Emits a 16-bit GPR add with a signed immediate addend.
4329 *
4330 * This will optimize using INC/DEC/whatever and ARM64 will not set flags,
4331 * so it is not suitable as a base for conditional jumps.
4332 *
4333 * @note AMD64: Will only update the lower 16 bits of the register.
4334 * @note ARM64: Will update the entire register.
4335 * @sa iemNativeEmitSubGpr16ImmEx
4336 */
4337DECL_FORCE_INLINE(uint32_t)
4338iemNativeEmitAddGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iAddend)
4339{
4340#ifdef RT_ARCH_AMD64
4341 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4342 if (iGprDst >= 8)
4343 pCodeBuf[off++] = X86_OP_REX_B;
4344 if (iAddend == 1)
4345 {
4346 /* inc r/m16 */
4347 pCodeBuf[off++] = 0xff;
4348 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4349 }
4350 else if (iAddend == -1)
4351 {
4352 /* dec r/m16 */
4353 pCodeBuf[off++] = 0xff;
4354 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
4355 }
4356 else if ((int8_t)iAddend == iAddend)
4357 {
4358 /* add r/m16, imm8 */
4359 pCodeBuf[off++] = 0x83;
4360 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4361 pCodeBuf[off++] = (uint8_t)iAddend;
4362 }
4363 else
4364 {
4365 /* add r/m16, imm16 */
4366 pCodeBuf[off++] = 0x81;
4367 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
4368 pCodeBuf[off++] = RT_BYTE1((uint16_t)iAddend);
4369 pCodeBuf[off++] = RT_BYTE2((uint16_t)iAddend);
4370 }
4371
4372#elif defined(RT_ARCH_ARM64)
4373 bool const fSub = iAddend < 0;
4374 uint32_t const uAbsAddend = (uint32_t)RT_ABS(iAddend);
4375 if (uAbsAddend > 0xfffU)
4376 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend >> 12, false /*f64Bit*/,
4377 false /*fSetFlags*/, true /*fShift12*/);
4378 if (uAbsAddend & 0xfffU)
4379 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsAddend & 0xfff, false /*f64Bit*/);
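    /* Zero bits 16..63 so only the 16-bit result remains. */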
4380 pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
4381
4382#else
4383# error "Port me"
4384#endif
4385 return off;
4386}
4387
4388
4389
4390/**
4391 * Adds two 64-bit GPRs together, storing the result in a third register.
4392 */
4393DECL_FORCE_INLINE(uint32_t)
4394iemNativeEmitGprEqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4395{
4396#ifdef RT_ARCH_AMD64
4397 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4398 {
4399 /** @todo consider LEA */
4400 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend1);
4401 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend2);
4402 }
4403 else
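    /* iGprDst aliases one of the addends, so simply add the other addend to it. */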
4404 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4405
4406#elif defined(RT_ARCH_ARM64)
4407 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2);
4408
4409#else
4410# error "Port me!"
4411#endif
4412 return off;
4413}
4414
4415
4416
4417/**
4418 * Adds two 32-bit GPRs together, storing the result in a third register.
4419 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4420 */
4421DECL_FORCE_INLINE(uint32_t)
4422iemNativeEmitGpr32EqGprPlusGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend1, uint8_t iGprAddend2)
4423{
4424#ifdef RT_ARCH_AMD64
4425 if (iGprDst != iGprAddend1 && iGprDst != iGprAddend2)
4426 {
4427 /** @todo consider LEA */
4428 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend1);
4429 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend2);
4430 }
4431 else
4432 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprDst != iGprAddend1 ? iGprAddend1 : iGprAddend2);
4433
4434#elif defined(RT_ARCH_ARM64)
4435 pCodeBuf[off++] = Armv8A64MkInstrAddReg(iGprDst, iGprAddend1, iGprAddend2, false /*f64Bit*/);
4436
4437#else
4438# error "Port me!"
4439#endif
4440 return off;
4441}
4442
4443
4444/**
4445 * Adds a 64-bit GPR and a 64-bit signed constant, storing the result in a
4446 * third register.
4447 *
4448 * @note The ARM64 version does not work for non-trivial constants if the
4449 * two registers are the same. Will assert / throw exception.
4450 */
4451DECL_FORCE_INLINE_THROW(uint32_t)
4452iemNativeEmitGprEqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int64_t iImmAddend)
4453{
4454#ifdef RT_ARCH_AMD64
4455 /** @todo consider LEA */
4456 if ((int8_t)iImmAddend == iImmAddend)
4457 {
4458 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprAddend);
4459 off = iemNativeEmitAddGprImm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4460 }
4461 else
4462 {
4463 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4464 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4465 }
4466
4467#elif defined(RT_ARCH_ARM64)
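    /* ADD/SUB (immediate) is a three-operand form, so small constants work even when iGprDst == iGprAddend. */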
4468 bool const fSub = iImmAddend < 0;
4469 uint64_t const uAbsImmAddend = RT_ABS(iImmAddend);
4470 if (uAbsImmAddend <= 0xfffU)
4471 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend);
4472 else if (uAbsImmAddend <= 0xffffffU)
4473 {
4474 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4475 true /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4476 if (uAbsImmAddend & 0xfffU)
4477 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & UINT32_C(0xfff));
4478 }
4479 else if (iGprDst != iGprAddend)
4480 {
4481 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, (uint64_t)iImmAddend);
4482 off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, iGprDst, iGprAddend);
4483 }
4484 else
4485# ifdef IEM_WITH_THROW_CATCH
4486 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4487# else
4488 AssertReleaseFailedStmt(off = UINT32_MAX);
4489# endif
4490
4491#else
4492# error "Port me!"
4493#endif
4494 return off;
4495}
4496
4497
4498/**
4499 * Adds a 32-bit GPR and a 32-bit signed constant, storing the result in a
4500 * third register.
4501 *
4502 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
4503 *
4504 * @note The ARM64 version does not work for non-trivial constants if the
4505 * two registers are the same. Will assert / throw exception.
4506 */
4507DECL_FORCE_INLINE_THROW(uint32_t)
4508iemNativeEmitGpr32EqGprPlusImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprAddend, int32_t iImmAddend)
4509{
4510#ifdef RT_ARCH_AMD64
4511 /** @todo consider LEA */
4512 if ((int8_t)iImmAddend == iImmAddend)
4513 {
4514 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4515 off = iemNativeEmitAddGpr32Imm8Ex(pCodeBuf, off, iGprDst, (int8_t)iImmAddend);
4516 }
4517 else
4518 {
4519 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, iImmAddend);
4520 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4521 }
4522
4523#elif defined(RT_ARCH_ARM64)
4524 bool const fSub = iImmAddend < 0;
4525 uint32_t const uAbsImmAddend = RT_ABS(iImmAddend);
4526 if (uAbsImmAddend <= 0xfffU)
4527 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend, false /*f64Bit*/);
4528 else if (uAbsImmAddend <= 0xffffffU)
4529 {
4530 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprAddend, uAbsImmAddend >> 12,
4531 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
4532 if (uAbsImmAddend & 0xfffU)
4533 pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(fSub, iGprDst, iGprDst, uAbsImmAddend & 0xfff, false /*f64Bit*/);
4534 }
4535 else if (iGprDst != iGprAddend)
4536 {
4537 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, (uint32_t)iImmAddend);
4538 off = iemNativeEmitAddTwoGprs32Ex(pCodeBuf, off, iGprDst, iGprAddend);
4539 }
4540 else
4541# ifdef IEM_WITH_THROW_CATCH
4542 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
4543# else
4544 AssertReleaseFailedStmt(off = UINT32_MAX);
4545# endif
4546
4547#else
4548# error "Port me!"
4549#endif
4550 return off;
4551}
4552
4553
4554/*********************************************************************************************************************************
4555* Unary Operations *
4556*********************************************************************************************************************************/
4557
4558/**
4559 * Emits code for two's complement negation of a 64-bit GPR.
4560 */
4561DECL_FORCE_INLINE_THROW(uint32_t)
4562iemNativeEmitNegGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4563{
4564#if defined(RT_ARCH_AMD64)
4565 /* neg Ev */
4566 pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
4567 pCodeBuf[off++] = 0xf7;
4568 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4569
4570#elif defined(RT_ARCH_ARM64)
4571 /* sub dst, xzr, dst */
4572 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst);
4573
4574#else
4575# error "Port me"
4576#endif
4577 return off;
4578}
4579
4580
4581/**
4582 * Emits code for two's complement negation of a 64-bit GPR.
4583 */
4584DECL_INLINE_THROW(uint32_t)
4585iemNativeEmitNegGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4586{
4587#if defined(RT_ARCH_AMD64)
4588 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4589#elif defined(RT_ARCH_ARM64)
4590 off = iemNativeEmitNegGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4591#else
4592# error "Port me"
4593#endif
4594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4595 return off;
4596}
4597
4598
4599/**
4600 * Emits code for two's complement negation of a 32-bit GPR.
4601 * @note Bits 32 thru 63 are set to zero.
4602 */
4603DECL_FORCE_INLINE_THROW(uint32_t)
4604iemNativeEmitNegGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst)
4605{
4606#if defined(RT_ARCH_AMD64)
4607 /* neg Ev */
4608 if (iGprDst >= 8)
4609 pCodeBuf[off++] = X86_OP_REX_B;
4610 pCodeBuf[off++] = 0xf7;
4611 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 3, iGprDst & 7);
4612
4613#elif defined(RT_ARCH_ARM64)
4614 /* sub dst, xzr, dst */
4615 pCodeBuf[off++] = Armv8A64MkInstrNeg(iGprDst, false /*f64Bit*/);
4616
4617#else
4618# error "Port me"
4619#endif
4620 return off;
4621}
4622
4623
4624/**
4625 * Emits code for two complement negation of a 32-bit GPR.
4626 * @note bit 32 thru 63 are set to zero.
4627 */
4628DECL_INLINE_THROW(uint32_t)
4629iemNativeEmitNegGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
4630{
4631#if defined(RT_ARCH_AMD64)
4632 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst);
4633#elif defined(RT_ARCH_ARM64)
4634 off = iemNativeEmitNegGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst);
4635#else
4636# error "Port me"
4637#endif
4638 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4639 return off;
4640}
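
/*
 * Illustrative sketch, assuming a hypothetical host register index (4); the
 * non-Ex wrappers take care of the instruction buffer themselves.
 *
 * @code
 *     off = iemNativeEmitNegGpr(pReNative, off, 4);    // reg4 = -reg4 (64-bit)
 *     off = iemNativeEmitNegGpr32(pReNative, off, 4);  // reg4 = -reg4 (32-bit, upper half zeroed)
 * @endcode
 */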



/*********************************************************************************************************************************
*   Bit Operations                                                                                                               *
*********************************************************************************************************************************/

/**
 * Emits code for clearing bits 16 thru 63 in the GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitClear16UpGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst)
{
#if defined(RT_ARCH_AMD64)
    /* movzx Gv,Ew */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
    if (iGprDst >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xb7;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprDst & 7);

#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
# if 1
    pu32CodeBuf[off++] = Armv8A64MkInstrUxth(iGprDst, iGprDst);
# else
    ///* This produces 0xffff; 0x4f: N=1 imms=001111 (immr=0) => size=64 length=15 */
    //pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 0x4f);
# endif
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
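
/*
 * Sketch: normalizing a 16-bit guest value held in a wider host register,
 * using a hypothetical register index (5).
 *
 * @code
 *     off = iemNativeEmitClear16UpGpr(pReNative, off, 5);  // reg5 &= 0xffff
 * @endcode
 */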


/**
 * Emits code for AND'ing two 64-bit GPRs.
 *
 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
 *       and ARM64 hosts.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    /* and Gv, Ev */
    pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x23;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
    RT_NOREF(fSetFlags);

#elif defined(RT_ARCH_ARM64)
    if (!fSetFlags)
        pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc);
    else
        pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for AND'ing two 64-bit GPRs.
 *
 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
 *       and ARM64 hosts.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitAndGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitAndGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
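
/*
 * Sketch of the fSetFlags variant, which makes a subsequent JZ/JNZ (AMD64) or
 * B.EQ/B.NE (ARM64) usable; the register indices are hypothetical.
 *
 * @code
 *     off = iemNativeEmitAndGprByGpr(pReNative, off, 2, 3, true); // fSetFlags=true: reg2 &= reg3
 *     // The host ZF / NZCV.Z flag now reflects the result and can be
 *     // consumed directly by a conditional-branch emitter.
 * @endcode
 */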


/**
 * Emits code for AND'ing two 32-bit GPRs.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAndGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    /* and Gv, Ev */
    if (iGprDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x23;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
    RT_NOREF(fSetFlags);

#elif defined(RT_ARCH_ARM64)
    if (!fSetFlags)
        pCodeBuf[off++] = Armv8A64MkInstrAnd(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);
    else
        pCodeBuf[off++] = Armv8A64MkInstrAnds(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for AND'ing two 32-bit GPRs.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitAndGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc, fSetFlags);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitAndGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, fSetFlags);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for AND'ing a 64-bit GPR with a constant.
 *
 * @note When fSetFlags=true, JZ/JNZ jumps can be used afterwards on both AMD64
 *       and ARM64 hosts.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitAndGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm, bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    if ((int64_t)uImm == (int8_t)uImm)
    {
        /* and Ev, imm8 */
        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pbCodeBuf[off++] = 0x83;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
        pbCodeBuf[off++] = (uint8_t)uImm;
    }
    else if ((int64_t)uImm == (int32_t)uImm)
    {
        /* and Ev, imm32 */
        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pbCodeBuf[off++] = 0x81;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
        pbCodeBuf[off++] = RT_BYTE1(uImm);
        pbCodeBuf[off++] = RT_BYTE2(uImm);
        pbCodeBuf[off++] = RT_BYTE3(uImm);
        pbCodeBuf[off++] = RT_BYTE4(uImm);
    }
    else
    {
        /* Use temporary register for the 64-bit immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }
    RT_NOREF(fSetFlags);

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (!fSetFlags)
            pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR);
        else
            pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR);
    }
    else
    {
        /* Use temporary register for the 64-bit immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitAndGprByGpr(pReNative, off, iGprDst, iTmpReg, fSetFlags);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
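
/*
 * Sketch: masking with a constant. 0xffffffffffff0000 is a contiguous-ones
 * pattern and thus a valid ARM64 logical immediate, so a single AND results
 * there, while AMD64 can use the sign-extended imm32 form; a non-encodable
 * constant would fall back to a temporary register. Register index is
 * hypothetical.
 *
 * @code
 *     off = iemNativeEmitAndGprByImm(pReNative, off, 2, UINT64_C(0xffffffffffff0000));
 * @endcode
 */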


/**
 * Emits code for AND'ing a 32-bit GPR with a constant.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 * @note For ARM64 this only supports @a uImm values that can be expressed using
 *       the two 6-bit immediates of the AND/ANDS instructions.  The caller must
 *       make sure this is possible!
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitAndGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    /* and Ev, imm */
    if (iGprDst >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if ((int32_t)uImm == (int8_t)uImm)
    {
        pCodeBuf[off++] = 0x83;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
        pCodeBuf[off++] = (uint8_t)uImm;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm);
        pCodeBuf[off++] = RT_BYTE2(uImm);
        pCodeBuf[off++] = RT_BYTE3(uImm);
        pCodeBuf[off++] = RT_BYTE4(uImm);
    }
    RT_NOREF(fSetFlags);

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
    {
        if (!fSetFlags)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for AND'ing a 32-bit GPR with a constant.
 *
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitAndGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm, bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitAndGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm, fSetFlags);

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        if (!fSetFlags)
            pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
        else
            pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
    }
    else
    {
        /* Use temporary register for the immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, iGprDst, iTmpReg, fSetFlags);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for AND'ing a 64-bit GPR with a constant.
 *
 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
 *       encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
 *       the same.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGprEqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint64_t uImm,
                              bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
    off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc);
    RT_NOREF(fSetFlags);

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
    {
        if (!fSetFlags)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR);
    }
    else if (iGprDst != iGprSrc)
    {
        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprDst, uImm);
        off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me"
#endif
    return off;
}

/**
 * Emits code for AND'ing a 32-bit GPR with a constant.
 *
 * @note For ARM64 any complicated immediates w/o an AND/ANDS compatible
 *       encoding will assert / throw exception if @a iGprDst and @a iGprSrc are
 *       the same.
 *
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGpr32EqGprAndImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint32_t uImm,
                                bool fSetFlags = false)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
    off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
    RT_NOREF(fSetFlags);

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
    {
        if (!fSetFlags)
            pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
        else
            pCodeBuf[off++] = Armv8A64MkInstrAndsImm(iGprDst, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
    }
    else if (iGprDst != iGprSrc)
    {
        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprDst, uImm);
        off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc, fSetFlags);
    }
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me"
#endif
    return off;
}
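
/*
 * Sketch of the three-operand form: dst = src & mask without clobbering src.
 * 0xff00 (a contiguous run of ones) is encodable as an ARM64 logical
 * immediate, so this stays one instruction there.  The register indices and
 * the buffer request are hypothetical.
 *
 * @code
 *     PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
 *     off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, 2, 3, 0xff00);  // reg2 = reg3 & 0xff00
 *     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */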


/**
 * Emits code for OR'ing two 64-bit GPRs.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#if defined(RT_ARCH_AMD64)
    /* or Gv, Ev */
    pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for OR'ing two 64-bit GPRs.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitOrGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing two 32-bit GPRs.
 * @note Bits 63:32 of the destination GPR will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitOrGpr32ByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#if defined(RT_ARCH_AMD64)
    /* or Gv, Ev */
    if (iGprDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x0b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for OR'ing two 32-bit GPRs.
 * @note Bits 63:32 of the destination GPR will be cleared.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitOrGpr32ByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitOrGpr32ByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for OR'ing a 64-bit GPR with a constant.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitOrGprByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint64_t uImm)
{
#if defined(RT_ARCH_AMD64)
    if ((int64_t)uImm == (int8_t)uImm)
    {
        /* or Ev, imm8 */
        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pbCodeBuf[off++] = 0x83;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
        pbCodeBuf[off++] = (uint8_t)uImm;
    }
    else if ((int64_t)uImm == (int32_t)uImm)
    {
        /* or Ev, imm32 */
        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B);
        pbCodeBuf[off++] = 0x81;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
        pbCodeBuf[off++] = RT_BYTE1(uImm);
        pbCodeBuf[off++] = RT_BYTE2(uImm);
        pbCodeBuf[off++] = RT_BYTE3(uImm);
        pbCodeBuf[off++] = RT_BYTE4(uImm);
    }
    else
    {
        /* Use temporary register for the 64-bit immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iTmpReg);
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask64ToImmRImmS(uImm, &uImmNandS, &uImmR))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR);
    }
    else
    {
        /* Use temporary register for the 64-bit immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitOrGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iTmpReg);
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
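
/*
 * Sketch: setting a single flag bit (bit 18 here, purely illustrative).  A
 * one-bit mask is always a valid ARM64 logical immediate and fits the
 * sign-extended imm32 form on AMD64, so no temporary register is needed.
 * The register index is hypothetical.
 *
 * @code
 *     off = iemNativeEmitOrGprByImm(pReNative, off, 2, RT_BIT_64(18));  // reg2 |= bit 18
 * @endcode
 */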


/**
 * Emits code for OR'ing a 32-bit GPR with a constant.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 * @note For ARM64 this only supports @a uImm values that can be expressed using
 *       the two 6-bit immediates of the ORR instruction.  The caller must make
 *       sure this is possible!
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitOrGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
{
#if defined(RT_ARCH_AMD64)
    /* or Ev, imm */
    if (iGprDst >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if ((int32_t)uImm == (int8_t)uImm)
    {
        pCodeBuf[off++] = 0x83;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
        pCodeBuf[off++] = (uint8_t)uImm;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm);
        pCodeBuf[off++] = RT_BYTE2(uImm);
        pCodeBuf[off++] = RT_BYTE3(uImm);
        pCodeBuf[off++] = RT_BYTE4(uImm);
    }

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
        pCodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for OR'ing a 32-bit GPR with a constant.
 *
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitOrGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitOrGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrOrrImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
    }
    else
    {
        /* Use temporary register for the immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitOrGpr32ByGpr(pReNative, off, iGprDst, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}



/**
 * ORs two 64-bit GPRs together, storing the result in a third register.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGprEqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
{
#ifdef RT_ARCH_AMD64
    if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
    {
        off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc1);
        off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
    }
    else
        off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2);

#else
# error "Port me!"
#endif
    return off;
}



/**
 * ORs two 32-bit GPRs together, storing the result in a third register.
 * @note Bits 32 thru 63 in @a iGprDst will be zero after the operation.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitGpr32EqGprOrGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc1, uint8_t iGprSrc2)
{
#ifdef RT_ARCH_AMD64
    if (iGprDst != iGprSrc1 && iGprDst != iGprSrc2)
    {
        off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc1);
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprSrc2);
    }
    else
        off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, iGprDst, iGprDst != iGprSrc1 ? iGprSrc1 : iGprSrc2);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrOrr(iGprDst, iGprSrc1, iGprSrc2, false /*f64Bit*/);

#else
# error "Port me!"
#endif
    return off;
}
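
/*
 * Sketch: merging two 32-bit values into a third register; on AMD64 the
 * helper copes with the destination aliasing one of the sources by reusing
 * it.  The register indices and buffer request are hypothetical.
 *
 * @code
 *     PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
 *     off = iemNativeEmitGpr32EqGprOrGprEx(pCodeBuf, off, 2, 3, 4);  // reg2 = reg3 | reg4
 *     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */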


/**
 * Emits code for XOR'ing two 64-bit GPRs.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitXorGprByGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#if defined(RT_ARCH_AMD64)
    /* xor Gv, Ev */
    pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x33;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for XOR'ing two 64-bit GPRs.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitXorGprByGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitXorGprByGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for XOR'ing two 32-bit GPRs.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitXorGpr32ByGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#if defined(RT_ARCH_AMD64)
    /* xor Gv, Ev */
    if (iGprDst >= 8 || iGprSrc >= 8)
        pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R) | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    pCodeBuf[off++] = 0x33;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrEor(iGprDst, iGprDst, iGprSrc, false /*f64Bit*/);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for XOR'ing two 32-bit GPRs.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitXorGpr32ByGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iGprSrc);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitXorGpr32ByGpr32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for XOR'ing a 32-bit GPR with a constant.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 * @note For ARM64 this only supports @a uImm values that can be expressed using
 *       the two 6-bit immediates of the EOR instructions.  The caller must make
 *       sure this is possible!
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitXorGpr32ByImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint32_t uImm)
{
#if defined(RT_ARCH_AMD64)
    /* xor Ev, imm */
    if (iGprDst >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if ((int32_t)uImm == (int8_t)uImm)
    {
        pCodeBuf[off++] = 0x83;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
        pCodeBuf[off++] = (uint8_t)uImm;
    }
    else
    {
        pCodeBuf[off++] = 0x81;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGprDst & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm);
        pCodeBuf[off++] = RT_BYTE2(uImm);
        pCodeBuf[off++] = RT_BYTE3(uImm);
        pCodeBuf[off++] = RT_BYTE4(uImm);
    }

#elif defined(RT_ARCH_ARM64)
    uint32_t uImmR     = 0;
    uint32_t uImmNandS = 0;
    if (Armv8A64ConvertMask32ToImmRImmS(uImm, &uImmNandS, &uImmR))
        pCodeBuf[off++] = Armv8A64MkInstrEorImm(iGprDst, iGprDst, uImmNandS, uImmR, false /*f64Bit*/);
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for XOR'ing a 32-bit GPR with a constant.
 * @note Bits 32 thru 63 in the destination will be zero after the operation.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitXorGpr32ByImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint32_t uImm)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, uImm);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitXorGpr32ByImmEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, uImm);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
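
/*
 * Sketch: toggling bits with a constant.  0x80000000 is a single-bit mask and
 * therefore a valid ARM64 logical immediate, satisfying the encoding
 * restriction noted above.  The register index is hypothetical.
 *
 * @code
 *     off = iemNativeEmitXorGpr32ByImm(pReNative, off, 2, UINT32_C(0x80000000));  // flip the sign bit
 * @endcode
 */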


/*********************************************************************************************************************************
*   Shifting                                                                                                                     *
*********************************************************************************************************************************/

/**
 * Emits code for shifting a GPR a fixed number of bits to the left.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitShiftGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 64);

#if defined(RT_ARCH_AMD64)
    /* shl dst, cShift */
    pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for shifting a GPR a fixed number of bits to the left.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitShiftGprLeft(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitShiftGprLeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
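
/*
 * Sketch: scaling an element index into a byte offset by shifting left by 3
 * (i.e. multiplying by 8); the register index is hypothetical.
 *
 * @code
 *     off = iemNativeEmitShiftGprLeft(pReNative, off, 2, 3);  // reg2 <<= 3
 * @endcode
 */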


/**
 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitShiftGpr32LeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 32);

#if defined(RT_ARCH_AMD64)
    /* shl dst, cShift */
    if (iGprDst >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprDst & 7);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrLslImm(iGprDst, iGprDst, cShift, false /*64Bit*/);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for shifting a 32-bit GPR a fixed number of bits to the left.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitShiftGpr32Left(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitShiftGpr32LeftEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 64);

#if defined(RT_ARCH_AMD64)
    /* shr dst, cShift */
    pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for (unsigned) shifting a GPR a fixed number of bits to the right.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
 * right.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 32);

#if defined(RT_ARCH_AMD64)
    /* shr dst, cShift */
    if (iGprDst >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprDst, cShift, false /*64Bit*/);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
 * right.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for (unsigned) shifting a 32-bit GPR a fixed number of bits to the
 * right and assigning it to a different GPR.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitGpr32EqGprShiftRightImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, uint8_t cShift)
{
    Assert(cShift > 0); Assert(cShift < 32);
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc);
    off = iemNativeEmitShiftGpr32RightEx(pCodeBuf, off, iGprDst, cShift);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrLsrImm(iGprDst, iGprSrc, cShift, false /*64Bit*/);

#else
# error "Port me"
#endif
    return off;
}
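
/*
 * Sketch: extracting bits 15:8 of a 32-bit value into another register by
 * shifting right and masking.  0xff is a valid ARM64 logical immediate, as
 * the masking emitter requires.  Indices and buffer request are hypothetical.
 *
 * @code
 *     PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
 *     off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, 2, 3, 8);  // reg2 = reg3 >> 8
 *     off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, 2, 0xff);           // reg2 &= 0xff
 *     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
 * @endcode
 */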


/**
 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitArithShiftGprRightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 64);

#if defined(RT_ARCH_AMD64)
    /* sar dst, cShift */
    pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for (signed) shifting a GPR a fixed number of bits to the right.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitArithShiftGprRight(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitArithShiftGprRightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitArithShiftGpr32RightEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 32);

#if defined(RT_ARCH_AMD64)
    /* sar dst, cShift */
    if (iGprDst >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprDst & 7);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrAsrImm(iGprDst, iGprDst, cShift, false /*f64Bit*/);

#else
# error "Port me"
#endif
    return off;
}


/**
 * Emits code for (signed) shifting a 32-bit GPR a fixed number of bits to the right.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitArithShiftGpr32Right(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
#if defined(RT_ARCH_AMD64)
    off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, cShift);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitArithShiftGpr32RightEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, cShift);
#else
# error "Port me"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for rotating a GPR a fixed number of bits to the left.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitRotateGprLeftEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 64);

#if defined(RT_ARCH_AMD64)
    /* rol dst, cShift */
    pCodeBuf[off++] = iGprDst < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
    }

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrRorImm(iGprDst, iGprDst, cShift);

#else
# error "Port me"
#endif
    return off;
}


#if defined(RT_ARCH_AMD64)
/**
 * Emits code for rotating a 32-bit GPR a fixed number of bits to the left via carry.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t cShift)
{
    Assert(cShift > 0 && cShift < 32);

    /* rcl dst, cShift */
    if (iGprDst >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if (cShift != 1)
    {
        pCodeBuf[off++] = 0xc1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
        pCodeBuf[off++] = cShift;
    }
    else
    {
        pCodeBuf[off++] = 0xd1;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
    }

    return off;
}
#endif /* RT_ARCH_AMD64 */



/**
 * Emits code for reversing the byte order for a 16-bit value in a 32-bit GPR.
 * @note The GPR is expected to hold a zero-extended 16-bit value: on ARM64 the
 *       REV16 clears bits 63:32, while on AMD64 the ROL leaves bits 63:16
 *       untouched, so only a zero-extended input gives the same result on both
 *       hosts.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitBswapGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#if defined(RT_ARCH_AMD64)
    /*
     * There is no bswap r16 on x86 (the encoding exists but does not work).
     * So just use a rol (gcc -O2 is doing that).
     *
     *    rol r16, 0x8
     */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    pbCodeBuf[off++] = 0xc1;
    pbCodeBuf[off++] = 0xc0 | (iGpr & 7);
    pbCodeBuf[off++] = 0x08;
#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);

    pu32CodeBuf[off++] = Armv8A64MkInstrRev16(iGpr, iGpr, false /*f64Bit*/);
#else
# error "Port me"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for reversing the byte order in a 32-bit GPR.
 * @note Bits 63:32 of the destination GPR will be cleared.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitBswapGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#if defined(RT_ARCH_AMD64)
    /* bswap r32 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);

    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);

    pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, false /*f64Bit*/);
#else
# error "Port me"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits code for reversing the byte order in a 64-bit GPR.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitBswapGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr)
{
#if defined(RT_ARCH_AMD64)
    /* bswap r64 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);

    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
    else
        pbCodeBuf[off++] = X86_OP_REX_W;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xc8 | (iGpr & 7);
#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);

    pu32CodeBuf[off++] = Armv8A64MkInstrRev(iGpr, iGpr, true /*f64Bit*/);
#else
# error "Port me"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
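
/*
 * Sketch: byte-swapping a 32-bit value, e.g. after loading a big-endian
 * quantity; the register index is hypothetical.
 *
 * @code
 *     off = iemNativeEmitBswapGpr32(pReNative, off, 2);  // reg2 = byte-swapped reg2, upper half zero
 * @endcode
 */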


/*********************************************************************************************************************************
*   Compare and Testing                                                                                                          *
*********************************************************************************************************************************/


#ifdef RT_ARCH_ARM64
/**
 * Emits an ARM64 compare instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpArm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight,
                      bool f64Bit = true, uint32_t cShift = 0, ARMV8A64INSTRSHIFT enmShift = kArmv8A64InstrShift_Lsr)
{
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR /*iRegResult*/, iGprLeft, iGprRight,
                                                  f64Bit, true /*fSetFlags*/, cShift, enmShift);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
#endif


/**
 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
 * with a conditional instruction.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitCmpGprWithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
{
#ifdef RT_ARCH_AMD64
    /* cmp Gv, Ev */
    pCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
    pCodeBuf[off++] = 0x3b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight);

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a compare of two 64-bit GPRs, setting status flags/whatever for use
 * with a conditional instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGprWithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitCmpGprWithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
 * with a conditional instruction.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitCmpGpr32WithGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
{
#ifdef RT_ARCH_AMD64
    /* cmp Gv, Ev */
    if (iGprLeft >= 8 || iGprRight >= 8)
        pCodeBuf[off++] = (iGprLeft >= 8 ? X86_OP_REX_R : 0) | (iGprRight >= 8 ? X86_OP_REX_B : 0);
    pCodeBuf[off++] = 0x3b;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprLeft & 7, iGprRight & 7);

#elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = Armv8A64MkInstrCmpReg(iGprLeft, iGprRight, false /*f64Bit*/);

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a compare of two 32-bit GPRs, setting status flags/whatever for use
 * with a conditional instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr32WithGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint8_t iGprRight)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprLeft, iGprRight);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitCmpGpr32WithGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprLeft, iGprRight);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a compare of a 64-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGprWithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint64_t uImm)
{
#ifdef RT_ARCH_AMD64
    if (uImm <= UINT32_C(0x7f)) /* the imm8 is sign-extended, so only 0..0x7f can use it */
    {
        /* cmp Ev, Ib */
        uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
        pbCodeBuf[off++] = 0x83;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pbCodeBuf[off++] = (uint8_t)uImm;
    }
    else if ((int64_t)uImm == (int32_t)uImm)
    {
        /* cmp Ev, imm */
        uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
        pbCodeBuf[off++] = X86_OP_REX_W | (iGprLeft >= 8 ? X86_OP_REX_B : 0);
        pbCodeBuf[off++] = 0x81;
        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
        pbCodeBuf[off++] = RT_BYTE1(uImm);
        pbCodeBuf[off++] = RT_BYTE2(uImm);
        pbCodeBuf[off++] = RT_BYTE3(uImm);
        pbCodeBuf[off++] = RT_BYTE4(uImm);
    }
    else
    {
        /* Use temporary register for the immediate. */
        uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
    if (uImm < _4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
                                                         true /*64Bit*/, true /*fSetFlags*/);
    }
    else if ((uImm & ~(uint64_t)0xfff000) == 0)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
                                                         true /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
    }
    else
    {
        /* Use temporary register for the immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me!"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
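
/*
 * Sketch: comparing a host register against a constant ahead of a conditional
 * branch; constants that fit neither the imm8/imm32 forms (AMD64) nor the
 * 12-bit forms (ARM64) automatically go via a temporary register.  The
 * register index is hypothetical.
 *
 * @code
 *     off = iemNativeEmitCmpGprWithImm(pReNative, off, 2, UINT64_C(0x123456789));
 *     // ...followed by a conditional-branch emitter consuming the flags.
 * @endcode
 */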


/**
 * Emits a compare of a 32-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
 *
 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
 *       shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
 *       bits all zero).  Will release assert or throw exception if the caller
 *       violates this restriction.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr32WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
{
#ifdef RT_ARCH_AMD64
    if (iGprLeft >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if (uImm <= UINT32_C(0x7f))
    {
        /* cmp Ev, Ib */
        pCodeBuf[off++] = 0x83;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pCodeBuf[off++] = (uint8_t)uImm;
    }
    else
    {
        /* cmp Ev, imm */
        pCodeBuf[off++] = 0x81;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm);
        pCodeBuf[off++] = RT_BYTE2(uImm);
        pCodeBuf[off++] = RT_BYTE3(uImm);
        pCodeBuf[off++] = RT_BYTE4(uImm);
    }

#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
    if (uImm < _4K)
        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
                                                      false /*64Bit*/, true /*fSetFlags*/);
    else if ((uImm & ~(uint32_t)0xfff000) == 0)
        pCodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
                                                      false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
    else
# ifdef IEM_WITH_THROW_CATCH
        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
        AssertReleaseFailedStmt(off = UINT32_MAX);
# endif

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a compare of a 32-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr32WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint32_t uImm)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitCmpGpr32WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm);

#elif defined(RT_ARCH_ARM64)
    /** @todo guess there are clever things we can do here... */
    if (uImm < _4K)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm,
                                                         false /*64Bit*/, true /*fSetFlags*/);
    }
    else if ((uImm & ~(uint32_t)0xfff000) == 0)
    {
        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_XZR, iGprLeft, (uint32_t)uImm >> 12,
                                                         false /*64Bit*/, true /*fSetFlags*/, true /*fShift12*/);
    }
    else
    {
        /* Use temporary register for the immediate. */
        uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, uImm);
        off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, iGprLeft, iTmpReg);
        iemNativeRegFreeTmpImm(pReNative, iTmpReg);
    }

#else
# error "Port me!"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


/**
 * Emits a compare of a 16-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
 *
 * @note ARM64: Helper register is required (@a idxTmpReg) for isolating the
 *       16-bit value from @a iGprLeft.
 * @note On ARM64 the @a uImm value must be in the range 0x000..0xfff or that
 *       shifted 12 bits to the left (e.g. 0x1000..0xfff0000 with the lower 12
 *       bits all zero).  Will release assert or throw exception if the caller
 *       violates this restriction.
 */
DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr16WithImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
                               uint8_t idxTmpReg = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    if (iGprLeft >= 8)
        pCodeBuf[off++] = X86_OP_REX_B;
    if (uImm <= UINT32_C(0x7f))
    {
        /* cmp Ev, Ib */
        pCodeBuf[off++] = 0x83;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pCodeBuf[off++] = (uint8_t)uImm;
    }
    else
    {
        /* cmp Ev, imm */
        pCodeBuf[off++] = 0x81;
        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, iGprLeft & 7);
        pCodeBuf[off++] = RT_BYTE1(uImm);
        pCodeBuf[off++] = RT_BYTE2(uImm);
    }
    RT_NOREF(idxTmpReg);

#elif defined(RT_ARCH_ARM64)
# ifdef IEM_WITH_THROW_CATCH
    AssertStmt(idxTmpReg < 32, IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
# else
    AssertReleaseStmt(idxTmpReg < 32, off = UINT32_MAX);
# endif
    Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, iGprLeft, 15, 0, false /*f64Bit*/);
    off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, idxTmpReg, uImm);

#else
# error "Port me!"
#endif
    return off;
}


/**
 * Emits a compare of a 16-bit GPR with a constant value, setting status
 * flags/whatever for use with a conditional instruction.
 *
 * @note ARM64: Helper register is required (idxTmpReg).
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCmpGpr16WithImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprLeft, uint16_t uImm,
                             uint8_t idxTmpReg = UINT8_MAX)
{
#ifdef RT_ARCH_AMD64
    off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprLeft, uImm, idxTmpReg);
#elif defined(RT_ARCH_ARM64)
    off = iemNativeEmitCmpGpr16WithImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGprLeft, uImm, idxTmpReg);
#else
# error "Port me!"
#endif
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}
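
/*
 * Sketch: the 16-bit compare needs a scratch register on ARM64 to isolate the
 * low half of the operand, while AMD64 ignores it.  Register indices are
 * hypothetical; 0x1f satisfies the ARM64 0x000..0xfff immediate restriction.
 *
 * @code
 *     off = iemNativeEmitCmpGpr16WithImm(pReNative, off, 2, 0x1f, 5);  // left=reg2, ARM64 tmp=reg5
 * @endcode
 */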
6292
6293
6294
6295/*********************************************************************************************************************************
6296* Branching *
6297*********************************************************************************************************************************/
6298
6299/**
6300 * Emits a JMP rel32 / B imm19 to the given label.
6301 */
6302DECL_FORCE_INLINE_THROW(uint32_t)
6303iemNativeEmitJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t idxLabel)
6304{
6305 Assert(idxLabel < pReNative->cLabels);
6306
6307#ifdef RT_ARCH_AMD64
6308 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6309 {
6310 uint32_t offRel = pReNative->paLabels[idxLabel].off - (off + 2);
6311 if ((int32_t)offRel < 128 && (int32_t)offRel >= -128)
6312 {
6313 pCodeBuf[off++] = 0xeb; /* jmp rel8 */
6314 pCodeBuf[off++] = (uint8_t)offRel;
6315 }
6316 else
6317 {
6318 offRel -= 3;
6319 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6320 pCodeBuf[off++] = RT_BYTE1(offRel);
6321 pCodeBuf[off++] = RT_BYTE2(offRel);
6322 pCodeBuf[off++] = RT_BYTE3(offRel);
6323 pCodeBuf[off++] = RT_BYTE4(offRel);
6324 }
6325 }
6326 else
6327 {
6328 pCodeBuf[off++] = 0xe9; /* jmp rel32 */
6329 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6330 pCodeBuf[off++] = 0xfe;
6331 pCodeBuf[off++] = 0xff;
6332 pCodeBuf[off++] = 0xff;
6333 pCodeBuf[off++] = 0xff;
6334 }
6335 pCodeBuf[off++] = 0xcc; /* int3 poison */
6336
6337#elif defined(RT_ARCH_ARM64)
6338 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
6339 {
6340 pCodeBuf[off] = Armv8A64MkInstrB(pReNative->paLabels[idxLabel].off - off);
6341 off++;
6342 }
6343 else
6344 {
6345 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm26At0);
6346 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
6347 }
6348
6349#else
6350# error "Port me!"
6351#endif
6352 return off;
6353}
6354
6355
6356/**
6357 * Emits a JMP rel32 / B imm19 to the given label.
6358 */
6359DECL_INLINE_THROW(uint32_t)
6360iemNativeEmitJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6361{
6362#ifdef RT_ARCH_AMD64
6363 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel);
6364#elif defined(RT_ARCH_ARM64)
6365 off = iemNativeEmitJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel);
6366#else
6367# error "Port me!"
6368#endif
6369 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6370 return off;
6371}
6372
6373
6374/**
6375 * Emits a JMP rel32 / B imm19 to a new undefined label.
6376 */
6377DECL_INLINE_THROW(uint32_t)
6378iemNativeEmitJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6379{
6380 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6381 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
6382}
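
/* Workflow sketch (editor's illustration): jumping to a still-undefined label
 * records a fixup (kIemNativeFixupType_Rel32 / RelImm26At0 above) that gets
 * resolved once the label is given an offset; a label-define helper such as
 * iemNativeLabelDefine() is assumed for the last step:
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0);
 *     off = iemNativeEmitJmpToLabel(pReNative, off, idxLabel);   // fixup recorded
 *     ...                                                        // skipped code
 *     iemNativeLabelDefine(pReNative, idxLabel, off);            // fixup resolved here
 */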
6383
6384/** Condition type. */
6385#ifdef RT_ARCH_AMD64
6386typedef enum IEMNATIVEINSTRCOND : uint8_t
6387{
6388 kIemNativeInstrCond_o = 0,
6389 kIemNativeInstrCond_no,
6390 kIemNativeInstrCond_c,
6391 kIemNativeInstrCond_nc,
6392 kIemNativeInstrCond_e,
6393 kIemNativeInstrCond_z = kIemNativeInstrCond_e,
6394 kIemNativeInstrCond_ne,
6395 kIemNativeInstrCond_nz = kIemNativeInstrCond_ne,
6396 kIemNativeInstrCond_be,
6397 kIemNativeInstrCond_nbe,
6398 kIemNativeInstrCond_s,
6399 kIemNativeInstrCond_ns,
6400 kIemNativeInstrCond_p,
6401 kIemNativeInstrCond_np,
6402 kIemNativeInstrCond_l,
6403 kIemNativeInstrCond_nl,
6404 kIemNativeInstrCond_le,
6405 kIemNativeInstrCond_nle
6406} IEMNATIVEINSTRCOND;
6407#elif defined(RT_ARCH_ARM64)
6408typedef ARMV8INSTRCOND IEMNATIVEINSTRCOND;
6409# define kIemNativeInstrCond_o todo_conditional_codes
6410# define kIemNativeInstrCond_no todo_conditional_codes
6411# define kIemNativeInstrCond_c todo_conditional_codes
6412# define kIemNativeInstrCond_nc todo_conditional_codes
6413# define kIemNativeInstrCond_e kArmv8InstrCond_Eq
6414# define kIemNativeInstrCond_ne kArmv8InstrCond_Ne
6415# define kIemNativeInstrCond_be kArmv8InstrCond_Ls
6416# define kIemNativeInstrCond_nbe kArmv8InstrCond_Hi
6417# define kIemNativeInstrCond_s todo_conditional_codes
6418# define kIemNativeInstrCond_ns todo_conditional_codes
6419# define kIemNativeInstrCond_p todo_conditional_codes
6420# define kIemNativeInstrCond_np todo_conditional_codes
6421# define kIemNativeInstrCond_l kArmv8InstrCond_Lt
6422# define kIemNativeInstrCond_nl kArmv8InstrCond_Ge
6423# define kIemNativeInstrCond_le kArmv8InstrCond_Le
6424# define kIemNativeInstrCond_nle kArmv8InstrCond_Gt
6425#else
6426# error "Port me!"
6427#endif
6428
6429
6430/**
6431 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6432 */
6433DECL_FORCE_INLINE_THROW(uint32_t)
6434iemNativeEmitJccToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
6435 uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6436{
6437 Assert(idxLabel < pReNative->cLabels);
6438
6439 uint32_t const offLabel = pReNative->paLabels[idxLabel].off;
6440#ifdef RT_ARCH_AMD64
6441 if (offLabel >= off)
6442 {
6443 /* jcc rel32 */
6444 pCodeBuf[off++] = 0x0f;
6445 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6446 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
6447 pCodeBuf[off++] = 0x00;
6448 pCodeBuf[off++] = 0x00;
6449 pCodeBuf[off++] = 0x00;
6450 pCodeBuf[off++] = 0x00;
6451 }
6452 else
6453 {
6454 int32_t offDisp = offLabel - (off + 2);
6455 if ((int8_t)offDisp == offDisp)
6456 {
6457 /* jcc rel8 */
6458 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6459 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6460 }
6461 else
6462 {
6463 /* jcc rel32 */
6464 offDisp -= 4;
6465 pCodeBuf[off++] = 0x0f;
6466 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6467 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6468 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6469 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6470 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6471 }
6472 }
6473
6474#elif defined(RT_ARCH_ARM64)
6475 if (offLabel >= off)
6476 {
6477 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6478 pCodeBuf[off++] = Armv8A64MkInstrBCond(enmCond, -1);
6479 }
6480 else
6481 {
6482 Assert(off - offLabel <= 0x3ffffU);
6483 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, offLabel - off);
6484 off++;
6485 }
6486
6487#else
6488# error "Port me!"
6489#endif
6490 return off;
6491}
6492
6493
6494/**
6495 * Emits a Jcc rel32 / B.cc imm19 to the given label (ASSUMED requiring fixup).
6496 */
6497DECL_INLINE_THROW(uint32_t)
6498iemNativeEmitJccToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel, IEMNATIVEINSTRCOND enmCond)
6499{
6500#ifdef RT_ARCH_AMD64
6501 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 6), off, idxLabel, enmCond);
6502#elif defined(RT_ARCH_ARM64)
6503 off = iemNativeEmitJccToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1), off, idxLabel, enmCond);
6504#else
6505# error "Port me!"
6506#endif
6507 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6508 return off;
6509}
6510
6511
6512/**
6513 * Emits a Jcc rel32 / B.cc imm19 to a new label.
6514 */
6515DECL_INLINE_THROW(uint32_t)
6516iemNativeEmitJccToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6517 IEMNATIVELABELTYPE enmLabelType, uint16_t uData, IEMNATIVEINSTRCOND enmCond)
6518{
6519 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
6520 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, enmCond);
6521}
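
/* Note (editor's addition): IEMNATIVEINSTRCOND is an x86 condition-code enum
 * on AMD64 but a plain ARMV8INSTRCOND on ARM64, so cross-platform callers
 * either use the Jz/Jnz/Jbe/Ja/Jl wrappers below or pick the condition per
 * architecture, e.g. (illustrative only):
 *
 *     off = iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, 0,
 * #ifdef RT_ARCH_AMD64
 *                                      kIemNativeInstrCond_e);
 * #else
 *                                      kArmv8InstrCond_Eq);
 * #endif
 */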
6522
6523
6524/**
6525 * Emits a JZ/JE rel32 / B.EQ imm19 to the given label.
6526 */
6527DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6528{
6529#ifdef RT_ARCH_AMD64
6530 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_e);
6531#elif defined(RT_ARCH_ARM64)
6532 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Eq);
6533#else
6534# error "Port me!"
6535#endif
6536}
6537
6538/**
6539 * Emits a JZ/JE rel32 / B.EQ imm19 to a new label.
6540 */
6541DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6542 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6543{
6544#ifdef RT_ARCH_AMD64
6545 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_e);
6546#elif defined(RT_ARCH_ARM64)
6547 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Eq);
6548#else
6549# error "Port me!"
6550#endif
6551}
6552
6553
6554/**
6555 * Emits a JNZ/JNE rel32 / B.NE imm19 to the given label.
6556 */
6557DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6558{
6559#ifdef RT_ARCH_AMD64
6560 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
6561#elif defined(RT_ARCH_ARM64)
6562 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ne);
6563#else
6564# error "Port me!"
6565#endif
6566}
6567
6568/**
6569 * Emits a JNZ/JNE rel32 / B.NE imm19 to a new label.
6570 */
6571DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6572 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6573{
6574#ifdef RT_ARCH_AMD64
6575 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_ne);
6576#elif defined(RT_ARCH_ARM64)
6577 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ne);
6578#else
6579# error "Port me!"
6580#endif
6581}
6582
6583
6584/**
6585 * Emits a JBE/JNA rel32 / B.LS imm19 to the given label.
6586 */
6587DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6588{
6589#ifdef RT_ARCH_AMD64
6590 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_be);
6591#elif defined(RT_ARCH_ARM64)
6592 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Ls);
6593#else
6594# error "Port me!"
6595#endif
6596}
6597
6598/**
6599 * Emits a JBE/JNA rel32 / B.LS imm19 to a new label.
6600 */
6601DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6602 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6603{
6604#ifdef RT_ARCH_AMD64
6605 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_be);
6606#elif defined(RT_ARCH_ARM64)
6607 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Ls);
6608#else
6609# error "Port me!"
6610#endif
6611}
6612
6613
6614/**
6615 * Emits a JA/JNBE rel32 / B.HI imm19 to the given label.
6616 */
6617DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6618{
6619#ifdef RT_ARCH_AMD64
6620 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_nbe);
6621#elif defined(RT_ARCH_ARM64)
6622 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Hi);
6623#else
6624# error "Port me!"
6625#endif
6626}
6627
6628/**
6629 * Emits a JA/JNBE rel32 / B.HI imm19 to a new label.
6630 */
6631DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6632 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6633{
6634#ifdef RT_ARCH_AMD64
6635 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_nbe);
6636#elif defined(RT_ARCH_ARM64)
6637 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Hi);
6638#else
6639# error "Port me!"
6640#endif
6641}
6642
6643
6644/**
6645 * Emits a JL/JNGE rel32 / B.LT imm19 to the given label.
6646 */
6647DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxLabel)
6648{
6649#ifdef RT_ARCH_AMD64
6650 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_l);
6651#elif defined(RT_ARCH_ARM64)
6652 return iemNativeEmitJccToLabel(pReNative, off, idxLabel, kArmv8InstrCond_Lt);
6653#else
6654# error "Port me!"
6655#endif
6656}
6657
6658/**
6659 * Emits a JL/JNGE rel32 / B.LT imm19 to a new label.
6660 */
6661DECL_INLINE_THROW(uint32_t) iemNativeEmitJlToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6662 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
6663{
6664#ifdef RT_ARCH_AMD64
6665 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kIemNativeInstrCond_l);
6666#elif defined(RT_ARCH_ARM64)
6667 return iemNativeEmitJccToNewLabel(pReNative, off, enmLabelType, uData, kArmv8InstrCond_Lt);
6668#else
6669# error "Port me!"
6670#endif
6671}
6672
6673
6674/**
6675 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6676 *
6677 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6678 *
6679 * Only use hardcoded jumps forward when emitting for exactly one
6680 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6681 * the right target address on all platforms!
6682 *
6683 *          Please also note that on x86 it is necessary to pass off + 256 or higher
6684 *          for @a offTarget if one believes the intervening code is more than 127
6685 * bytes long.
6686 */
6687DECL_FORCE_INLINE(uint32_t)
6688iemNativeEmitJccToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6689{
6690#ifdef RT_ARCH_AMD64
6691 /* jcc rel8 / rel32 */
6692 int32_t offDisp = (int32_t)(offTarget - (off + 2));
6693 if (offDisp < 128 && offDisp >= -128)
6694 {
6695 pCodeBuf[off++] = (uint8_t)enmCond | 0x70;
6696 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6697 }
6698 else
6699 {
6700 offDisp -= 4;
6701 pCodeBuf[off++] = 0x0f;
6702 pCodeBuf[off++] = (uint8_t)enmCond | 0x80;
6703 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6704 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6705 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6706 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6707 }
6708
6709#elif defined(RT_ARCH_ARM64)
6710 pCodeBuf[off] = Armv8A64MkInstrBCond(enmCond, (int32_t)(offTarget - off));
6711 off++;
6712#else
6713# error "Port me!"
6714#endif
6715 return off;
6716}
6717
6718
6719/**
6720 * Emits a Jcc rel32 / B.cc imm19 with a fixed displacement.
6721 *
6722 * @note The @a offTarget is the absolute jump target (unit is IEMNATIVEINSTR).
6723 *
6724 * Only use hardcoded jumps forward when emitting for exactly one
6725 * platform, otherwise apply iemNativeFixupFixedJump() to ensure hitting
6726 * the right target address on all platforms!
6727 *
6728 *          Please also note that on x86 it is necessary to pass off + 256 or higher
6729 *          for @a offTarget if one believes the intervening code is more than 127
6730 * bytes long.
6731 */
6732DECL_INLINE_THROW(uint32_t)
6733iemNativeEmitJccToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget, IEMNATIVEINSTRCOND enmCond)
6734{
6735#ifdef RT_ARCH_AMD64
6736 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, offTarget, enmCond);
6737#elif defined(RT_ARCH_ARM64)
6738 off = iemNativeEmitJccToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget, enmCond);
6739#else
6740# error "Port me!"
6741#endif
6742 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6743 return off;
6744}
6745
6746
6747/**
6748 * Emits a JZ/JE rel32 / B.EQ imm19 with a fixed displacement.
6749 *
6750 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6751 */
6752DECL_INLINE_THROW(uint32_t) iemNativeEmitJzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6753{
6754#ifdef RT_ARCH_AMD64
6755 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_e);
6756#elif defined(RT_ARCH_ARM64)
6757 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Eq);
6758#else
6759# error "Port me!"
6760#endif
6761}
6762
6763
6764/**
6765 * Emits a JNZ/JNE rel32 / B.NE imm19 with a fixed displacement.
6766 *
6767 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6768 */
6769DECL_INLINE_THROW(uint32_t) iemNativeEmitJnzToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6770{
6771#ifdef RT_ARCH_AMD64
6772 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_ne);
6773#elif defined(RT_ARCH_ARM64)
6774 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ne);
6775#else
6776# error "Port me!"
6777#endif
6778}
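
/* Usage sketch (editor's illustration): fixed-displacement jumps are safe in
 * the backward direction on both hosts because the target offset is final by
 * then.  A simple re-test loop over a hypothetical idxRegFlags:
 *
 *     uint32_t const offLoopStart = off;
 *     off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxRegFlags, 0x01);
 *     off = iemNativeEmitJnzToFixed(pReNative, off, offLoopStart);
 */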
6779
6780
6781/**
6782 * Emits a JBE/JNA rel32 / B.LS imm19 with a fixed displacement.
6783 *
6784 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6785 */
6786DECL_INLINE_THROW(uint32_t) iemNativeEmitJbeToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6787{
6788#ifdef RT_ARCH_AMD64
6789 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_be);
6790#elif defined(RT_ARCH_ARM64)
6791 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Ls);
6792#else
6793# error "Port me!"
6794#endif
6795}
6796
6797
6798/**
6799 * Emits a JA/JNBE rel32 / B.HI imm19 with a fixed displacement.
6800 *
6801 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6802 */
6803DECL_INLINE_THROW(uint32_t) iemNativeEmitJaToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6804{
6805#ifdef RT_ARCH_AMD64
6806 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kIemNativeInstrCond_nbe);
6807#elif defined(RT_ARCH_ARM64)
6808 return iemNativeEmitJccToFixed(pReNative, off, offTarget, kArmv8InstrCond_Hi);
6809#else
6810# error "Port me!"
6811#endif
6812}
6813
6814
6815/**
6816 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6817 *
6818 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6819 */
6820DECL_FORCE_INLINE(uint32_t) iemNativeEmitJmpToFixedEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint32_t offTarget)
6821{
6822#ifdef RT_ARCH_AMD64
6823 /* jmp rel8 or rel32 */
6824 int32_t offDisp = offTarget - (off + 2);
6825 if (offDisp < 128 && offDisp >= -128)
6826 {
6827 pCodeBuf[off++] = 0xeb;
6828 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6829 }
6830 else
6831 {
6832 offDisp -= 3;
6833 pCodeBuf[off++] = 0xe9;
6834 pCodeBuf[off++] = RT_BYTE1((uint32_t)offDisp);
6835 pCodeBuf[off++] = RT_BYTE2((uint32_t)offDisp);
6836 pCodeBuf[off++] = RT_BYTE3((uint32_t)offDisp);
6837 pCodeBuf[off++] = RT_BYTE4((uint32_t)offDisp);
6838 }
6839
6840#elif defined(RT_ARCH_ARM64)
6841 pCodeBuf[off] = Armv8A64MkInstrB((int32_t)(offTarget - off));
6842 off++;
6843
6844#else
6845# error "Port me!"
6846#endif
6847 return off;
6848}
6849
6850
6851/**
6852 * Emits a JMP rel32/rel8 / B imm26 with a fixed displacement.
6853 *
6854 * See notes on @a offTarget in the iemNativeEmitJccToFixed() documentation.
6855 */
6856DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpToFixed(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offTarget)
6857{
6858#ifdef RT_ARCH_AMD64
6859 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, offTarget);
6860#elif defined(RT_ARCH_ARM64)
6861 off = iemNativeEmitJmpToFixedEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, offTarget);
6862#else
6863# error "Port me!"
6864#endif
6865 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6866 return off;
6867}
6868
6869
6870/**
6871 * Fixes up a conditional jump to a fixed label.
6872 * @see iemNativeEmitJmpToFixed, iemNativeEmitJnzToFixed,
6873 * iemNativeEmitJzToFixed, ...
6874 */
6875DECL_INLINE_THROW(void) iemNativeFixupFixedJump(PIEMRECOMPILERSTATE pReNative, uint32_t offFixup, uint32_t offTarget)
6876{
6877#ifdef RT_ARCH_AMD64
6878 uint8_t * const pbCodeBuf = pReNative->pInstrBuf;
6879 uint8_t const bOpcode = pbCodeBuf[offFixup];
6880 if ((uint8_t)(bOpcode - 0x70) < (uint8_t)0x10 || bOpcode == 0xeb)
6881 {
6882 pbCodeBuf[offFixup + 1] = (uint8_t)(offTarget - (offFixup + 2));
6883 AssertStmt(pbCodeBuf[offFixup + 1] == offTarget - (offFixup + 2),
6884 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_FIXED_JUMP_OUT_OF_RANGE));
6885 }
6886 else
6887 {
6888 if (bOpcode != 0x0f)
6889 Assert(bOpcode == 0xe9);
6890 else
6891 {
6892 offFixup += 1;
6893            Assert((uint8_t)(pbCodeBuf[offFixup] - 0x80) < 0x10);
6894 }
6895 uint32_t const offRel32 = offTarget - (offFixup + 5);
6896 pbCodeBuf[offFixup + 1] = RT_BYTE1(offRel32);
6897 pbCodeBuf[offFixup + 2] = RT_BYTE2(offRel32);
6898 pbCodeBuf[offFixup + 3] = RT_BYTE3(offRel32);
6899 pbCodeBuf[offFixup + 4] = RT_BYTE4(offRel32);
6900 }
6901
6902#elif defined(RT_ARCH_ARM64)
6903 uint32_t * const pu32CodeBuf = pReNative->pInstrBuf;
6904 if ((pu32CodeBuf[offFixup] & UINT32_C(0xff000000)) == UINT32_C(0x54000000))
6905 {
6906 /* B.COND + BC.COND */
6907 int32_t const offDisp = offTarget - offFixup;
6908 Assert(offDisp >= -262144 && offDisp < 262144);
6909 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xff00001f))
6910 | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
6911 }
6912 else
6913 {
6914 /* B imm26 */
6915 Assert((pu32CodeBuf[offFixup] & UINT32_C(0xfc000000)) == UINT32_C(0x14000000));
6916 int32_t const offDisp = offTarget - offFixup;
6917 Assert(offDisp >= -33554432 && offDisp < 33554432);
6918 pu32CodeBuf[offFixup] = (pu32CodeBuf[offFixup] & UINT32_C(0xfc000000))
6919 | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
6920 }
6921
6922#else
6923# error "Port me!"
6924#endif
6925}
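
/* Usage sketch (editor's illustration of the pattern the notes above call
 * for): emit a forward jump with a placeholder target, then patch it once the
 * real target is known.  Passing off + 256 forces the rel32 encoding on x86
 * in case the skipped code exceeds 127 bytes:
 *
 *     uint32_t const offFixup = off;
 *     off = iemNativeEmitJzToFixed(pReNative, off, off + 256);  // placeholder target
 *     ...                                                       // code being skipped
 *     iemNativeFixupFixedJump(pReNative, offFixup, off);        // patch in real target
 */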
6926
6927
6928#ifdef RT_ARCH_AMD64
6929/**
6930 * For doing bt on a register.
6931 */
6932DECL_INLINE_THROW(uint32_t)
6933iemNativeEmitAmd64TestBitInGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t iBitNo)
6934{
6935 Assert(iBitNo < 64);
6936 /* bt Ev, imm8 */
6937 if (iBitNo >= 32)
6938 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6939 else if (iGprSrc >= 8)
6940 pCodeBuf[off++] = X86_OP_REX_B;
6941 pCodeBuf[off++] = 0x0f;
6942 pCodeBuf[off++] = 0xba;
6943 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6944 pCodeBuf[off++] = iBitNo;
6945 return off;
6946}
6947#endif /* RT_ARCH_AMD64 */
6948
6949
6950/**
6951 * Internal helper, don't call directly.
6952 */
6953DECL_INLINE_THROW(uint32_t)
6954iemNativeEmitTestBitInGprAndJmpToLabelIfCc(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
6955 uint8_t iBitNo, uint32_t idxLabel, bool fJmpIfSet)
6956{
6957 Assert(iBitNo < 64);
6958#ifdef RT_ARCH_AMD64
6959 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6960 if (iBitNo < 8)
6961 {
6962 /* test Eb, imm8 */
6963 if (iGprSrc >= 4)
6964 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
6965 pbCodeBuf[off++] = 0xf6;
6966 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
6967 pbCodeBuf[off++] = (uint8_t)1 << iBitNo;
6968 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
6969 }
6970 else
6971 {
6972 /* bt Ev, imm8 */
6973 if (iBitNo >= 32)
6974 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
6975 else if (iGprSrc >= 8)
6976 pbCodeBuf[off++] = X86_OP_REX_B;
6977 pbCodeBuf[off++] = 0x0f;
6978 pbCodeBuf[off++] = 0xba;
6979 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
6980 pbCodeBuf[off++] = iBitNo;
6981 off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, fJmpIfSet ? kIemNativeInstrCond_c : kIemNativeInstrCond_nc);
6982 }
6983
6984#elif defined(RT_ARCH_ARM64)
6985    /* Use the TBZ/TBNZ instructions here. */
6986 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6987 if (pReNative->paLabels[idxLabel].enmType > kIemNativeLabelType_LastWholeTbBranch)
6988 {
6989 AssertMsg(pReNative->paLabels[idxLabel].off == UINT32_MAX,
6990 ("TODO: Please enable & test commented out code for jumping back to a predefined label.\n"));
6991 //uint32_t offLabel = pReNative->paLabels[idxLabel].off;
6992 //if (offLabel == UINT32_MAX)
6993 {
6994 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm14At5);
6995 pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, 0, iGprSrc, iBitNo);
6996 }
6997 //else
6998 //{
6999 // RT_BREAKPOINT();
7000 // Assert(off - offLabel <= 0x1fffU);
7001 // pu32CodeBuf[off++] = Armv8A64MkInstrTbzTbnz(fJmpIfSet, offLabel - off, iGprSrc, iBitNo);
7002 //
7003 //}
7004 }
7005 else
7006 {
7007 Assert(Armv8A64ConvertImmRImmS2Mask64(0x40, (64U - iBitNo) & 63U) == RT_BIT_64(iBitNo));
7008 pu32CodeBuf[off++] = Armv8A64MkInstrTstImm(iGprSrc, 0x40, (64U - iBitNo) & 63U);
7009 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7010 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(fJmpIfSet ? kArmv8InstrCond_Ne : kArmv8InstrCond_Eq, 0);
7011 }
7012
7013#else
7014# error "Port me!"
7015#endif
7016 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7017 return off;
7018}
7019
7020
7021/**
7022 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _set_ in
7023 * @a iGprSrc.
7024 *
7025 * @note On ARM64 the range is only +/-8191 instructions.
7026 */
7027DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7028 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7029{
7030 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, true /*fJmpIfSet*/);
7031}
7032
7033
7034/**
7035 * Emits a jump to @a idxLabel on the condition that bit @a iBitNo _is_ _not_
7036 * _set_ in @a iGprSrc.
7037 *
7038 * @note On ARM64 the range is only +/-8191 instructions.
7039 */
7040DECL_INLINE_THROW(uint32_t) iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7041 uint8_t iGprSrc, uint8_t iBitNo, uint32_t idxLabel)
7042{
7043 return iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, iGprSrc, iBitNo, idxLabel, false /*fJmpIfSet*/);
7044}
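
/* Usage sketch (editor's illustration): branch when bit 63 of a hypothetical
 * idxRegAddr is clear, e.g. as part of a canonical-address style check; mind
 * the +/-8191 instruction TBZ/TBNZ range limit noted above on ARM64:
 *
 *     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxRegAddr, 63, idxLabel);
 */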
7045
7046
7047/**
7048 * Emits a test for any of the bits from @a fBits in @a iGprSrc, setting CPU
7049 * flags accordingly.
7050 */
7051DECL_INLINE_THROW(uint32_t)
7052iemNativeEmitTestAnyBitsInGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t fBits)
7053{
7054 Assert(fBits != 0);
7055#ifdef RT_ARCH_AMD64
7056
7057 if (fBits >= UINT32_MAX)
7058 {
7059 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7060
7061 /* test Ev,Gv */
7062 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
7063 pbCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R) | (iTmpReg < 8 ? 0 : X86_OP_REX_B);
7064 pbCodeBuf[off++] = 0x85;
7065        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iTmpReg & 7);
7066
7067 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7068 }
7069 else if (fBits <= UINT32_MAX)
7070 {
7071 /* test Eb, imm8 or test Ev, imm32 */
7072 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7073 if (fBits <= UINT8_MAX)
7074 {
7075 if (iGprSrc >= 4)
7076 pbCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7077 pbCodeBuf[off++] = 0xf6;
7078 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7079 pbCodeBuf[off++] = (uint8_t)fBits;
7080 }
7081 else
7082 {
7083 if (iGprSrc >= 8)
7084 pbCodeBuf[off++] = X86_OP_REX_B;
7085 pbCodeBuf[off++] = 0xf7;
7086 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7087 pbCodeBuf[off++] = RT_BYTE1(fBits);
7088 pbCodeBuf[off++] = RT_BYTE2(fBits);
7089 pbCodeBuf[off++] = RT_BYTE3(fBits);
7090 pbCodeBuf[off++] = RT_BYTE4(fBits);
7091 }
7092 }
7093 /** @todo implement me. */
7094 else
7095 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
7096
7097#elif defined(RT_ARCH_ARM64)
7098 uint32_t uImmR = 0;
7099 uint32_t uImmNandS = 0;
7100 if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
7101 {
7102 /* ands xzr, iGprSrc, #fBits */
7103 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7104 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
7105 }
7106 else
7107 {
7108 /* ands xzr, iGprSrc, iTmpReg */
7109 uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7110 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7111 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg);
7112 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7113 }
7114
7115#else
7116# error "Port me!"
7117#endif
7118 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7119 return off;
7120}
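
/* Usage sketch (editor's illustration): test-and-branch on a 64-bit mask; the
 * IfAnySet/IfNoneSet helpers further down wrap exactly this sequence.  The
 * idxRegEfl register and fBitsMask value are hypothetical:
 *
 *     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegEfl, fBitsMask);
 *     off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
 */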
7121
7122
7123/**
7124 * Emits a test for any of the bits from @a fBits in the lower 32 bits of
7125 * @a iGprSrc, setting CPU flags accordingly.
7126 *
7127 * @note For ARM64 this only supports @a fBits values that can be expressed
7128 * using the two 6-bit immediates of the ANDS instruction. The caller
7129 * must make sure this is possible!
7130 */
7131DECL_FORCE_INLINE_THROW(uint32_t)
7132iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
7133{
7134 Assert(fBits != 0);
7135
7136#ifdef RT_ARCH_AMD64
7137 if (fBits <= UINT8_MAX)
7138 {
7139 /* test Eb, imm8 */
7140 if (iGprSrc >= 4)
7141 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7142 pCodeBuf[off++] = 0xf6;
7143 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7144 pCodeBuf[off++] = (uint8_t)fBits;
7145 }
7146 else
7147 {
7148 /* test Ev, imm32 */
7149 if (iGprSrc >= 8)
7150 pCodeBuf[off++] = X86_OP_REX_B;
7151 pCodeBuf[off++] = 0xf7;
7152 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7153 pCodeBuf[off++] = RT_BYTE1(fBits);
7154 pCodeBuf[off++] = RT_BYTE2(fBits);
7155 pCodeBuf[off++] = RT_BYTE3(fBits);
7156 pCodeBuf[off++] = RT_BYTE4(fBits);
7157 }
7158
7159#elif defined(RT_ARCH_ARM64)
7160 /* ands xzr, src, #fBits */
7161 uint32_t uImmR = 0;
7162 uint32_t uImmNandS = 0;
7163 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7164 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7165 else
7166# ifdef IEM_WITH_THROW_CATCH
7167 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7168# else
7169 AssertReleaseFailedStmt(off = UINT32_MAX);
7170# endif
7171
7172#else
7173# error "Port me!"
7174#endif
7175 return off;
7176}
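
/* Note (editor's addition): ARM64 logical immediates can only encode masks
 * consisting of a repeating, rotated run of ones, so when the mask is not a
 * compile-time constant a caller of the Ex variant should check encodability
 * first (sketch, using the same conversion helper as above):
 *
 *     uint32_t uImmR = 0, uImmNandS = 0;
 *     if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
 *         off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, iGprSrc, fBits);
 *     else
 *         off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);  // falls back to a tmp register
 */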
7177
7178
7179
7180/**
7181 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7182 * @a iGprSrc, setting CPU flags accordingly.
7183 *
7184 * @note For ARM64 this only supports @a fBits values that can be expressed
7185 * using the two 6-bit immediates of the ANDS instruction. The caller
7186 * must make sure this is possible!
7187 */
7188DECL_FORCE_INLINE_THROW(uint32_t)
7189iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7190{
7191 Assert(fBits != 0);
7192
7193#ifdef RT_ARCH_AMD64
7194 /* test Eb, imm8 */
7195 if (iGprSrc >= 4)
7196 pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
7197 pCodeBuf[off++] = 0xf6;
7198 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
7199 pCodeBuf[off++] = fBits;
7200
7201#elif defined(RT_ARCH_ARM64)
7202 /* ands xzr, src, #fBits */
7203 uint32_t uImmR = 0;
7204 uint32_t uImmNandS = 0;
7205 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7206 pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7207 else
7208# ifdef IEM_WITH_THROW_CATCH
7209 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
7210# else
7211 AssertReleaseFailedStmt(off = UINT32_MAX);
7212# endif
7213
7214#else
7215# error "Port me!"
7216#endif
7217 return off;
7218}
7219
7220
7221/**
7222 * Emits a test for any of the bits from @a fBits in the lower 8 bits of
7223 * @a iGprSrc, setting CPU flags accordingly.
7224 */
7225DECL_INLINE_THROW(uint32_t)
7226iemNativeEmitTestAnyBitsInGpr8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
7227{
7228 Assert(fBits != 0);
7229
7230#ifdef RT_ARCH_AMD64
7231 off = iemNativeEmitTestAnyBitsInGpr8Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprSrc, fBits);
7232
7233#elif defined(RT_ARCH_ARM64)
7234 /* ands xzr, src, [tmp|#imm] */
7235 uint32_t uImmR = 0;
7236 uint32_t uImmNandS = 0;
7237 if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
7238 {
7239 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7240 pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
7241 }
7242 else
7243 {
7244        /* Use a temporary register for the immediate. */
7245 uint8_t iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
7246 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7247 pu32CodeBuf[off++] = Armv8A64MkInstrAnds(ARMV8_A64_REG_XZR, iGprSrc, iTmpReg, false /*f64Bit*/);
7248 iemNativeRegFreeTmpImm(pReNative, iTmpReg);
7249 }
7250
7251#else
7252# error "Port me!"
7253#endif
7254 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7255 return off;
7256}
7257
7258
7259/**
7260 * Emits a jump to @a idxLabel on the condition that _any_ of the bits in @a fBits
7261 * are set in @a iGprSrc.
7262 */
7263DECL_INLINE_THROW(uint32_t)
7264iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7265 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7266{
7267 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7268
7269 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7270 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7271
7272 return off;
7273}
7274
7275
7276/**
7277 * Emits a jump to @a idxLabel on the condition that _none_ of the bits in @a fBits
7278 * are set in @a iGprSrc.
7279 */
7280DECL_INLINE_THROW(uint32_t)
7281iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7282 uint8_t iGprSrc, uint64_t fBits, uint32_t idxLabel)
7283{
7284 Assert(fBits); Assert(!RT_IS_POWER_OF_TWO(fBits));
7285
7286 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, iGprSrc, fBits);
7287 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7288
7289 return off;
7290}
7291
7292
7293/**
7294 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero as selected by @a fJmpIfNotZero.
7295 *
7296 * The operand size is given by @a f64Bit.
7297 */
7298DECL_FORCE_INLINE_THROW(uint32_t)
7299iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7300 uint8_t iGprSrc, bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7301{
7302 Assert(idxLabel < pReNative->cLabels);
7303
7304#ifdef RT_ARCH_AMD64
7305 /* test reg32,reg32 / test reg64,reg64 */
7306 if (f64Bit)
7307 pCodeBuf[off++] = X86_OP_REX_W | (iGprSrc < 8 ? 0 : X86_OP_REX_R | X86_OP_REX_B);
7308 else if (iGprSrc >= 8)
7309 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
7310 pCodeBuf[off++] = 0x85;
7311 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprSrc & 7, iGprSrc & 7);
7312
7313 /* jnz idxLabel */
7314 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabel,
7315 fJmpIfNotZero ? kIemNativeInstrCond_ne : kIemNativeInstrCond_e);
7316
7317#elif defined(RT_ARCH_ARM64)
7318 if (pReNative->paLabels[idxLabel].off != UINT32_MAX)
7319 {
7320 pCodeBuf[off] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, (int32_t)(pReNative->paLabels[idxLabel].off - off),
7321 iGprSrc, f64Bit);
7322 off++;
7323 }
7324 else
7325 {
7326 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7327 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(fJmpIfNotZero, 0, iGprSrc, f64Bit);
7328 }
7329
7330#else
7331# error "Port me!"
7332#endif
7333 return off;
7334}
7335
7336
7337/**
7338 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero or not zero as selected by @a fJmpIfNotZero.
7339 *
7340 * The operand size is given by @a f64Bit.
7341 */
7342DECL_FORCE_INLINE_THROW(uint32_t)
7343iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc,
7344 bool f64Bit, bool fJmpIfNotZero, uint32_t idxLabel)
7345{
7346#ifdef RT_ARCH_AMD64
7347 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 3 + 6),
7348 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7349#elif defined(RT_ARCH_ARM64)
7350 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, iemNativeInstrBufEnsure(pReNative, off, 1),
7351 off, iGprSrc, f64Bit, fJmpIfNotZero, idxLabel);
7352#else
7353# error "Port me!"
7354#endif
7355 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7356 return off;
7357}
7358
7359
7360/* if (Gpr1 == 0) Jmp idxLabel; */
7361
7362/**
7363 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7364 *
7365 * The operand size is given by @a f64Bit.
7366 */
7367DECL_FORCE_INLINE_THROW(uint32_t)
7368iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7369 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7370{
7371 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7372 f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7373}
7374
7375
7376/**
7377 * Emits code that jumps to @a idxLabel if @a iGprSrc is zero.
7378 *
7379 * The operand size is given by @a f64Bit.
7380 */
7381DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7382 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7383{
7384 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, false /*fJmpIfNotZero*/, idxLabel);
7385}
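
/* Usage sketch (editor's illustration): a null-pointer style check that is a
 * single CBZ instruction on ARM64 and TEST+Jcc on AMD64; idxRegPtr is
 * hypothetical:
 *
 *     off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxRegPtr,
 *                                                     true, idxLabel);  // f64Bit=true
 */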
7386
7387
7388/**
7389 * Emits code that jumps to a new label if @a iGprSrc is zero.
7390 *
7391 * The operand size is given by @a f64Bit.
7392 */
7393DECL_INLINE_THROW(uint32_t)
7394iemNativeEmitTestIfGprIsZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7395 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7396{
7397 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7398 return iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7399}
7400
7401
7402/* if (Gpr1 != 0) Jmp idxLabel; */
7403
7404/**
7405 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7406 *
7407 * The operand size is given by @a f64Bit.
7408 */
7409DECL_FORCE_INLINE_THROW(uint32_t)
7410iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf, uint32_t off,
7411 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7412{
7413 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, iGprSrc,
7414 f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7415}
7416
7417
7418/**
7419 * Emits code that jumps to @a idxLabel if @a iGprSrc is not zero.
7420 *
7421 * The operand size is given by @a f64Bit.
7422 */
7423DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7424 uint8_t iGprSrc, bool f64Bit, uint32_t idxLabel)
7425{
7426 return iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, true /*fJmpIfNotZero*/, idxLabel);
7427}
7428
7429
7430/**
7431 * Emits code that jumps to a new label if @a iGprSrc is not zero.
7432 *
7433 * The operand size is given by @a f64Bit.
7434 */
7435DECL_INLINE_THROW(uint32_t)
7436iemNativeEmitTestIfGprIsNotZeroAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, bool f64Bit,
7437 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7438{
7439 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7440 return iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, iGprSrc, f64Bit, idxLabel);
7441}
7442
7443
7444/* if (Gpr1 != Gpr2) Jmp idxLabel; */
7445
7446/**
7447 * Emits code that jumps to the given label if @a iGprLeft and @a iGprRight
7448 * differs.
7449 */
7450DECL_INLINE_THROW(uint32_t)
7451iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7452 uint8_t iGprLeft, uint8_t iGprRight, uint32_t idxLabel)
7453{
7454 off = iemNativeEmitCmpGprWithGpr(pReNative, off, iGprLeft, iGprRight);
7455 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7456 return off;
7457}
7458
7459
7460/**
7461 * Emits code that jumps to a new label if @a iGprLeft and @a iGprRight differs.
7462 */
7463DECL_INLINE_THROW(uint32_t)
7464iemNativeEmitTestIfGprNotEqualGprAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7465 uint8_t iGprLeft, uint8_t iGprRight,
7466 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7467{
7468 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7469 return iemNativeEmitTestIfGprNotEqualGprAndJmpToLabel(pReNative, off, iGprLeft, iGprRight, idxLabel);
7470}
7471
7472
7473/* if (Gpr != Imm) Jmp idxLabel; */
7474
7475/**
7476 * Emits code that jumps to the given label if @a iGprSrc differs from @a uImm.
7477 */
7478DECL_INLINE_THROW(uint32_t)
7479iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7480 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7481{
7482 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7483 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7484 return off;
7485}
7486
7487
7488/**
7489 * Emits code that jumps to a new label if @a iGprSrc differs from @a uImm.
7490 */
7491DECL_INLINE_THROW(uint32_t)
7492iemNativeEmitTestIfGprNotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7493 uint8_t iGprSrc, uint64_t uImm,
7494 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7495{
7496 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7497 return iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7498}
7499
7500
7501/**
7502 * Emits code that jumps to the given label if 32-bit @a iGprSrc differs from
7503 * @a uImm.
7504 */
7505DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7506 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7507{
7508 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7509 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7510 return off;
7511}
7512
7513
7514/**
7515 * Emits code that jumps to a new label if 32-bit @a iGprSrc differs from
7516 * @a uImm.
7517 */
7518DECL_INLINE_THROW(uint32_t)
7519iemNativeEmitTestIfGpr32NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7520 uint8_t iGprSrc, uint32_t uImm,
7521 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7522{
7523 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7524 return iemNativeEmitTestIfGpr32NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7525}
7526
7527
7528/**
7529 * Emits code that jumps to the given label if 16-bit @a iGprSrc differs from
7530 * @a uImm.
7531 */
7532DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7533 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel)
7534{
7535 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm);
7536 off = iemNativeEmitJnzToLabel(pReNative, off, idxLabel);
7537 return off;
7538}
7539
7540
7541/**
7542 * Emits code that jumps to a new label if 16-bit @a iGprSrc differs from
7543 * @a uImm.
7544 */
7545DECL_INLINE_THROW(uint32_t)
7546iemNativeEmitTestIfGpr16NotEqualImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7547 uint8_t iGprSrc, uint16_t uImm,
7548 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7549{
7550 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7551 return iemNativeEmitTestIfGpr16NotEqualImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7552}
7553
7554
7555/* if (Gpr == Imm) Jmp idxLabel; */
7556
7557/**
7558 * Emits code that jumps to the given label if @a iGprSrc equals @a uImm.
7559 */
7560DECL_INLINE_THROW(uint32_t)
7561iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7562 uint8_t iGprSrc, uint64_t uImm, uint32_t idxLabel)
7563{
7564 off = iemNativeEmitCmpGprWithImm(pReNative, off, iGprSrc, uImm);
7565 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7566 return off;
7567}
7568
7569
7570/**
7571 * Emits code that jumps to a new label if @a iGprSrc equals @a uImm.
7572 */
7573DECL_INLINE_THROW(uint32_t)
7574iemNativeEmitTestIfGprEqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint64_t uImm,
7575 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7576{
7577 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7578 return iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7579}
7580
7581
7582/**
7583 * Emits code that jumps to the given label if 32-bit @a iGprSrc equals @a uImm.
7584 */
7585DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7586 uint8_t iGprSrc, uint32_t uImm, uint32_t idxLabel)
7587{
7588 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, iGprSrc, uImm);
7589 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7590 return off;
7591}
7592
7593
7594/**
7595 * Emits code that jumps to a new label if 32-bit @a iGprSrc equals @a uImm.
7596 */
7597DECL_INLINE_THROW(uint32_t)
7598iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint32_t uImm,
7599 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0)
7600{
7601 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7602 return iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel);
7603}
7604
7605
7606/**
7607 * Emits code that jumps to the given label if 16-bit @a iGprSrc equals @a uImm.
7608 *
7609 * @note ARM64: Helper register is required (idxTmpReg).
7610 */
7611DECL_INLINE_THROW(uint32_t) iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7612 uint8_t iGprSrc, uint16_t uImm, uint32_t idxLabel,
7613 uint8_t idxTmpReg = UINT8_MAX)
7614{
7615 off = iemNativeEmitCmpGpr16WithImm(pReNative, off, iGprSrc, uImm, idxTmpReg);
7616 off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);
7617 return off;
7618}
7619
7620
7621/**
7622 * Emits code that jumps to a new label if 16-bit @a iGprSrc equals @a uImm.
7623 *
7624 * @note ARM64: Helper register is required (idxTmpReg).
7625 */
7626DECL_INLINE_THROW(uint32_t)
7627iemNativeEmitTestIfGpr16EqualsImmAndJmpToNewLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc, uint16_t uImm,
7628 IEMNATIVELABELTYPE enmLabelType, uint16_t uData = 0,
7629 uint8_t idxTmpReg = UINT8_MAX)
7630{
7631 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX /*offWhere*/, uData);
7632 return iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, iGprSrc, uImm, idxLabel, idxTmpReg);
7633}
7634
7635
7636
7637/*********************************************************************************************************************************
7638* Indirect Jumps. *
7639*********************************************************************************************************************************/
7640
7641/**
7642 * Emits an indirect jump to a 64-bit address in a GPR.
7643 */
7644DECL_INLINE_THROW(uint32_t) iemNativeEmitJmpViaGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprSrc)
7645{
7646#ifdef RT_ARCH_AMD64
7647 uint8_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7648 if (iGprSrc >= 8)
7649 pCodeBuf[off++] = X86_OP_REX_B;
7650 pCodeBuf[off++] = 0xff;
7651 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, iGprSrc & 7);
7652
7653#elif defined(RT_ARCH_ARM64)
7654 uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7655 pCodeBuf[off++] = Armv8A64MkInstrBr(iGprSrc);
7656
7657#else
7658# error "port me"
7659#endif
7660 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7661 return off;
7662}
7663
7664
7665/*********************************************************************************************************************************
7666* Calls. *
7667*********************************************************************************************************************************/
7668
7669/**
7670 * Emits a call to a 64-bit address.
7671 */
7672DECL_INLINE_THROW(uint32_t) iemNativeEmitCallImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t uPfn)
7673{
7674#ifdef RT_ARCH_AMD64
7675 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, uPfn);
7676
7677 /* call rax */
7678 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7679 pbCodeBuf[off++] = 0xff;
7680 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
7681
7682#elif defined(RT_ARCH_ARM64)
7683 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, uPfn);
7684
7685 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7686 pu32CodeBuf[off++] = Armv8A64MkInstrBlr(IEMNATIVE_REG_FIXED_TMP0);
7687
7688#else
7689# error "port me"
7690#endif
7691 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7692 return off;
7693}
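
/* Usage sketch (editor's illustration): load the argument registers before
 * the call; IEMNATIVE_CALL_ARG0_GREG/ARG1_GREG and IEMNATIVE_REG_FIXED_PVMCPU
 * are assumed to be this recompiler's usual fixed register macros:
 *
 *     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *     off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uSomeArg);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
 */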
7694
7695
7696/**
7697 * Emits code to load a stack variable into an argument GPR.
7698 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7699 */
7700DECL_FORCE_INLINE_THROW(uint32_t)
7701iemNativeEmitLoadArgGregFromStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7702 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = UINT32_MAX,
7703 bool fSpilledVarsInVolatileRegs = false)
7704{
7705 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7706 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7707 AssertStmt(pVar->enmKind == kIemNativeVarKind_Stack, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7708
7709 uint8_t const idxRegVar = pVar->idxReg;
7710 if ( idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs)
7711 && ( (RT_BIT_32(idxRegVar) & (~IEMNATIVE_CALL_VOLATILE_GREG_MASK | fHstVolatileRegsAllowed))
7712 || !fSpilledVarsInVolatileRegs ))
7713 {
7714 AssertStmt( !(RT_BIT_32(idxRegVar) & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
7715 || (RT_BIT_32(idxRegVar) & fHstVolatileRegsAllowed),
7716 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_13));
7717 if (!offAddend)
7718 {
7719 if (idxRegArg != idxRegVar)
7720 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegArg, idxRegVar);
7721 }
7722 else
7723 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegArg, idxRegVar, offAddend);
7724 }
7725 else
7726 {
7727 uint8_t const idxStackSlot = pVar->idxStackSlot;
7728 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7729 off = iemNativeEmitLoadGprByBp(pReNative, off, idxRegArg, iemNativeStackCalcBpDisp(idxStackSlot));
7730 if (offAddend)
7731 off = iemNativeEmitAddGprImm(pReNative, off, idxRegArg, offAddend);
7732 }
7733 return off;
7734}
7735
7736
7737/**
7738 * Emits code to load a stack or immediate variable value into an argument GPR,
7739 * optionally with an addend.
7740 * @throws VERR_IEM_VAR_NOT_INITIALIZED, VERR_IEM_VAR_UNEXPECTED_KIND
7741 */
7742DECL_FORCE_INLINE_THROW(uint32_t)
7743iemNativeEmitLoadArgGregFromImmOrStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7744 int32_t offAddend = 0, uint32_t fHstVolatileRegsAllowed = 0,
7745 bool fSpilledVarsInVolatileRegs = false)
7746{
7747 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7748 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7749 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7750 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg, pVar->u.uValue + offAddend);
7751 else
7752 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, idxRegArg, idxVar, offAddend,
7753 fHstVolatileRegsAllowed, fSpilledVarsInVolatileRegs);
7754 return off;
7755}
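
/* Usage sketch (editor's illustration): feeding a call argument from a
 * variable that may be an immediate or live on the stack; the hypothetical
 * idxVarValue is loaded with the default zero addend:
 *
 *     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off,
 *                                                     IEMNATIVE_CALL_ARG1_GREG, idxVarValue);
 */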
7756
7757
7758/**
7759 * Emits code to load the variable address into an argument GPR.
7760 *
7761 * This only works for uninitialized and stack variables.
7762 */
7763DECL_FORCE_INLINE_THROW(uint32_t)
7764iemNativeEmitLoadArgGregWithVarAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7765 bool fFlushShadows)
7766{
7767 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7768 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7769 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7770 || pVar->enmKind == kIemNativeVarKind_Stack,
7771 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7772 AssertStmt(!pVar->fSimdReg,
7773 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7774
7775 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7776 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7777
7778 uint8_t const idxRegVar = pVar->idxReg;
7779 if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
7780 {
7781 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, idxRegVar);
7782 iemNativeRegFreeVar(pReNative, idxRegVar, fFlushShadows);
7783 Assert(pVar->idxReg == UINT8_MAX);
7784 }
7785 Assert( pVar->idxStackSlot != UINT8_MAX
7786 && pVar->idxReg == UINT8_MAX);
7787
7788 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7789}
7790
7791
7792#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7793/**
7794 * Emits code to load the variable address into an argument GPR.
7795 *
7796 * This is a special variant intended for SIMD variables only; it is only
7797 * called by the TLB miss path in the memory fetch/store code, where the value
7798 * is passed by reference and both the register and the stack copy are needed,
7799 * depending on which path (TLB hit vs. miss) is taken.
7800 */
7801DECL_FORCE_INLINE_THROW(uint32_t)
7802iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxRegArg, uint8_t idxVar,
7803 bool fSyncRegWithStack = true)
7804{
7805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7806 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7807 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7808 || pVar->enmKind == kIemNativeVarKind_Stack,
7809 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7810 AssertStmt(pVar->fSimdReg,
7811 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7812 Assert( pVar->idxStackSlot != UINT8_MAX
7813 && pVar->idxReg != UINT8_MAX);
7814
7815 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7816 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7817
7818 uint8_t const idxRegVar = pVar->idxReg;
7819 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7820 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7821
7822 if (fSyncRegWithStack)
7823 {
7824 if (pVar->cbVar == sizeof(RTUINT128U))
7825 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
7826 else
7827 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
7828 }
7829
7830 return iemNativeEmitLeaGprByBp(pReNative, off, idxRegArg, offBpDisp);
7831}
7832
7833
7834/**
7835 * Emits code to sync the host SIMD register assigned to the given SIMD variable.
7836 *
7837 * This is a special helper that is only called by the TLB miss path in the
7838 * memory fetch/store code, where the value is passed by reference and the
7839 * assigned host register must be re-synced with the stack copy after a TLB
7840 * miss leaves the updated value on the stack.
7841 */
7842DECL_FORCE_INLINE_THROW(uint32_t)
7843iemNativeEmitSimdVarSyncStackToRegister(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar)
7844{
7845 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7846 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7847 AssertStmt( pVar->enmKind == kIemNativeVarKind_Invalid
7848 || pVar->enmKind == kIemNativeVarKind_Stack,
7849 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7850 AssertStmt(pVar->fSimdReg,
7851 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7852 Assert( pVar->idxStackSlot != UINT8_MAX
7853 && pVar->idxReg != UINT8_MAX);
7854
7855 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7856 int32_t const offBpDisp = iemNativeStackCalcBpDisp(idxStackSlot);
7857
7858 uint8_t const idxRegVar = pVar->idxReg;
7859 Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
7860 Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
7861
7862 if (pVar->cbVar == sizeof(RTUINT128U))
7863 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxRegVar, offBpDisp);
7864 else
7865 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxRegVar, offBpDisp);
7866
7867 return off;
7868}
7869
7870
7871/**
7872 * Emits a gprdst = ~gprsrc store.
7873 */
7874DECL_FORCE_INLINE_THROW(uint32_t)
7875iemNativeEmitInvBitsGprEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7876{
7877#ifdef RT_ARCH_AMD64
7878 if (iGprDst != iGprSrc)
7879 {
7880 /* mov gprdst, gprsrc. */
7881 if (f64Bit)
7882 off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, iGprDst, iGprSrc);
7883 else
7884 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, iGprDst, iGprSrc); /* Bits 32:63 are cleared. */
7885 }
7886
7887 /* not gprdst */
7888 if (f64Bit || iGprDst >= 8)
7889 pCodeBuf[off++] = (f64Bit ? X86_OP_REX_W : 0)
7890 | (iGprDst >= 8 ? X86_OP_REX_B : 0);
7891 pCodeBuf[off++] = 0xf7;
7892 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, iGprDst & 7);
7893#elif defined(RT_ARCH_ARM64)
7894 pCodeBuf[off++] = Armv8A64MkInstrOrn(iGprDst, ARMV8_A64_REG_XZR, iGprSrc, f64Bit);
7895#else
7896# error "port me"
7897#endif
7898 return off;
7899}
7900
7901
7902/**
7903 * Emits a gprdst = ~gprsrc store.
7904 */
7905DECL_INLINE_THROW(uint32_t)
7906iemNativeEmitInvBitsGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc, bool f64Bit = true)
7907{
7908#ifdef RT_ARCH_AMD64
7909 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 9), off, iGprDst, iGprSrc, f64Bit);
7910#elif defined(RT_ARCH_ARM64)
7911 off = iemNativeEmitInvBitsGprEx(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc, f64Bit);
7912#else
7913# error "port me"
7914#endif
7915 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7916 return off;
7917}
7918
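/*
 * Example of what the above emits when source and destination coincide (the
 * mov is skipped then), e.g. for RAX:
 *
 * @code
 *    off = iemNativeEmitInvBitsGpr(pReNative, off, X86_GREG_xAX, X86_GREG_xAX);
 *    // AMD64: 48 f7 d0          not rax
 *    // ARM64: orn x0, xzr, x0   (the canonical mvn x0, x0 alias)
 * @endcode
 */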
7919
7920/**
7921 * Emits a 128-bit vector register store to a VCpu value.
7922 */
7923DECL_FORCE_INLINE_THROW(uint32_t)
7924iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7925{
7926#ifdef RT_ARCH_AMD64
7927 /* movdqa mem128, reg128 */ /* ASSUMING an aligned location here. */
7928 pCodeBuf[off++] = 0x66;
7929 if (iVecReg >= 8)
7930 pCodeBuf[off++] = X86_OP_REX_R;
7931 pCodeBuf[off++] = 0x0f;
7932 pCodeBuf[off++] = 0x7f;
7933 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7934#elif defined(RT_ARCH_ARM64)
7935 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7936
7937#else
7938# error "port me"
7939#endif
7940 return off;
7941}
7942
7943
7944/**
7945 * Emits a 128-bit vector register store to a VCpu value.
7946 */
7947DECL_INLINE_THROW(uint32_t)
7948iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7949{
7950#ifdef RT_ARCH_AMD64
7951 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
7952#elif defined(RT_ARCH_ARM64)
7953 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
7954#else
7955# error "port me"
7956#endif
7957 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7958 return off;
7959}
7960
7961
7962/**
7963 * Emits a high 128-bit vector register store to a VCpu value.
7964 */
7965DECL_FORCE_INLINE_THROW(uint32_t)
7966iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7967{
7968#ifdef RT_ARCH_AMD64
7969 /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
7970 pCodeBuf[off++] = X86_OP_VEX3;
7971 if (iVecReg >= 8)
7972 pCodeBuf[off++] = 0x63;
7973 else
7974 pCodeBuf[off++] = 0xe3;
7975 pCodeBuf[off++] = 0x7d;
7976 pCodeBuf[off++] = 0x39;
7977 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
7978 pCodeBuf[off++] = 0x01; /* Immediate */
7979#elif defined(RT_ARCH_ARM64)
7980 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
7981#else
7982# error "port me"
7983#endif
7984 return off;
7985}
7986
7987
7988/**
7989 * Emits a high 128-bit vector register store to a VCpu value.
7990 */
7991DECL_INLINE_THROW(uint32_t)
7992iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
7993{
7994#ifdef RT_ARCH_AMD64
7995 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
7996#elif defined(RT_ARCH_ARM64)
7997 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
7998 Assert(!(iVecReg & 0x1));
7999 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8000#else
8001# error "port me"
8002#endif
8003 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8004 return off;
8005}
8006
8007
8008/**
8009 * Emits a 128-bit vector register load of a VCpu value.
8010 */
8011DECL_FORCE_INLINE_THROW(uint32_t)
8012iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8013{
8014#ifdef RT_ARCH_AMD64
8015 /* movdqa reg128, mem128 */ /* ASSUMING an aligned location here. */
8016 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8017 if (iVecReg >= 8)
8018 pCodeBuf[off++] = X86_OP_REX_R;
8019 pCodeBuf[off++] = 0x0f;
8020 pCodeBuf[off++] = 0x6f;
8021 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8022#elif defined(RT_ARCH_ARM64)
8023 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8024
8025#else
8026# error "port me"
8027#endif
8028 return off;
8029}
8030
8031
8032/**
8033 * Emits a 128-bit vector register load of a VCpu value.
8034 */
8035DECL_INLINE_THROW(uint32_t)
8036iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8037{
8038#ifdef RT_ARCH_AMD64
8039 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
8040#elif defined(RT_ARCH_ARM64)
8041 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
8042#else
8043# error "port me"
8044#endif
8045 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8046 return off;
8047}
8048
8049
8050/**
8051 * Emits a high 128-bit vector register load of a VCpu value.
8052 */
8053DECL_FORCE_INLINE_THROW(uint32_t)
8054iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8055{
8056#ifdef RT_ARCH_AMD64
8057 /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
8058 pCodeBuf[off++] = X86_OP_VEX3;
8059 if (iVecReg >= 8)
8060 pCodeBuf[off++] = 0x63;
8061 else
8062 pCodeBuf[off++] = 0xe3;
8063 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
8064 pCodeBuf[off++] = 0x38;
8065 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
8066 pCodeBuf[off++] = 0x01; /* Immediate */
8067#elif defined(RT_ARCH_ARM64)
8068 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
8069#else
8070# error "port me"
8071#endif
8072 return off;
8073}
8074
8075
8076/**
8077 * Emits a high 128-bit vector register load of a VCpu value.
8078 */
8079DECL_INLINE_THROW(uint32_t)
8080iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
8081{
8082#ifdef RT_ARCH_AMD64
8083 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
8084#elif defined(RT_ARCH_ARM64)
8085 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8086 Assert(!(iVecReg & 0x1));
8087 off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
8088#else
8089# error "port me"
8090#endif
8091 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8092 return off;
8093}
8094
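/*
 * Illustrative sketch: loading a whole 256-bit guest YMM register in two
 * halves. The CPUMCTX member paths below are for illustration only and
 * assume the usual IEM guest context layout:
 *
 * @code
 *    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxSimdReg,
 *              RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.aXMM[iYReg]));
 *    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxSimdReg,
 *              RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[iYReg]));
 * @endcode
 */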
8095
8096/**
8097 * Emits a vecdst = vecsrc load, 128-bit.
8098 */
8099DECL_FORCE_INLINE(uint32_t)
8100iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8101{
8102#ifdef RT_ARCH_AMD64
8103 /* movdqu vecdst, vecsrc */
8104 pCodeBuf[off++] = 0xf3;
8105
8106 if ((iVecRegDst | iVecRegSrc) >= 8)
8107 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
8108 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
8109 : X86_OP_REX_R;
8110 pCodeBuf[off++] = 0x0f;
8111 pCodeBuf[off++] = 0x6f;
8112 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8113
8114#elif defined(RT_ARCH_ARM64)
8115 /* mov dst, src; alias for: orr dst, src, src */
8116 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
8117
8118#else
8119# error "port me"
8120#endif
8121 return off;
8122}
8123
8124
8125/**
8126 * Emits a vecdst = vecsrc load, 128-bit.
8127 */
8128DECL_INLINE_THROW(uint32_t)
8129iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8130{
8131#ifdef RT_ARCH_AMD64
8132 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8133#elif defined(RT_ARCH_ARM64)
8134 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8135#else
8136# error "port me"
8137#endif
8138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8139 return off;
8140}
8141
8142
8143/**
8144 * Emits a vecdst[128:255] = vecsrc[128:255] load.
8145 */
8146DECL_FORCE_INLINE_THROW(uint32_t)
8147iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8148{
8149#ifdef RT_ARCH_AMD64
8150 /* vperm2i128 dst, dst, src, 0x30. */ /* ASSUMES AVX2 support */
8151 pCodeBuf[off++] = X86_OP_VEX3;
8152 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8153 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8154 pCodeBuf[off++] = 0x46;
8155 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8156 pCodeBuf[off++] = 0x30; /* Immediate, this will leave the low 128 bits of dst untouched and move the high 128 bits from src to dst. */
8157
8158#elif defined(RT_ARCH_ARM64)
8159 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8160
8161 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128(). */
8162# ifdef IEM_WITH_THROW_CATCH
8163 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8164# else
8165 AssertReleaseFailedStmt(off = UINT32_MAX);
8166# endif
8167#else
8168# error "port me"
8169#endif
8170 return off;
8171}
8172
8173
8174/**
8175 * Emits a vecdst[128:255] = vecsrc[128:255] load, high 128-bit.
8176 */
8177DECL_INLINE_THROW(uint32_t)
8178iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8179{
8180#ifdef RT_ARCH_AMD64
8181 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecRegDst, iVecRegSrc);
8182#elif defined(RT_ARCH_ARM64)
8183 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8184 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iVecRegSrc + 1);
8185#else
8186# error "port me"
8187#endif
8188 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8189 return off;
8190}
8191
8192
8193/**
8194 * Emits a vecdst[0:127] = vecsrc[128:255] load.
8195 */
8196DECL_FORCE_INLINE_THROW(uint32_t)
8197iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8198{
8199#ifdef RT_ARCH_AMD64
8200 /* vextracti128 dst, src, 1. */ /* ASSUMES AVX2 support */
8201 pCodeBuf[off++] = X86_OP_VEX3;
8202 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegDst >= 8, false, iVecRegSrc >= 8);
8203 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8204 pCodeBuf[off++] = 0x39;
8205 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7);
8206 pCodeBuf[off++] = 0x1;
8207
8208#elif defined(RT_ARCH_ARM64)
8209 RT_NOREF(pCodeBuf, iVecRegDst, iVecRegSrc);
8210
8211 /* Should never be called because we can just use iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(). */
8212# ifdef IEM_WITH_THROW_CATCH
8213 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
8214# else
8215 AssertReleaseFailedStmt(off = UINT32_MAX);
8216# endif
8217#else
8218# error "port me"
8219#endif
8220 return off;
8221}
8222
8223
8224/**
8225 * Emits a vecdst[0:127] = vecsrc[128:255] load, high 128-bit.
8226 */
8227DECL_INLINE_THROW(uint32_t)
8228iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8229{
8230#ifdef RT_ARCH_AMD64
8231 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8232#elif defined(RT_ARCH_ARM64)
8233 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8234 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc + 1);
8235#else
8236# error "port me"
8237#endif
8238 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8239 return off;
8240}
8241
8242
8243/**
8244 * Emits a vecdst = vecsrc load, 256-bit.
8245 */
8246DECL_INLINE_THROW(uint32_t)
8247iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8248{
8249#ifdef RT_ARCH_AMD64
8250 /* vmovdqa ymm, ymm */
8251 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
8252 if (iVecRegDst >= 8 && iVecRegSrc >= 8)
8253 {
8254 pbCodeBuf[off++] = X86_OP_VEX3;
8255 pbCodeBuf[off++] = 0x41;
8256 pbCodeBuf[off++] = 0x7d;
8257 pbCodeBuf[off++] = 0x6f;
8258 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8259 }
8260 else
8261 {
8262 pbCodeBuf[off++] = X86_OP_VEX2;
8263 pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
8264 pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
8265 pbCodeBuf[off++] = iVecRegSrc >= 8
8266 ? X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iVecRegDst & 7)
8267 : X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8268 }
8269#elif defined(RT_ARCH_ARM64)
8270 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8271 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
8272 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
8273 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
8274#else
8275# error "port me"
8276#endif
8277 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8278 return off;
8279}
8280
8281
8282/**
8283 * Emits a vecdst = vecsrc load.
8284 */
8285DECL_FORCE_INLINE(uint32_t)
8286iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8287{
8288#ifdef RT_ARCH_AMD64
8289 /* vinserti128 dst, dst, src, 1. */ /* ASSUMES AVX2 support */
8290 pCodeBuf[off++] = X86_OP_VEX3;
8291 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
8292 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8293 pCodeBuf[off++] = 0x38;
8294 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
8295 pCodeBuf[off++] = 0x01; /* Immediate */
8296
8297#elif defined(RT_ARCH_ARM64)
8298 Assert(!(iVecRegDst & 0x1) && !(iVecRegSrc & 0x1));
8299 /* mov dst, src; alias for: orr dst, src, src */
8300 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
8301
8302#else
8303# error "port me"
8304#endif
8305 return off;
8306}
8307
8308
8309/**
8310 * Emits a vecdst[128:255] = vecsrc[0:127] load, 128-bit.
8311 */
8312DECL_INLINE_THROW(uint32_t)
8313iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
8314{
8315#ifdef RT_ARCH_AMD64
8316 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iVecRegSrc);
8317#elif defined(RT_ARCH_ARM64)
8318 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
8319#else
8320# error "port me"
8321#endif
8322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8323 return off;
8324}
8325
8326
8327/**
8328 * Emits a gprdst = vecsrc[x] load, 64-bit.
8329 */
8330DECL_FORCE_INLINE(uint32_t)
8331iemNativeEmitSimdLoadGprFromVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8332{
8333#ifdef RT_ARCH_AMD64
8334 if (iQWord >= 2)
8335 {
8336 /*
8337 * vpextrq doesn't work on the upper 128-bits.
8338 * So we use the following sequence:
8339 * vextracti128 vectmp0, vecsrc, 1
8340 * pextrq gpr, vectmp0, #(iQWord - 2)
8341 */
8342 /* vextracti128 */
8343 pCodeBuf[off++] = X86_OP_VEX3;
8344 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8345 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8346 pCodeBuf[off++] = 0x39;
8347 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8348 pCodeBuf[off++] = 0x1;
8349
8350 /* pextrq */
8351 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8352 pCodeBuf[off++] = X86_OP_REX_W
8353 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8354 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8355 pCodeBuf[off++] = 0x0f;
8356 pCodeBuf[off++] = 0x3a;
8357 pCodeBuf[off++] = 0x16;
8358 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8359 pCodeBuf[off++] = iQWord - 2;
8360 }
8361 else
8362 {
8363 /* pextrq gpr, vecsrc, #iQWord (ASSUMES SSE4.1). */
8364 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8365 pCodeBuf[off++] = X86_OP_REX_W
8366 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8367 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8368 pCodeBuf[off++] = 0x0f;
8369 pCodeBuf[off++] = 0x3a;
8370 pCodeBuf[off++] = 0x16;
8371 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8372 pCodeBuf[off++] = iQWord;
8373 }
8374#elif defined(RT_ARCH_ARM64)
8375 /* umov gprdst, vecsrc[iQWord] */
8376 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8377#else
8378# error "port me"
8379#endif
8380 return off;
8381}
8382
8383
8384/**
8385 * Emits a gprdst = vecsrc[x] load, 64-bit.
8386 */
8387DECL_INLINE_THROW(uint32_t)
8388iemNativeEmitSimdLoadGprFromVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iQWord)
8389{
8390 Assert(iQWord <= 3);
8391
8392#ifdef RT_ARCH_AMD64
8393 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iVecRegSrc, iQWord);
8394#elif defined(RT_ARCH_ARM64)
8395 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8396 Assert(!(iVecRegSrc & 0x1));
8397 /* Need to access the "high" 128-bit vector register. */
8398 if (iQWord >= 2)
8399 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iQWord - 2);
8400 else
8401 off = iemNativeEmitSimdLoadGprFromVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iQWord);
8402#else
8403# error "port me"
8404#endif
8405 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8406 return off;
8407}
8408
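/*
 * Example: pulling the topmost qword out of a 256-bit value. On AMD64 this
 * expands to vextracti128 into IEMNATIVE_SIMD_REG_FIXED_TMP0 followed by a
 * pextrq; on ARM64 it is a single umov from the odd register of the pair
 * (idxRegDst/idxSimdRegSrc are hypothetical):
 *
 * @code
 *    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegDst, idxSimdRegSrc, 3);
 * @endcode
 */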
8409
8410/**
8411 * Emits a gprdst = vecsrc[x] load, 32-bit.
8412 */
8413DECL_FORCE_INLINE(uint32_t)
8414iemNativeEmitSimdLoadGprFromVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8415{
8416#ifdef RT_ARCH_AMD64
8417 if (iDWord >= 4)
8418 {
8419 /*
8420 * vpextrd doesn't work on the upper 128-bits.
8421 * So we use the following sequence:
8422 * vextracti128 vectmp0, vecsrc, 1
8423 * pextrd gpr, vectmp0, #(iDWord - 4)
8424 */
8425 /* vextracti128 */
8426 pCodeBuf[off++] = X86_OP_VEX3;
8427 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
8428 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8429 pCodeBuf[off++] = 0x39;
8430 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8431 pCodeBuf[off++] = 0x1;
8432
8433 /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
8434 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8435 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
8436 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8437 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8438 pCodeBuf[off++] = 0x0f;
8439 pCodeBuf[off++] = 0x3a;
8440 pCodeBuf[off++] = 0x16;
8441 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
8442 pCodeBuf[off++] = iDWord - 4;
8443 }
8444 else
8445 {
8446 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */
8447 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8448 if (iGprDst >= 8 || iVecRegSrc >= 8)
8449 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8450 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8451 pCodeBuf[off++] = 0x0f;
8452 pCodeBuf[off++] = 0x3a;
8453 pCodeBuf[off++] = 0x16;
8454 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8455 pCodeBuf[off++] = iDWord;
8456 }
8457#elif defined(RT_ARCH_ARM64)
8458 Assert(iDWord < 4);
8459
8460 /* umov gprdst, vecsrc[iDWord] */
8461 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
8462#else
8463# error "port me"
8464#endif
8465 return off;
8466}
8467
8468
8469/**
8470 * Emits a gprdst = vecsrc[x] load, 32-bit.
8471 */
8472DECL_INLINE_THROW(uint32_t)
8473iemNativeEmitSimdLoadGprFromVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iDWord)
8474{
8475 Assert(iDWord <= 7);
8476
8477#ifdef RT_ARCH_AMD64
8478 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
8479#elif defined(RT_ARCH_ARM64)
8480 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8481 Assert(!(iVecRegSrc & 0x1));
8482 /* Need to access the "high" 128-bit vector register. */
8483 if (iDWord >= 4)
8484 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iDWord - 4);
8485 else
8486 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
8487#else
8488# error "port me"
8489#endif
8490 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8491 return off;
8492}
8493
8494
8495/**
8496 * Emits a gprdst = vecsrc[x] load, 16-bit.
8497 */
8498DECL_FORCE_INLINE(uint32_t)
8499iemNativeEmitSimdLoadGprFromVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8500{
8501#ifdef RT_ARCH_AMD64
8502 if (iWord >= 8)
8503 {
8504 /** @todo Currently not used. */
8505 AssertReleaseFailed();
8506 }
8507 else
8508 {
8509 /* pextrw gpr, vecsrc, #iWord */
8510 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8511 if (iGprDst >= 8 || iVecRegSrc >= 8)
8512 pCodeBuf[off++] = (iGprDst < 8 ? 0 : X86_OP_REX_R)
8513 | (iVecRegSrc < 8 ? 0 : X86_OP_REX_B);
8514 pCodeBuf[off++] = 0x0f;
8515 pCodeBuf[off++] = 0xc5;
8516 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iVecRegSrc & 7);
8517 pCodeBuf[off++] = iWord;
8518 }
8519#elif defined(RT_ARCH_ARM64)
8520 /* umov gprdst, vecsrc[iWord] */
8521 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iWord, kArmv8InstrUmovInsSz_U16, false /*fDst64Bit*/);
8522#else
8523# error "port me"
8524#endif
8525 return off;
8526}
8527
8528
8529/**
8530 * Emits a gprdst = vecsrc[x] load, 16-bit.
8531 */
8532DECL_INLINE_THROW(uint32_t)
8533iemNativeEmitSimdLoadGprFromVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iWord)
8534{
8535 Assert(iWord <= 15);
8536
8537#ifdef RT_ARCH_AMD64
8538 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGprDst, iVecRegSrc, iWord);
8539#elif defined(RT_ARCH_ARM64)
8540 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8541 Assert(!(iVecRegSrc & 0x1));
8542 /* Need to access the "high" 128-bit vector register. */
8543 if (iWord >= 8)
8544 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iWord - 8);
8545 else
8546 off = iemNativeEmitSimdLoadGprFromVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iWord);
8547#else
8548# error "port me"
8549#endif
8550 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8551 return off;
8552}
8553
8554
8555/**
8556 * Emits a gprdst = vecsrc[x] load, 8-bit.
8557 */
8558DECL_FORCE_INLINE(uint32_t)
8559iemNativeEmitSimdLoadGprFromVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8560{
8561#ifdef RT_ARCH_AMD64
8562 if (iByte >= 16)
8563 {
8564 /** @todo Currently not used. */
8565 AssertReleaseFailed();
8566 }
8567 else
8568 {
8569 /* pextrb gpr, vecsrc, #iByte */
8570 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8571 if (iGprDst >= 8 || iVecRegSrc >= 8)
8572 pCodeBuf[off++] = (iVecRegSrc < 8 ? 0 : X86_OP_REX_R)
8573 | (iGprDst < 8 ? 0 : X86_OP_REX_B);
8574 pCodeBuf[off++] = 0x0f;
8575 pCodeBuf[off++] = 0x3a;
8576 pCodeBuf[off++] = 0x14;
8577 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, iGprDst & 7);
8578 pCodeBuf[off++] = iByte;
8579 }
8580#elif defined(RT_ARCH_ARM64)
8581 /* umov gprdst, vecsrc[iByte] */
8582 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iByte, kArmv8InstrUmovInsSz_U8, false /*fDst64Bit*/);
8583#else
8584# error "port me"
8585#endif
8586 return off;
8587}
8588
8589
8590/**
8591 * Emits a gprdst = vecsrc[x] load, 8-bit.
8592 */
8593DECL_INLINE_THROW(uint32_t)
8594iemNativeEmitSimdLoadGprFromVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iVecRegSrc, uint8_t iByte)
8595{
8596 Assert(iByte <= 31);
8597
8598#ifdef RT_ARCH_AMD64
8599 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iByte);
8600#elif defined(RT_ARCH_ARM64)
8601 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8602 Assert(!(iVecRegSrc & 0x1));
8603 /* Need to access the "high" 128-bit vector register. */
8604 if (iByte >= 16)
8605 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc + 1, iByte - 16);
8606 else
8607 off = iemNativeEmitSimdLoadGprFromVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iByte);
8608#else
8609# error "port me"
8610#endif
8611 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8612 return off;
8613}
8614
8615
8616/**
8617 * Emits a vecdst[x] = gprsrc store, 64-bit.
8618 */
8619DECL_FORCE_INLINE(uint32_t)
8620iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8621{
8622#ifdef RT_ARCH_AMD64
8623 if (iQWord >= 2)
8624 {
8625 /*
8626 * vpinsrq doesn't work on the upper 128-bits.
8627 * So we use the following sequence:
8628 * vextracti128 vectmp0, vecdst, 1
8629 * pinsrq vectmp0, gpr, #(iQWord - 2)
8630 * vinserti128 vecdst, vectmp0, 1
8631 */
8632 /* vextracti128 */
8633 pCodeBuf[off++] = X86_OP_VEX3;
8634 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8635 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8636 pCodeBuf[off++] = 0x39;
8637 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8638 pCodeBuf[off++] = 0x1;
8639
8640 /* pinsrq */
8641 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8642 pCodeBuf[off++] = X86_OP_REX_W
8643 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8644 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8645 pCodeBuf[off++] = 0x0f;
8646 pCodeBuf[off++] = 0x3a;
8647 pCodeBuf[off++] = 0x22;
8648 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
8649 pCodeBuf[off++] = iQWord - 2;
8650
8651 /* vinserti128 */
8652 pCodeBuf[off++] = X86_OP_VEX3;
8653 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8654 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8655 pCodeBuf[off++] = 0x38;
8656 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8657 pCodeBuf[off++] = 0x01; /* Immediate */
8658 }
8659 else
8660 {
8661 /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
8662 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8663 pCodeBuf[off++] = X86_OP_REX_W
8664 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8665 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8666 pCodeBuf[off++] = 0x0f;
8667 pCodeBuf[off++] = 0x3a;
8668 pCodeBuf[off++] = 0x22;
8669 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8670 pCodeBuf[off++] = iQWord;
8671 }
8672#elif defined(RT_ARCH_ARM64)
8673 /* ins vecdst[iQWord], gpr */
8674 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
8675#else
8676# error "port me"
8677#endif
8678 return off;
8679}
8680
8681
8682/**
8683 * Emits a vecdst[x] = gprsrc store, 64-bit.
8684 */
8685DECL_INLINE_THROW(uint32_t)
8686iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
8687{
8688 Assert(iQWord <= 3);
8689
8690#ifdef RT_ARCH_AMD64
8691 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
8692#elif defined(RT_ARCH_ARM64)
8693 Assert(!(iVecRegDst & 0x1));
8694 if (iQWord >= 2)
8695 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
8696 else
8697 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
8698#else
8699# error "port me"
8700#endif
8701 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8702 return off;
8703}
8704
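/*
 * Example: writing a GPR into qword 2 (the first qword of the upper half).
 * On AMD64 this is the vextracti128/pinsrq/vinserti128 round trip described
 * above; on ARM64 it is a single ins into the odd register of the pair
 * (idxSimdRegDst/idxRegSrc are hypothetical):
 *
 * @code
 *    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegSrc, 2);
 * @endcode
 */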
8705
8706/**
8707 * Emits a vecdst[x] = gprsrc store, 32-bit.
8708 */
8709DECL_FORCE_INLINE(uint32_t)
8710iemNativeEmitSimdStoreGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8711{
8712#ifdef RT_ARCH_AMD64
8713 if (iDWord >= 4)
8714 {
8715 /*
8716 * vpinsrd doesn't work on the upper 128-bits.
8717 * So we use the following sequence:
8718 * vextracti128 vectmp0, vecdst, 1
8719 * pinsrd vectmp0, gpr, #(iDWord - 4)
8720 * vinserti128 vecdst, vectmp0, 1
8721 */
8722 /* vextracti128 */
8723 pCodeBuf[off++] = X86_OP_VEX3;
8724 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8725 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
8726 pCodeBuf[off++] = 0x39;
8727 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8728 pCodeBuf[off++] = 0x1;
8729
8730 /* pinsrd */
8731 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8732 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
8733 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
8734 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8735 pCodeBuf[off++] = 0x0f;
8736 pCodeBuf[off++] = 0x3a;
8737 pCodeBuf[off++] = 0x22;
8738 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
8739 pCodeBuf[off++] = iDWord - 4;
8740
8741 /* vinserti128 */
8742 pCodeBuf[off++] = X86_OP_VEX3;
8743 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
8744 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
8745 pCodeBuf[off++] = 0x38;
8746 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
8747 pCodeBuf[off++] = 0x01; /* Immediate */
8748 }
8749 else
8750 {
8751 /* pinsrd vecdst, gpr, #iDWord (ASSUMES SSE4.1). */
8752 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8753 if (iVecRegDst >= 8 || iGprSrc >= 8)
8754 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8755 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8756 pCodeBuf[off++] = 0x0f;
8757 pCodeBuf[off++] = 0x3a;
8758 pCodeBuf[off++] = 0x22;
8759 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8760 pCodeBuf[off++] = iDWord;
8761 }
8762#elif defined(RT_ARCH_ARM64)
8763 /* ins vecdst[iDWord], gpr */
8764 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iDWord, kArmv8InstrUmovInsSz_U32);
8765#else
8766# error "port me"
8767#endif
8768 return off;
8769}
8770
8771
8772/**
8773 * Emits a vecdst[x] = gprsrc store, 32-bit.
8774 */
8775DECL_INLINE_THROW(uint32_t)
8776iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
8777{
8778 Assert(iDWord <= 7);
8779
8780#ifdef RT_ARCH_AMD64
8781 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
8782#elif defined(RT_ARCH_ARM64)
8783 Assert(!(iVecRegDst & 0x1));
8784 if (iDWord >= 4)
8785 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
8786 else
8787 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
8788#else
8789# error "port me"
8790#endif
8791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8792 return off;
8793}
8794
8795
8796/**
8797 * Emits a vecdst[x] = gprsrc store, 16-bit.
8798 */
8799DECL_FORCE_INLINE(uint32_t)
8800iemNativeEmitSimdStoreGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
8801{
8802#ifdef RT_ARCH_AMD64
8803 /* pinsrw vecdst, gpr, #iWord. */
8804 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8805 if (iVecRegDst >= 8 || iGprSrc >= 8)
8806 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8807 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8808 pCodeBuf[off++] = 0x0f;
8809 pCodeBuf[off++] = 0xc4;
8810 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8811 pCodeBuf[off++] = iWord;
8812#elif defined(RT_ARCH_ARM64)
8813 /* ins vecdst[iWord], gpr */
8814 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iWord, kArmv8InstrUmovInsSz_U16);
8815#else
8816# error "port me"
8817#endif
8818 return off;
8819}
8820
8821
8822/**
8823 * Emits a vecdst[x] = gprsrc store, 16-bit.
8824 */
8825DECL_INLINE_THROW(uint32_t)
8826iemNativeEmitSimdStoreGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iWord)
8827{
8828 Assert(iWord <= 7);
8829
8830#ifdef RT_ARCH_AMD64
8831 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 6), off, iVecRegDst, iGprSrc, iWord);
8832#elif defined(RT_ARCH_ARM64)
8833 off = iemNativeEmitSimdStoreGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iWord);
8834#else
8835# error "port me"
8836#endif
8837 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8838 return off;
8839}
8840
8841
8842/**
8843 * Emits a vecdst[x] = gprsrc store, 8-bit.
8844 */
8845DECL_FORCE_INLINE(uint32_t)
8846iemNativeEmitSimdStoreGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
8847{
8848#ifdef RT_ARCH_AMD64
8849 /* pinsrb vecdst, gpr, #iByte (ASSUMES SSE4.1). */
8850 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8851 if (iVecRegDst >= 8 || iGprSrc >= 8)
8852 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
8853 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
8854 pCodeBuf[off++] = 0x0f;
8855 pCodeBuf[off++] = 0x3a;
8856 pCodeBuf[off++] = 0x20;
8857 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
8858 pCodeBuf[off++] = iByte;
8859#elif defined(RT_ARCH_ARM64)
8860 /* ins vecdst[iByte], gpr */
8861 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iByte, kArmv8InstrUmovInsSz_U8);
8862#else
8863# error "port me"
8864#endif
8865 return off;
8866}
8867
8868
8869/**
8870 * Emits a vecdst[x] = gprsrc store, 8-bit.
8871 */
8872DECL_INLINE_THROW(uint32_t)
8873iemNativeEmitSimdStoreGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iByte)
8874{
8875 Assert(iByte <= 15);
8876
8877#ifdef RT_ARCH_AMD64
8878 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iByte);
8879#elif defined(RT_ARCH_ARM64)
8880 off = iemNativeEmitSimdStoreGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iByte);
8881#else
8882# error "port me"
8883#endif
8884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8885 return off;
8886}
8887
8888
8889/**
8890 * Emits a vecdst.au32[iDWord] = 0 store.
8891 */
8892DECL_FORCE_INLINE(uint32_t)
8893iemNativeEmitSimdZeroVecRegElemU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8894{
8895 Assert(iDWord <= 7);
8896
8897#ifdef RT_ARCH_AMD64
8898 /*
8899 * xor tmp0, tmp0
8900 * pinsrd xmm, tmp0, iDword
8901 */
8902 if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
8903 pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
8904 pCodeBuf[off++] = 0x33;
8905 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_REG_FIXED_TMP0 & 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
8906 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(pCodeBuf, off, iVecReg, IEMNATIVE_REG_FIXED_TMP0, iDWord);
8907#elif defined(RT_ARCH_ARM64)
8908 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8909 Assert(!(iVecReg & 0x1));
8910 /* ins vecreg[iDWord], wzr */
8911 if (iDWord >= 4)
8912 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg + 1, ARMV8_A64_REG_WZR, iDWord - 4, kArmv8InstrUmovInsSz_U32);
8913 else
8914 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecReg, ARMV8_A64_REG_WZR, iDWord, kArmv8InstrUmovInsSz_U32);
8915#else
8916# error "port me"
8917#endif
8918 return off;
8919}
8920
8921
8922/**
8923 * Emits a vecdst.au32[iDWord] = 0 store.
8924 */
8925DECL_INLINE_THROW(uint32_t)
8926iemNativeEmitSimdZeroVecRegElemU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint8_t iDWord)
8927{
8928
8929#ifdef RT_ARCH_AMD64
8930 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, iDWord);
8931#elif defined(RT_ARCH_ARM64)
8932 off = iemNativeEmitSimdZeroVecRegElemU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, iDWord);
8933#else
8934# error "port me"
8935#endif
8936 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8937 return off;
8938}
8939
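/*
 * Example: clearing dword 3 of a vector register, e.g. to zero the top
 * element after a scalar operation. On AMD64 this emits xor tmp0,tmp0 plus
 * a pinsrd; on ARM64 it is a single ins from wzr (idxSimdReg hypothetical):
 *
 * @code
 *    off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdReg, 3);
 * @endcode
 */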
8940
8941/**
8942 * Emits a vecdst[0:127] = 0 store.
8943 */
8944DECL_FORCE_INLINE(uint32_t)
8945iemNativeEmitSimdZeroVecRegLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8946{
8947#ifdef RT_ARCH_AMD64
8948 /* pxor xmm, xmm */
8949 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
8950 if (iVecReg >= 8)
8951 pCodeBuf[off++] = X86_OP_REX_B | X86_OP_REX_R;
8952 pCodeBuf[off++] = 0x0f;
8953 pCodeBuf[off++] = 0xef;
8954 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
8955#elif defined(RT_ARCH_ARM64)
8956 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
8957 Assert(!(iVecReg & 0x1));
8958 /* eor vecreg, vecreg, vecreg */
8959 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
8960#else
8961# error "port me"
8962#endif
8963 return off;
8964}
8965
8966
8967/**
8968 * Emits a vecdst[0:127] = 0 store.
8969 */
8970DECL_INLINE_THROW(uint32_t)
8971iemNativeEmitSimdZeroVecRegLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
8972{
8973#ifdef RT_ARCH_AMD64
8974 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
8975#elif defined(RT_ARCH_ARM64)
8976 off = iemNativeEmitSimdZeroVecRegLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
8977#else
8978# error "port me"
8979#endif
8980 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8981 return off;
8982}
8983
8984
8985/**
8986 * Emits a vecdst[128:255] = 0 store.
8987 */
8988DECL_FORCE_INLINE(uint32_t)
8989iemNativeEmitSimdZeroVecRegHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
8990{
8991#ifdef RT_ARCH_AMD64
8992 /* vmovdqa xmm, xmm. This will clear the upper half of ymm */
8993 if (iVecReg < 8)
8994 {
8995 pCodeBuf[off++] = X86_OP_VEX2;
8996 pCodeBuf[off++] = 0xf9;
8997 }
8998 else
8999 {
9000 pCodeBuf[off++] = X86_OP_VEX3;
9001 pCodeBuf[off++] = 0x41;
9002 pCodeBuf[off++] = 0x79;
9003 }
9004 pCodeBuf[off++] = 0x6f;
9005 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9006#elif defined(RT_ARCH_ARM64)
9007 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9008 Assert(!(iVecReg & 0x1));
9009 /* eor vecreg, vecreg, vecreg */
9010 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9011#else
9012# error "port me"
9013#endif
9014 return off;
9015}
9016
9017
9018/**
9019 * Emits a vecdst[128:255] = 0 store.
9020 */
9021DECL_INLINE_THROW(uint32_t)
9022iemNativeEmitSimdZeroVecRegHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9023{
9024#ifdef RT_ARCH_AMD64
9025 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecReg);
9026#elif defined(RT_ARCH_ARM64)
9027 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
9028#else
9029# error "port me"
9030#endif
9031 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9032 return off;
9033}
9034
9035
9036/**
9037 * Emits a vecdst[0:255] = 0 store.
9038 */
9039DECL_FORCE_INLINE(uint32_t)
9040iemNativeEmitSimdZeroVecRegU256Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg)
9041{
9042#ifdef RT_ARCH_AMD64
9043 /* vpxor ymm, ymm, ymm */
9044 if (iVecReg < 8)
9045 {
9046 pCodeBuf[off++] = X86_OP_VEX2;
9047 pCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9048 }
9049 else
9050 {
9051 pCodeBuf[off++] = X86_OP_VEX3;
9052 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X | 0x01;
9053 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
9054 }
9055 pCodeBuf[off++] = 0xef;
9056 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecReg & 7, iVecReg & 7);
9057#elif defined(RT_ARCH_ARM64)
9058 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9059 Assert(!(iVecReg & 0x1));
9060 /* eor vecreg, vecreg, vecreg */
9061 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg, iVecReg, iVecReg);
9062 pCodeBuf[off++] = Armv8A64MkVecInstrEor(iVecReg + 1, iVecReg + 1, iVecReg + 1);
9063#else
9064# error "port me"
9065#endif
9066 return off;
9067}
9068
9069
9070/**
9071 * Emits a vecdst[0:255] = 0 store.
9072 */
9073DECL_INLINE_THROW(uint32_t)
9074iemNativeEmitSimdZeroVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg)
9075{
9076#ifdef RT_ARCH_AMD64
9077 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 5), off, iVecReg);
9078#elif defined(RT_ARCH_ARM64)
9079 off = iemNativeEmitSimdZeroVecRegU256Ex(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecReg);
9080#else
9081# error "port me"
9082#endif
9083 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9084 return off;
9085}
9086
9087
9088/**
9089 * Emits a vecdst = gprsrc broadcast, 8-bit.
9090 */
9091DECL_FORCE_INLINE(uint32_t)
9092iemNativeEmitSimdBroadcastGprToVecRegU8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9093{
9094#ifdef RT_ARCH_AMD64
9095 /* pinsrb vecdst, gpr, #0 (ASSUMES SSE 4.1) */
9096 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9097 if (iVecRegDst >= 8 || iGprSrc >= 8)
9098 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9099 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9100 pCodeBuf[off++] = 0x0f;
9101 pCodeBuf[off++] = 0x3a;
9102 pCodeBuf[off++] = 0x20;
9103 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9104 pCodeBuf[off++] = 0x00;
9105
9106 /* vpbroadcastb {y,x}mm, xmm (ASSUMES AVX2). */
9107 pCodeBuf[off++] = X86_OP_VEX3;
9108 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9109 | 0x02 /* opcode map. */
9110 | ( iVecRegDst >= 8
9111 ? 0
9112 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9113 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9114 pCodeBuf[off++] = 0x78;
9115 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9116#elif defined(RT_ARCH_ARM64)
9117 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9118 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9119
9120 /* dup vecdst, gpr */
9121 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U8);
9122 if (f256Bit)
9123 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U8);
9124#else
9125# error "port me"
9126#endif
9127 return off;
9128}
9129
9130
9131/**
9132 * Emits a vecdst = gprsrc broadcast, 8-bit.
9133 */
9134DECL_INLINE_THROW(uint32_t)
9135iemNativeEmitSimdBroadcastGprToVecRegU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9136{
9137#ifdef RT_ARCH_AMD64
9138 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9139#elif defined(RT_ARCH_ARM64)
9140 off = iemNativeEmitSimdBroadcastGprToVecRegU8Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9141#else
9142# error "port me"
9143#endif
9144 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9145 return off;
9146}
9147
9148
9149/**
9150 * Emits a vecdst = gprsrc broadcast, 16-bit.
9151 */
9152DECL_FORCE_INLINE(uint32_t)
9153iemNativeEmitSimdBroadcastGprToVecRegU16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9154{
9155#ifdef RT_ARCH_AMD64
9156 /* pinsrw vecdst, gpr, #0 */
9157 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9158 if (iVecRegDst >= 8 || iGprSrc >= 8)
9159 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9160 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9161 pCodeBuf[off++] = 0x0f;
9162 pCodeBuf[off++] = 0xc4;
9163 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9164 pCodeBuf[off++] = 0x00;
9165
9166 /* vpbroadcastw {y,x}mm, xmm (ASSUMES AVX2). */
9167 pCodeBuf[off++] = X86_OP_VEX3;
9168 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9169 | 0x02 /* opcode map. */
9170 | ( iVecRegDst >= 8
9171 ? 0
9172 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9173 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9174 pCodeBuf[off++] = 0x79;
9175 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9176#elif defined(RT_ARCH_ARM64)
9177 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9178 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9179
9180 /* dup vecdst, gpr */
9181 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U16);
9182 if (f256Bit)
9183 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U16);
9184#else
9185# error "port me"
9186#endif
9187 return off;
9188}
9189
9190
9191/**
9192 * Emits a vecdst = gprsrc broadcast, 16-bit.
9193 */
9194DECL_INLINE_THROW(uint32_t)
9195iemNativeEmitSimdBroadcastGprToVecRegU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9196{
9197#ifdef RT_ARCH_AMD64
9198 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9199#elif defined(RT_ARCH_ARM64)
9200 off = iemNativeEmitSimdBroadcastGprToVecRegU16Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9201#else
9202# error "port me"
9203#endif
9204 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9205 return off;
9206}
9207
9208
9209/**
9210 * Emits a vecdst = gprsrc broadcast, 32-bit.
9211 */
9212DECL_FORCE_INLINE(uint32_t)
9213iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9214{
9215#ifdef RT_ARCH_AMD64
9216 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9217 * vbroadcast needs a memory operand or another xmm register to work... */
9218
9219 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */
9220 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9221 if (iVecRegDst >= 8 || iGprSrc >= 8)
9222 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9223 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9224 pCodeBuf[off++] = 0x0f;
9225 pCodeBuf[off++] = 0x3a;
9226 pCodeBuf[off++] = 0x22;
9227 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9228 pCodeBuf[off++] = 0x00;
9229
9230 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
9231 pCodeBuf[off++] = X86_OP_VEX3;
9232 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9233 | 0x02 /* opcode map. */
9234 | ( iVecRegDst >= 8
9235 ? 0
9236 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9237 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9238 pCodeBuf[off++] = 0x58;
9239 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9240#elif defined(RT_ARCH_ARM64)
9241 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9242 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9243
9244 /* dup vecdst, gpr */
9245 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
9246 if (f256Bit)
9247 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
9248#else
9249# error "port me"
9250#endif
9251 return off;
9252}
9253
9254
9255/**
9256 * Emits a vecdst = gprsrc broadcast, 32-bit.
9257 */
9258DECL_INLINE_THROW(uint32_t)
9259iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9260{
9261#ifdef RT_ARCH_AMD64
9262 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
9263#elif defined(RT_ARCH_ARM64)
9264 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9265#else
9266# error "port me"
9267#endif
9268 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9269 return off;
9270}
9271
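/*
 * Illustrative sketch: splatting a 32-bit guest value across a full 256-bit
 * register, as e.g. a vpbroadcastd-from-GPR implementation would; the last
 * parameter is f256Bit (register indexes are hypothetical):
 *
 * @code
 *    off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst,
 *                                                   idxRegSrc, true);
 * @endcode
 */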
9272
9273/**
9274 * Emits a vecdst = gprsrc broadcast, 64-bit.
9275 */
9276DECL_FORCE_INLINE(uint32_t)
9277iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9278{
9279#ifdef RT_ARCH_AMD64
9280 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
9281 * vbroadcast needs a memory operand or another xmm register to work... */
9282
9283 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). */
9284 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
9285 pCodeBuf[off++] = X86_OP_REX_W
9286 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
9287 | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
9288 pCodeBuf[off++] = 0x0f;
9289 pCodeBuf[off++] = 0x3a;
9290 pCodeBuf[off++] = 0x22;
9291 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
9292 pCodeBuf[off++] = 0x00;
9293
9294 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
9295 pCodeBuf[off++] = X86_OP_VEX3;
9296 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X
9297 | 0x02 /* opcode map. */
9298 | ( iVecRegDst >= 8
9299 ? 0
9300 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
9301 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
9302 pCodeBuf[off++] = 0x59;
9303 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
9304#elif defined(RT_ARCH_ARM64)
9305 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9306 Assert(!(iVecRegDst & 0x1) || !f256Bit);
9307
9308 /* dup vecdst, gpr */
9309 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U64);
9310 if (f256Bit)
9311 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U64);
9312#else
9313# error "port me"
9314#endif
9315 return off;
9316}
9317
9318
9319/**
9320 * Emits a vecdst = gprsrc broadcast, 64-bit.
9321 */
9322DECL_INLINE_THROW(uint32_t)
9323iemNativeEmitSimdBroadcastGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
9324{
9325#ifdef RT_ARCH_AMD64
9326 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 14), off, iVecRegDst, iGprSrc, f256Bit);
9327#elif defined(RT_ARCH_ARM64)
9328 off = iemNativeEmitSimdBroadcastGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
9329#else
9330# error "port me"
9331#endif
9332 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9333 return off;
9334}
9335
9336
9337/**
9338 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9339 */
9340DECL_FORCE_INLINE(uint32_t)
9341iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9342{
9343#ifdef RT_ARCH_AMD64
9344 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(pCodeBuf, off, iVecRegDst, iVecRegSrc);
9345
9346 /* vinserti128 ymm, ymm, xmm, 1. */ /* ASSUMES AVX2 support */
9347 pCodeBuf[off++] = X86_OP_VEX3;
9348 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, iVecRegSrc >= 8, false, iVecRegDst >= 8);
9349 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
9350 pCodeBuf[off++] = 0x38;
9351 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
9352 pCodeBuf[off++] = 0x01; /* Immediate */
9353#elif defined(RT_ARCH_ARM64)
9354 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
9355 Assert(!(iVecRegDst & 0x1));
9356
9357 /* mov dst, src; alias for: orr dst, src, src */
9358 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
9359 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst + 1, iVecRegSrc, iVecRegSrc);
9360#else
9361# error "port me"
9362#endif
9363 return off;
9364}
9365
9366
9367/**
9368 * Emits a vecdst[0:127] = vecdst[128:255] = vecsrc[0:127] broadcast, 128-bit.
9369 */
9370DECL_INLINE_THROW(uint32_t)
9371iemNativeEmitSimdBroadcastVecRegU128ToVecReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
9372{
9373#ifdef RT_ARCH_AMD64
9374 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 11), off, iVecRegDst, iVecRegSrc);
9375#elif defined(RT_ARCH_ARM64)
9376 off = iemNativeEmitSimdBroadcastVecRegU128ToVecRegEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iVecRegDst, iVecRegSrc);
9377#else
9378# error "port me"
9379#endif
9380 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9381 return off;
9382}
9383
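/*
 * Illustrative sketch: duplicating the low 128 bits into both halves of a
 * 256-bit register, as a vbroadcasti128-style operation would need
 * (hypothetical register indexes):
 *
 * @code
 *    off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
 * @endcode
 */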
9384#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9385
9386/** @} */
9387
9388#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerEmit_h */
9389