VirtualBox

source: vbox/trunk/src/VBox/Disassembler/DisasmFormatYasm.cpp

Last change on this file was 103928, checked in by vboxsync, 5 months ago

DIS: Correct movzx ambiguity with memory source. Makes 'kmk check' work again.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 72.2 KB
RevLine 
[9266]1/* $Id: DisasmFormatYasm.cpp 103928 2024-03-19 21:27:41Z vboxsync $ */
2/** @file
3 * VBox Disassembler - Yasm(/Nasm) Style Formatter.
4 */
5
6/*
[98103]7 * Copyright (C) 2008-2023 Oracle and/or its affiliates.
[9266]8 *
[96407]9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
[9266]26 */
27
28
[57358]29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
[9266]32#include <VBox/dis.h>
33#include "DisasmInternal.h"
34#include <iprt/assert.h>
35#include <iprt/ctype.h>
[76346]36#include <iprt/err.h>
37#include <iprt/string.h>
[9266]38
39
[57358]40/*********************************************************************************************************************************
41* Global Variables *
42*********************************************************************************************************************************/
/** Run of space characters; PUT_STR(g_szSpaces, cchPadding) copies cchPadding bytes out of
 * it to pad between the opcode bytes and the instruction text.
 * NOTE(review): the literal below looks whitespace-collapsed in this listing.  It has to hold
 * at least as many characters as the largest cchPadding handed to PUT_STR, otherwise the
 * memcpy in PUT_STR reads past the end of the array - confirm against the repository copy. */
[9266]43static const char g_szSpaces[] =
44" ";
/*
 * Register name tables.
 *
 * Every row is a fixed size char array.  Names shorter than the row are zero
 * padded by the initializer, which is what the length calculations in the
 * lookup functions (testing psz[2], psz[3], psz[4]) depend on.
 */

/** 8-bit general registers (DISUSE_REG_GEN8); entries 16 thru 19 are spl, bpl, sil and dil. */
static const char g_aszYasmRegGen8[20][5] =
{
    "al",   "cl",   "dl",   "bl",
    "ah",   "ch",   "dh",   "bh",
    "r8b",  "r9b",  "r10b", "r11b",
    "r12b", "r13b", "r14b", "r15b",
    "spl",  "bpl",  "sil",  "dil"
};

/** 16-bit general registers (DISUSE_REG_GEN16, 16-bit index registers). */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax",   "cx",   "dx",   "bx",
    "sp",   "bp",   "si",   "di",
    "r8w",  "r9w",  "r10w", "r11w",
    "r12w", "r13w", "r14w", "r15w"
};

#if 0 /* unused */
/** 16-bit register combinations (currently compiled out). */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di",
    "si",    "di",    "bp",    "bx"
};
#endif

/** 32-bit general registers (DISUSE_REG_GEN32, 32-bit index registers). */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax",  "ecx",  "edx",  "ebx",
    "esp",  "ebp",  "esi",  "edi",
    "r8d",  "r9d",  "r10d", "r11d",
    "r12d", "r13d", "r14d", "r15d"
};

/** 64-bit general registers (DISUSE_REG_GEN64, 64-bit index registers). */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax", "rcx", "rdx", "rbx",
    "rsp", "rbp", "rsi", "rdi",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "r15"
};

/** Segment registers (DISUSE_REG_SEG). */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};

/** Floating point stack registers (DISUSE_REG_FP). */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"
};

/** MMX registers (DISUSE_REG_MMX). */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
};

/** XMM registers (DISUSE_REG_XMM, XMM index registers). */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0",  "xmm1",  "xmm2",  "xmm3",
    "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8",  "xmm9",  "xmm10", "xmm11",
    "xmm12", "xmm13", "xmm14", "xmm15"
};

/** YMM registers (DISUSE_REG_YMM, YMM index registers). */
static const char g_aszYasmRegYMM[16][6] =
{
    "ymm0",  "ymm1",  "ymm2",  "ymm3",
    "ymm4",  "ymm5",  "ymm6",  "ymm7",
    "ymm8",  "ymm9",  "ymm10", "ymm11",
    "ymm12", "ymm13", "ymm14", "ymm15"
};

/** Control registers (DISUSE_REG_CR). */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0",  "cr1",  "cr2",  "cr3",
    "cr4",  "cr5",  "cr6",  "cr7",
    "cr8",  "cr9",  "cr10", "cr11",
    "cr12", "cr13", "cr14", "cr15"
};

/** Debug registers (DISUSE_REG_DBG). */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0",  "dr1",  "dr2",  "dr3",
    "dr4",  "dr5",  "dr6",  "dr7",
    "dr8",  "dr9",  "dr10", "dr11",
    "dr12", "dr13", "dr14", "dr15"
};

/** Test registers (DISUSE_REG_TEST). */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0",  "tr1",  "tr2",  "tr3",
    "tr4",  "tr5",  "tr6",  "tr7",
    "tr8",  "tr9",  "tr10", "tr11",
    "tr12", "tr13", "tr14", "tr15"
};
99
100
101
102/**
103 * Gets the base register name for the given parameter.
104 *
105 * @returns Pointer to the register name.
[41789]106 * @param pDis The disassembler state.
[9266]107 * @param pParam The parameter.
108 * @param pcchReg Where to store the length of the name.
109 */
[41790]110static const char *disasmFormatYasmBaseReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
[9266]111{
[62594]112 RT_NOREF_PV(pDis);
113
[41678]114 switch (pParam->fUse & ( DISUSE_REG_GEN8 | DISUSE_REG_GEN16 | DISUSE_REG_GEN32 | DISUSE_REG_GEN64
[53094]115 | DISUSE_REG_FP | DISUSE_REG_MMX | DISUSE_REG_XMM | DISUSE_REG_YMM
116 | DISUSE_REG_CR | DISUSE_REG_DBG | DISUSE_REG_SEG | DISUSE_REG_TEST))
[9266]117
118 {
[41676]119 case DISUSE_REG_GEN8:
[9925]120 {
[101539]121 Assert(pParam->x86.Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen8));
122 const char *psz = g_aszYasmRegGen8[pParam->x86.Base.idxGenReg];
[9925]123 *pcchReg = 2 + !!psz[2] + !!psz[3];
124 return psz;
125 }
[9266]126
[41676]127 case DISUSE_REG_GEN16:
[9266]128 {
[101539]129 Assert(pParam->x86.Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
130 const char *psz = g_aszYasmRegGen16[pParam->x86.Base.idxGenReg];
[9266]131 *pcchReg = 2 + !!psz[2] + !!psz[3];
132 return psz;
133 }
134
[53172]135 // VSIB
136 case DISUSE_REG_XMM | DISUSE_REG_GEN32:
137 case DISUSE_REG_YMM | DISUSE_REG_GEN32:
[41676]138 case DISUSE_REG_GEN32:
[9266]139 {
[101539]140 Assert(pParam->x86.Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
141 const char *psz = g_aszYasmRegGen32[pParam->x86.Base.idxGenReg];
[9266]142 *pcchReg = 2 + !!psz[2] + !!psz[3];
143 return psz;
144 }
145
[53172]146 // VSIB
147 case DISUSE_REG_XMM | DISUSE_REG_GEN64:
148 case DISUSE_REG_YMM | DISUSE_REG_GEN64:
[41676]149 case DISUSE_REG_GEN64:
[9266]150 {
[101539]151 Assert(pParam->x86.Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
152 const char *psz = g_aszYasmRegGen64[pParam->x86.Base.idxGenReg];
[9266]153 *pcchReg = 2 + !!psz[2] + !!psz[3];
154 return psz;
155 }
156
[41676]157 case DISUSE_REG_FP:
[9266]158 {
[101539]159 Assert(pParam->x86.Base.idxFpuReg < RT_ELEMENTS(g_aszYasmRegFP));
160 const char *psz = g_aszYasmRegFP[pParam->x86.Base.idxFpuReg];
[9266]161 *pcchReg = 3;
162 return psz;
163 }
164
[41676]165 case DISUSE_REG_MMX:
[9266]166 {
[101539]167 Assert(pParam->x86.Base.idxMmxReg < RT_ELEMENTS(g_aszYasmRegMMX));
168 const char *psz = g_aszYasmRegMMX[pParam->x86.Base.idxMmxReg];
[9266]169 *pcchReg = 3;
170 return psz;
171 }
172
[41676]173 case DISUSE_REG_XMM:
[9266]174 {
[101539]175 Assert(pParam->x86.Base.idxXmmReg < RT_ELEMENTS(g_aszYasmRegXMM));
176 const char *psz = g_aszYasmRegXMM[pParam->x86.Base.idxXmmReg];
[9266]177 *pcchReg = 4 + !!psz[4];
178 return psz;
179 }
180
[53094]181 case DISUSE_REG_YMM:
182 {
[101539]183 Assert(pParam->x86.Base.idxYmmReg < RT_ELEMENTS(g_aszYasmRegYMM));
184 const char *psz = g_aszYasmRegYMM[pParam->x86.Base.idxYmmReg];
[53094]185 *pcchReg = 4 + !!psz[4];
186 return psz;
187 }
188
[41676]189 case DISUSE_REG_CR:
[9266]190 {
[101539]191 Assert(pParam->x86.Base.idxCtrlReg < RT_ELEMENTS(g_aszYasmRegCRx));
192 const char *psz = g_aszYasmRegCRx[pParam->x86.Base.idxCtrlReg];
[9266]193 *pcchReg = 3;
194 return psz;
195 }
196
[41676]197 case DISUSE_REG_DBG:
[9266]198 {
[101539]199 Assert(pParam->x86.Base.idxDbgReg < RT_ELEMENTS(g_aszYasmRegDRx));
200 const char *psz = g_aszYasmRegDRx[pParam->x86.Base.idxDbgReg];
[9266]201 *pcchReg = 3;
202 return psz;
203 }
204
[41676]205 case DISUSE_REG_SEG:
[9266]206 {
[101539]207 Assert(pParam->x86.Base.idxSegReg < RT_ELEMENTS(g_aszYasmRegCRx));
208 const char *psz = g_aszYasmRegSeg[pParam->x86.Base.idxSegReg];
[9266]209 *pcchReg = 2;
210 return psz;
211 }
212
[41676]213 case DISUSE_REG_TEST:
[9266]214 {
[101539]215 Assert(pParam->x86.Base.idxTestReg < RT_ELEMENTS(g_aszYasmRegTRx));
216 const char *psz = g_aszYasmRegTRx[pParam->x86.Base.idxTestReg];
[9266]217 *pcchReg = 3;
218 return psz;
219 }
220
221 default:
[41678]222 AssertMsgFailed(("%#x\n", pParam->fUse));
[9266]223 *pcchReg = 3;
224 return "r??";
225 }
226}
227
228
229/**
230 * Gets the index register name for the given parameter.
231 *
232 * @returns The index register name.
[41789]233 * @param pDis The disassembler state.
[9266]234 * @param pParam The parameter.
235 * @param pcchReg Where to store the length of the name.
236 */
[41790]237static const char *disasmFormatYasmIndexReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
[9266]238{
[53172]239 if (pParam->fUse & DISUSE_REG_XMM)
240 {
[101539]241 Assert(pParam->x86.Index.idxXmmReg < RT_ELEMENTS(g_aszYasmRegXMM));
242 const char *psz = g_aszYasmRegXMM[pParam->x86.Index.idxXmmReg];
[53172]243 *pcchReg = 4 + !!psz[4];
244 return psz;
245 }
246 else if (pParam->fUse & DISUSE_REG_YMM)
247 {
[101539]248 Assert(pParam->x86.Index.idxYmmReg < RT_ELEMENTS(g_aszYasmRegYMM));
249 const char *psz = g_aszYasmRegYMM[pParam->x86.Index.idxYmmReg];
[53172]250 *pcchReg = 4 + !!psz[4];
251 return psz;
252
253 }
254 else
[101539]255 switch (pDis->x86.uAddrMode)
[9266]256 {
[41675]257 case DISCPUMODE_16BIT:
[9266]258 {
[101539]259 Assert(pParam->x86.Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
260 const char *psz = g_aszYasmRegGen16[pParam->x86.Index.idxGenReg];
[9266]261 *pcchReg = 2 + !!psz[2] + !!psz[3];
262 return psz;
263 }
264
[41675]265 case DISCPUMODE_32BIT:
[9266]266 {
[101539]267 Assert(pParam->x86.Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
268 const char *psz = g_aszYasmRegGen32[pParam->x86.Index.idxGenReg];
[9266]269 *pcchReg = 2 + !!psz[2] + !!psz[3];
270 return psz;
271 }
272
[41675]273 case DISCPUMODE_64BIT:
[9266]274 {
[101539]275 Assert(pParam->x86.Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
276 const char *psz = g_aszYasmRegGen64[pParam->x86.Index.idxGenReg];
[9266]277 *pcchReg = 2 + !!psz[2] + !!psz[3];
278 return psz;
279 }
280
281 default:
[101539]282 AssertMsgFailed(("%#x %#x\n", pParam->fUse, pDis->x86.uAddrMode));
[9266]283 *pcchReg = 3;
284 return "r??";
285 }
286}
287
288
289/**
290 * Formats the current instruction in Yasm (/ Nasm) style.
291 *
292 *
293 * @returns The number of output characters. If this is >= cchBuf, then the content
294 * of pszBuf will be truncated.
[41789]295 * @param pDis Pointer to the disassembler state.
[9266]296 * @param pszBuf The output buffer.
297 * @param cchBuf The size of the output buffer.
298 * @param fFlags Format flags, see DIS_FORMAT_FLAGS_*.
299 * @param pfnGetSymbol Get symbol name for a jmp or call target address. Optional.
300 * @param pvUser User argument for pfnGetSymbol.
301 */
[41790]302DISDECL(size_t) DISFormatYasmEx(PCDISSTATE pDis, char *pszBuf, size_t cchBuf, uint32_t fFlags,
[9266]303 PFNDISGETSYMBOL pfnGetSymbol, void *pvUser)
304{
[47330]305/** @todo monitor and mwait aren't formatted correctly in 64-bit mode. */
[9266]306 /*
307 * Input validation and massaging.
308 */
[41789]309 AssertPtr(pDis);
[9266]310 AssertPtrNull(pszBuf);
311 Assert(pszBuf || !cchBuf);
312 AssertPtrNull(pfnGetSymbol);
313 AssertMsg(DIS_FMT_FLAGS_IS_VALID(fFlags), ("%#x\n", fFlags));
314 if (fFlags & DIS_FMT_FLAGS_ADDR_COMMENT)
315 fFlags = (fFlags & ~DIS_FMT_FLAGS_ADDR_LEFT) | DIS_FMT_FLAGS_ADDR_RIGHT;
316 if (fFlags & DIS_FMT_FLAGS_BYTES_COMMENT)
317 fFlags = (fFlags & ~DIS_FMT_FLAGS_BYTES_LEFT) | DIS_FMT_FLAGS_BYTES_RIGHT;
318
[41789]319 PCDISOPCODE const pOp = pDis->pCurInstr;
[9266]320
321 /*
322 * Output macros
323 */
324 char *pszDst = pszBuf;
325 size_t cchDst = cchBuf;
326 size_t cchOutput = 0;
327#define PUT_C(ch) \
328 do { \
329 cchOutput++; \
330 if (cchDst > 1) \
331 { \
332 cchDst--; \
333 *pszDst++ = (ch); \
334 } \
335 } while (0)
336#define PUT_STR(pszSrc, cchSrc) \
337 do { \
338 cchOutput += (cchSrc); \
339 if (cchDst > (cchSrc)) \
340 { \
341 memcpy(pszDst, (pszSrc), (cchSrc)); \
342 pszDst += (cchSrc); \
343 cchDst -= (cchSrc); \
344 } \
345 else if (cchDst > 1) \
346 { \
347 memcpy(pszDst, (pszSrc), cchDst - 1); \
348 pszDst += cchDst - 1; \
349 cchDst = 1; \
350 } \
351 } while (0)
352#define PUT_SZ(sz) \
353 PUT_STR((sz), sizeof(sz) - 1)
[9271]354#define PUT_SZ_STRICT(szStrict, szRelaxed) \
355 do { if (fFlags & DIS_FMT_FLAGS_STRICT) PUT_SZ(szStrict); else PUT_SZ(szRelaxed); } while (0)
[9266]356#define PUT_PSZ(psz) \
357 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
358#define PUT_NUM(cch, fmt, num) \
359 do { \
360 cchOutput += (cch); \
361 if (cchDst > 1) \
362 { \
363 const size_t cchTmp = RTStrPrintf(pszDst, cchDst, fmt, (num)); \
364 pszDst += cchTmp; \
365 cchDst -= cchTmp; \
366 Assert(cchTmp == (cch) || cchDst == 1); \
367 } \
368 } while (0)
[101546]369#define PUT_NUM_8(num) PUT_NUM(4, !(fFlags & DIS_FMT_FLAGS_C_HEX) ? "0%02xh" : "%#04x", (uint8_t)(num))
370#define PUT_NUM_16(num) PUT_NUM(6, !(fFlags & DIS_FMT_FLAGS_C_HEX) ? "0%04xh" : "%#06x", (uint16_t)(num))
371#define PUT_NUM_32(num) PUT_NUM(10, !(fFlags & DIS_FMT_FLAGS_C_HEX) ? "0%08xh" : "%#010x", (uint32_t)(num))
372#define PUT_NUM_64(num) PUT_NUM(18, !(fFlags & DIS_FMT_FLAGS_C_HEX) ? "0%016RX64h" : "%#018RX64", (uint64_t)(num))
[9266]373
[9271]374#define PUT_NUM_SIGN(cch, fmt, num, stype, utype) \
375 do { \
376 if ((stype)(num) >= 0) \
377 { \
378 PUT_C('+'); \
379 PUT_NUM(cch, fmt, (utype)(num)); \
380 } \
381 else \
382 { \
383 PUT_C('-'); \
384 PUT_NUM(cch, fmt, (utype)-(stype)(num)); \
385 } \
386 } while (0)
[101546]387#define PUT_NUM_S8(num) PUT_NUM_SIGN(4, !(fFlags & DIS_FMT_FLAGS_C_HEX) ? "0%02xh" : "%#04x", num, int8_t, uint8_t)
388#define PUT_NUM_S16(num) PUT_NUM_SIGN(6, !(fFlags & DIS_FMT_FLAGS_C_HEX) ? "0%04xh" : "%#06x", num, int16_t, uint16_t)
389#define PUT_NUM_S32(num) PUT_NUM_SIGN(10, !(fFlags & DIS_FMT_FLAGS_C_HEX) ? "0%08xh" : "%#010x", num, int32_t, uint32_t)
390#define PUT_NUM_S64(num) PUT_NUM_SIGN(18, !(fFlags & DIS_FMT_FLAGS_C_HEX) ? "0%016RX64h" : "%#018RX64", num, int64_t, uint64_t)
[9271]391
[46177]392#define PUT_SYMBOL_TWO(a_rcSym, a_szStart, a_chEnd) \
393 do { \
394 if (RT_SUCCESS(a_rcSym)) \
395 { \
396 PUT_SZ(a_szStart); \
397 PUT_PSZ(szSymbol); \
398 if (off != 0) \
399 { \
400 if ((int8_t)off == off) \
401 PUT_NUM_S8(off); \
402 else if ((int16_t)off == off) \
403 PUT_NUM_S16(off); \
404 else if ((int32_t)off == off) \
405 PUT_NUM_S32(off); \
406 else \
407 PUT_NUM_S64(off); \
408 } \
409 PUT_C(a_chEnd); \
410 } \
411 } while (0)
[9271]412
[46177]413#define PUT_SYMBOL(a_uSeg, a_uAddr, a_szStart, a_chEnd) \
414 do { \
415 if (pfnGetSymbol) \
416 { \
417 int rcSym = pfnGetSymbol(pDis, a_uSeg, a_uAddr, szSymbol, sizeof(szSymbol), &off, pvUser); \
418 PUT_SYMBOL_TWO(rcSym, a_szStart, a_chEnd); \
419 } \
420 } while (0)
421
422
[9266]423 /*
424 * The address?
425 */
426 if (fFlags & DIS_FMT_FLAGS_ADDR_LEFT)
427 {
428#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
[41789]429 if (pDis->uInstrAddr >= _4G)
430 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
[9266]431#endif
[41789]432 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
[9266]433 PUT_C(' ');
434 }
435
436 /*
437 * The opcode bytes?
438 */
439 if (fFlags & DIS_FMT_FLAGS_BYTES_LEFT)
440 {
[41789]441 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
[9266]442 cchOutput += cchTmp;
443 if (cchDst > 1)
444 {
445 if (cchTmp <= cchDst)
446 {
447 cchDst -= cchTmp;
448 pszDst += cchTmp;
449 }
450 else
451 {
452 pszDst += cchDst - 1;
453 cchDst = 1;
454 }
455 }
456
457 /* Some padding to align the instruction. */
[101546]458 uint32_t cbWidth = (fFlags & DIS_FMT_FLAGS_BYTES_WIDTH_MASK) >> DIS_FMT_FLAGS_BYTES_WIDTH_SHIFT;
459 if (!cbWidth)
460 cbWidth = 7;
461 size_t cchPadding = (cbWidth * (2 + !!(fFlags & DIS_FMT_FLAGS_BYTES_SPACED)))
[9266]462 + !!(fFlags & DIS_FMT_FLAGS_BYTES_BRACKETS) * 2
463 + 2;
464 cchPadding = cchTmp + 1 >= cchPadding ? 1 : cchPadding - cchTmp;
465 PUT_STR(g_szSpaces, cchPadding);
466 }
467
468
469 /*
470 * Filter out invalid opcodes first as they need special
471 * treatment. UD2 is an exception and should be handled normally.
472 */
473 size_t const offInstruction = cchOutput;
[41737]474 if ( pOp->uOpcode == OP_INVALID
475 || ( pOp->uOpcode == OP_ILLUD2
[101539]476 && (pDis->x86.fPrefix & DISPREFIX_LOCK)))
[41761]477 PUT_SZ("Illegal opcode");
[9266]478 else
479 {
480 /*
481 * Prefixes
482 */
[101539]483 if (pDis->x86.fPrefix & DISPREFIX_LOCK)
[9266]484 PUT_SZ("lock ");
[101539]485 if (pDis->x86.fPrefix & DISPREFIX_REP)
[9266]486 PUT_SZ("rep ");
[101539]487 else if(pDis->x86.fPrefix & DISPREFIX_REPNE)
[9266]488 PUT_SZ("repne ");
489
490 /*
491 * Adjust the format string to the correct mnemonic
492 * or to avoid things the assembler cannot handle correctly.
493 */
494 char szTmpFmt[48];
495 const char *pszFmt = pOp->pszOpcode;
[60418]496 bool fIgnoresOpSize = false;
[60442]497 bool fMayNeedAddrSize = false;
[41737]498 switch (pOp->uOpcode)
[9266]499 {
500 case OP_JECXZ:
[101539]501 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "jcxz %Jb" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
[9266]502 break;
503 case OP_PUSHF:
[101539]504 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "pushfw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "pushfd" : "pushfq";
[9266]505 break;
506 case OP_POPF:
[101539]507 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "popfw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "popfd" : "popfq";
[9266]508 break;
509 case OP_PUSHA:
[101539]510 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "pushaw" : "pushad";
[9266]511 break;
512 case OP_POPA:
[101539]513 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "popaw" : "popad";
[9266]514 break;
515 case OP_INSB:
516 pszFmt = "insb";
[60442]517 fIgnoresOpSize = fMayNeedAddrSize = true;
[9266]518 break;
519 case OP_INSWD:
[101539]520 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "insw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "insd" : "insq";
[60442]521 fMayNeedAddrSize = true;
[9266]522 break;
523 case OP_OUTSB:
524 pszFmt = "outsb";
[60442]525 fIgnoresOpSize = fMayNeedAddrSize = true;
[9266]526 break;
527 case OP_OUTSWD:
[101539]528 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "outsw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "outsd" : "outsq";
[60442]529 fMayNeedAddrSize = true;
[9266]530 break;
531 case OP_MOVSB:
532 pszFmt = "movsb";
[60442]533 fIgnoresOpSize = fMayNeedAddrSize = true;
[9266]534 break;
535 case OP_MOVSWD:
[101539]536 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "movsw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "movsd" : "movsq";
[60442]537 fMayNeedAddrSize = true;
[9266]538 break;
539 case OP_CMPSB:
540 pszFmt = "cmpsb";
[60442]541 fIgnoresOpSize = fMayNeedAddrSize = true;
[9266]542 break;
543 case OP_CMPWD:
[101539]544 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "cmpsw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "cmpsd" : "cmpsq";
[60442]545 fMayNeedAddrSize = true;
[9266]546 break;
547 case OP_SCASB:
548 pszFmt = "scasb";
[60442]549 fIgnoresOpSize = fMayNeedAddrSize = true;
[9266]550 break;
551 case OP_SCASWD:
[101539]552 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "scasw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "scasd" : "scasq";
[60442]553 fMayNeedAddrSize = true;
[9266]554 break;
555 case OP_LODSB:
556 pszFmt = "lodsb";
[60442]557 fIgnoresOpSize = fMayNeedAddrSize = true;
[9266]558 break;
559 case OP_LODSWD:
[101539]560 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "lodsw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "lodsd" : "lodsq";
[60442]561 fMayNeedAddrSize = true;
[9266]562 break;
563 case OP_STOSB:
564 pszFmt = "stosb";
[60442]565 fIgnoresOpSize = fMayNeedAddrSize = true;
[9266]566 break;
567 case OP_STOSWD:
[101539]568 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "stosw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "stosd" : "stosq";
[60442]569 fMayNeedAddrSize = true;
[9266]570 break;
571 case OP_CBW:
[101539]572 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "cbw" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "cwde" : "cdqe";
[9266]573 break;
574 case OP_CWD:
[101539]575 pszFmt = pDis->x86.uOpMode == DISCPUMODE_16BIT ? "cwd" : pDis->x86.uOpMode == DISCPUMODE_32BIT ? "cdq" : "cqo";
[9266]576 break;
577 case OP_SHL:
578 Assert(pszFmt[3] == '/');
579 pszFmt += 4;
580 break;
581 case OP_XLAT:
582 pszFmt = "xlatb";
583 break;
584 case OP_INT3:
585 pszFmt = "int3";
586 break;
587
588 /*
589 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
590 */
591 case OP_NOP:
[101539]592 if (pDis->x86.bOpCode == 0x90)
[9266]593 /* fine, fine */;
[9342]594 else if (pszFmt[sizeof("nop %Ev") - 1] == '/' && pszFmt[sizeof("nop %Ev")] == 'p')
[9266]595 pszFmt = "prefetch %Eb";
[101539]596 else if (pDis->x86.bOpCode == 0x1f)
[9266]597 {
[41789]598 Assert(pDis->cbInstr >= 3);
[101546]599 PUT_SZ("db 00fh, 01fh");
600 for (unsigned off = 2; off < pDis->cbInstr; off++)
[9266]601 {
602 PUT_C(',');
[101546]603 PUT_C(' ');
604 PUT_NUM_8(pDis->Instr.ab[off]);
[9266]605 }
606 pszFmt = "";
607 }
608 break;
609
610 default:
611 /* ST(X) -> stX (floating point) */
612 if (*pszFmt == 'f' && strchr(pszFmt, '('))
613 {
614 char *pszFmtDst = szTmpFmt;
615 char ch;
616 do
617 {
618 ch = *pszFmt++;
619 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
620 {
621 *pszFmtDst++ = 's';
622 *pszFmtDst++ = 't';
623 pszFmt += 2;
624 ch = *pszFmt;
625 Assert(pszFmt[1] == ')');
626 pszFmt += 2;
627 *pszFmtDst++ = ch;
628 }
629 else
630 *pszFmtDst++ = ch;
631 } while (ch != '\0');
632 pszFmt = szTmpFmt;
633 }
[95314]634 if (strchr("#@&", *pszFmt))
[53172]635 {
636 const char *pszDelim = strchr(pszFmt, '/');
637 const char *pszSpace = (pszDelim ? strchr(pszDelim, ' ') : NULL);
638 if (pszDelim != NULL)
639 {
640 char *pszFmtDst = szTmpFmt;
641 if (pszSpace == NULL) pszSpace = strchr(pszDelim, 0);
[103511]642 if ( (*pszFmt == '#' && !(pDis->x86.bVexByte2 & DISPREFIX_VEX_F_W)) /** @todo check this*/
[101539]643 || (*pszFmt == '@' && !VEXREG_IS256B(pDis->x86.bVexDestReg))
[53172]644 || (*pszFmt == '&' && ( DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
645 || DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
646 || DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse)
647 || DISUSE_IS_EFFECTIVE_ADDR(pDis->Param4.fUse))))
648 {
649 strncpy(pszFmtDst, pszFmt + 1, pszDelim - pszFmt - 1);
650 pszFmtDst += pszDelim - pszFmt - 1;
651 }
652 else
653 {
654 strncpy(pszFmtDst, pszDelim + 1, pszSpace - pszDelim - 1);
655 pszFmtDst += pszSpace - pszDelim - 1;
656 }
657 strcpy (pszFmtDst, pszSpace);
658 pszFmt = szTmpFmt;
659 }
660 }
[9266]661 break;
662
663 /*
664 * Horrible hacks.
665 */
666 case OP_FLD:
[101539]667 if (pDis->x86.bOpCode == 0xdb) /* m80fp workaround. */
668 *(int *)&pDis->Param1.x86.fParam &= ~0x1f; /* make it pure OP_PARM_M */
[9266]669 break;
670 case OP_LAR: /* hack w -> v, probably not correct. */
[101539]671 *(int *)&pDis->Param2.x86.fParam &= ~0x1f;
672 *(int *)&pDis->Param2.x86.fParam |= OP_PARM_v;
[9266]673 break;
674 }
675
676 /*
[60442]677 * Add operand size and address prefixes for outsb, movsb, etc.
[60418]678 */
[101539]679 if (pDis->x86.fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
[60418]680 {
[101539]681 if (fIgnoresOpSize && (pDis->x86.fPrefix & DISPREFIX_OPSIZE) )
[60442]682 {
683 if (pDis->uCpuMode == DISCPUMODE_16BIT)
684 PUT_SZ("o32 ");
685 else
686 PUT_SZ("o16 ");
687 }
[101539]688 if (fMayNeedAddrSize && (pDis->x86.fPrefix & DISPREFIX_ADDRSIZE) )
[60442]689 {
690 if (pDis->uCpuMode == DISCPUMODE_16BIT)
691 PUT_SZ("a32 ");
692 else
693 PUT_SZ("a16 ");
694 }
[60418]695 }
696
697 /*
[9266]698 * Formatting context and associated macros.
699 */
[41789]700 PCDISOPPARAM pParam = &pDis->Param1;
[9266]701 int iParam = 1;
702
703#define PUT_FAR() \
704 do { \
[101539]705 if ( OP_PARM_VSUBTYPE(pParam->x86.fParam) == OP_PARM_p \
[41737]706 && pOp->uOpcode != OP_LDS /* table bugs? */ \
707 && pOp->uOpcode != OP_LES \
708 && pOp->uOpcode != OP_LFS \
709 && pOp->uOpcode != OP_LGS \
710 && pOp->uOpcode != OP_LSS ) \
[9266]711 PUT_SZ("far "); \
712 } while (0)
[9275]713 /** @todo mov ah,ch ends up with a byte 'override'... - check if this wasn't fixed. */
714 /** @todo drop the word/dword/qword override when the src/dst is a register (except for movsx/movzx). */
[9266]715#define PUT_SIZE_OVERRIDE() \
716 do { \
[101539]717 switch (OP_PARM_VSUBTYPE(pParam->x86.fParam)) \
[9266]718 { \
719 case OP_PARM_v: \
[53007]720 case OP_PARM_y: \
[101539]721 switch (pDis->x86.uOpMode) \
[9266]722 { \
[101539]723 case DISCPUMODE_16BIT: if (OP_PARM_VSUBTYPE(pParam->x86.fParam) != OP_PARM_y) PUT_SZ("word "); break; \
[53172]724 case DISCPUMODE_32BIT: \
[103511]725 if (pDis->pCurInstr->uOpcode != OP_GATHER || (pDis->x86.bVexByte2 & DISPREFIX_VEX_F_W)) \
726 { PUT_SZ("dword "); break; } \
[69046]727 RT_FALL_THRU(); \
[41675]728 case DISCPUMODE_64BIT: PUT_SZ("qword "); break; \
[9266]729 default: break; \
730 } \
731 break; \
732 case OP_PARM_b: PUT_SZ("byte "); break; \
[53131]733 case OP_PARM_w: \
[101539]734 if ( OP_PARM_VTYPE(pParam->x86.fParam) == OP_PARM_W \
735 || OP_PARM_VTYPE(pParam->x86.fParam) == OP_PARM_M) \
[53131]736 { \
[101539]737 if (VEXREG_IS256B(pDis->x86.bVexDestReg)) PUT_SZ("dword "); \
[95314]738 else PUT_SZ("word "); \
[53131]739 } \
[103928]740 else if (pOp->uOpcode == OP_MOVZX || pOp->uOpcode == OP_MOVSX) \
741 PUT_SZ("word "); \
[53131]742 break; \
743 case OP_PARM_d: \
[101539]744 if ( OP_PARM_VTYPE(pParam->x86.fParam) == OP_PARM_W \
745 || OP_PARM_VTYPE(pParam->x86.fParam) == OP_PARM_M) \
[53131]746 { \
[101539]747 if (VEXREG_IS256B(pDis->x86.bVexDestReg)) PUT_SZ("qword "); \
[95314]748 else PUT_SZ("dword "); \
[53131]749 } \
750 break; \
751 case OP_PARM_q: \
[101539]752 if ( OP_PARM_VTYPE(pParam->x86.fParam) == OP_PARM_W \
753 || OP_PARM_VTYPE(pParam->x86.fParam) == OP_PARM_M) \
[53131]754 { \
[101539]755 if (VEXREG_IS256B(pDis->x86.bVexDestReg)) PUT_SZ("oword "); \
[95314]756 else PUT_SZ("qword "); \
[53131]757 } \
[103928]758 break; \
[53094]759 case OP_PARM_ps: \
760 case OP_PARM_pd: \
[101539]761 case OP_PARM_x: if (VEXREG_IS256B(pDis->x86.bVexDestReg)) { PUT_SZ("yword "); break; } RT_FALL_THRU(); \
[53094]762 case OP_PARM_ss: \
763 case OP_PARM_sd: \
764 case OP_PARM_dq: PUT_SZ("oword "); break; \
[53131]765 case OP_PARM_qq: PUT_SZ("yword "); break; \
[9266]766 case OP_PARM_p: break; /* see PUT_FAR */ \
[41678]767 case OP_PARM_s: if (pParam->fUse & DISUSE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
[9266]768 case OP_PARM_z: break; \
769 case OP_PARM_NONE: \
[101539]770 if ( OP_PARM_VTYPE(pParam->x86.fParam) == OP_PARM_M \
[41737]771 && ((pParam->fUse & DISUSE_REG_FP) || pOp->uOpcode == OP_FLD)) \
[9266]772 PUT_SZ("tword "); \
773 break; \
774 default: break; /*no pointer type specified/necessary*/ \
775 } \
776 } while (0)
777 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
778#define PUT_SEGMENT_OVERRIDE() \
779 do { \
[101539]780 if (pDis->x86.fPrefix & DISPREFIX_SEG) \
781 PUT_STR(s_szSegPrefix[pDis->x86.idxSegPrefix], 3); \
[9266]782 } while (0)
783
784
785 /*
786 * Segment prefixing for instructions that don't do memory access.
787 */
[101539]788 if ( (pDis->x86.fPrefix & DISPREFIX_SEG)
[41789]789 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
790 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
791 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
[9266]792 {
[101539]793 PUT_STR(s_szSegPrefix[pDis->x86.idxSegPrefix], 2);
[9266]794 PUT_C(' ');
795 }
796
797
798 /*
799 * The formatting loop.
800 */
[9271]801 RTINTPTR off;
802 char szSymbol[128];
[9266]803 char ch;
804 while ((ch = *pszFmt++) != '\0')
805 {
806 if (ch == '%')
807 {
808 ch = *pszFmt++;
809 switch (ch)
810 {
811 /*
[95314]812 * ModRM - Register only / VEX.vvvv.
[9266]813 */
814 case 'C': /* Control register (ParseModRM / UseModRM). */
815 case 'D': /* Debug register (ParseModRM / UseModRM). */
816 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
817 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
818 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
819 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
820 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
[53094]821 case 'H': /* The VEX.vvvv field of the VEX prefix selects a XMM/YMM register. */
[95314]822 case 'B': /* The VEX.vvvv field of the VEX prefix selects a general register (ParseVexDest). */
[53131]823 case 'L': /* The upper 4 bits of the 8-bit immediate selects a XMM/YMM register. */
[9266]824 {
825 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
[41678]826 Assert(!(pParam->fUse & (DISUSE_INDEX | DISUSE_SCALE) /* No SIB here... */));
827 Assert(!(pParam->fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)));
[9266]828
829 size_t cchReg;
[41789]830 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
[9266]831 PUT_STR(pszReg, cchReg);
832 break;
833 }
834
835 /*
836 * ModRM - Register or memory.
837 */
838 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
839 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
840 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
841 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
[95479]842 case 'U': /* ModRM byte may only refer to a XMM/SSE register (ParseModRM / UseModRM). */
843 case 'M': /* ModRM byte may only refer to memory (ParseModRM / UseModRM). */
[9266]844 {
845 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
846
847 PUT_FAR();
[41720]848 uint32_t const fUse = pParam->fUse;
849 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
[9266]850 {
851 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
852 while the register variants deals with 16, 32 & 64 in the normal fashion. */
[101539]853 if ( pParam->x86.fParam != OP_PARM_Ev
[41737]854 || pOp->uOpcode != OP_MOV
[41738]855 || ( pOp->fParam1 != OP_PARM_Sw
856 && pOp->fParam2 != OP_PARM_Sw))
[9266]857 PUT_SIZE_OVERRIDE();
858 PUT_C('[');
859 }
[9271]860 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
[41720]861 && (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)))
[9266]862 {
[41720]863 if ( (fUse & DISUSE_DISPLACEMENT8)
[101539]864 && !pParam->x86.uDisp.i8)
[9266]865 PUT_SZ("byte ");
[41720]866 else if ( (fUse & DISUSE_DISPLACEMENT16)
[101539]867 && (int8_t)pParam->x86.uDisp.i16 == (int16_t)pParam->x86.uDisp.i16)
[9266]868 PUT_SZ("word ");
[41720]869 else if ( (fUse & DISUSE_DISPLACEMENT32)
[101539]870 && (int16_t)pParam->x86.uDisp.i32 == (int32_t)pParam->x86.uDisp.i32) //??
[9266]871 PUT_SZ("dword ");
[41720]872 else if ( (fUse & DISUSE_DISPLACEMENT64)
[101539]873 && (pDis->x86.SIB.Bits.Base != 5 || pDis->x86.ModRM.Bits.Mod != 0)
874 && (int32_t)pParam->x86.uDisp.i64 == (int64_t)pParam->x86.uDisp.i64) //??
[9761]875 PUT_SZ("qword ");
[9266]876 }
[41720]877 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
[9266]878 PUT_SEGMENT_OVERRIDE();
879
[41720]880 bool fBase = (fUse & DISUSE_BASE) /* When exactly is DISUSE_BASE supposed to be set? disasmModRMReg doesn't set it. */
881 || ( (fUse & ( DISUSE_REG_GEN8
882 | DISUSE_REG_GEN16
883 | DISUSE_REG_GEN32
884 | DISUSE_REG_GEN64
885 | DISUSE_REG_FP
886 | DISUSE_REG_MMX
887 | DISUSE_REG_XMM
[53094]888 | DISUSE_REG_YMM
[41720]889 | DISUSE_REG_CR
890 | DISUSE_REG_DBG
891 | DISUSE_REG_SEG
892 | DISUSE_REG_TEST ))
893 && !DISUSE_IS_EFFECTIVE_ADDR(fUse));
[9266]894 if (fBase)
895 {
896 size_t cchReg;
[41789]897 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
[9266]898 PUT_STR(pszReg, cchReg);
899 }
900
[41720]901 if (fUse & DISUSE_INDEX)
[9266]902 {
903 if (fBase)
904 PUT_C('+');
905
906 size_t cchReg;
[41789]907 const char *pszReg = disasmFormatYasmIndexReg(pDis, pParam, &cchReg);
[9266]908 PUT_STR(pszReg, cchReg);
909
[41720]910 if (fUse & DISUSE_SCALE)
[9266]911 {
912 PUT_C('*');
[101539]913 PUT_C('0' + pParam->x86.uScale);
[9266]914 }
915 }
916 else
[41720]917 Assert(!(fUse & DISUSE_SCALE));
[9266]918
[46177]919 int64_t off2 = 0;
[41720]920 if (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32))
[9266]921 {
[41720]922 if (fUse & DISUSE_DISPLACEMENT8)
[101539]923 off2 = pParam->x86.uDisp.i8;
[41720]924 else if (fUse & DISUSE_DISPLACEMENT16)
[101539]925 off2 = pParam->x86.uDisp.i16;
[41720]926 else if (fUse & (DISUSE_DISPLACEMENT32 | DISUSE_RIPDISPLACEMENT32))
[101539]927 off2 = pParam->x86.uDisp.i32;
[41720]928 else if (fUse & DISUSE_DISPLACEMENT64)
[101539]929 off2 = pParam->x86.uDisp.i64;
[10203]930 else
931 {
932 AssertFailed();
[25990]933 off2 = 0;
[10203]934 }
[9266]935
[55497]936 int64_t off3 = off2;
937 if (fBase || (fUse & (DISUSE_INDEX | DISUSE_RIPDISPLACEMENT32)))
[9340]938 {
[55497]939 PUT_C(off3 >= 0 ? '+' : '-');
940 if (off3 < 0)
941 off3 = -off3;
[9340]942 }
[41720]943 if (fUse & DISUSE_DISPLACEMENT8)
[55497]944 PUT_NUM_8( off3);
[41720]945 else if (fUse & DISUSE_DISPLACEMENT16)
[55497]946 PUT_NUM_16(off3);
[41720]947 else if (fUse & DISUSE_DISPLACEMENT32)
[55497]948 PUT_NUM_32(off3);
[41720]949 else if (fUse & DISUSE_DISPLACEMENT64)
[55497]950 PUT_NUM_64(off3);
[9266]951 else
952 {
[55497]953 PUT_NUM_32(off3);
954 PUT_SZ(" wrt rip (");
955 off2 += pDis->uInstrAddr + pDis->cbInstr;
956 PUT_NUM_64(off2);
957 if (pfnGetSymbol)
[101539]958 PUT_SYMBOL((pDis->x86.fPrefix & DISPREFIX_SEG)
959 ? DIS_FMT_SEL_FROM_REG(pDis->x86.idxSegPrefix)
[55497]960 : DIS_FMT_SEL_FROM_REG(DISSELREG_DS),
[101539]961 pDis->x86.uAddrMode == DISCPUMODE_64BIT
[55497]962 ? (uint64_t)off2
[101539]963 : pDis->x86.uAddrMode == DISCPUMODE_32BIT
[55497]964 ? (uint32_t)off2
965 : (uint16_t)off2,
966 " = ",
967 ')');
968 else
969 PUT_C(')');
[9266]970 }
971 }
972
[41720]973 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
[46177]974 {
[55497]975 if (pfnGetSymbol && !fBase && !(fUse & (DISUSE_INDEX | DISUSE_RIPDISPLACEMENT32)) && off2 != 0)
[101539]976 PUT_SYMBOL((pDis->x86.fPrefix & DISPREFIX_SEG)
977 ? DIS_FMT_SEL_FROM_REG(pDis->x86.idxSegPrefix)
[46177]978 : DIS_FMT_SEL_FROM_REG(DISSELREG_DS),
[101539]979 pDis->x86.uAddrMode == DISCPUMODE_64BIT
[46177]980 ? (uint64_t)off2
[101539]981 : pDis->x86.uAddrMode == DISCPUMODE_32BIT
[46177]982 ? (uint32_t)off2
983 : (uint16_t)off2,
[55497]984 " (=",
985 ')');
[9266]986 PUT_C(']');
[46177]987 }
[9266]988 break;
989 }
990
991 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
992 AssertFailed();
993 break;
994
995 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
996 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
[41678]997 switch (pParam->fUse & ( DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64
998 | DISUSE_IMMEDIATE16_SX8 | DISUSE_IMMEDIATE32_SX8 | DISUSE_IMMEDIATE64_SX8))
[9266]999 {
[41676]1000 case DISUSE_IMMEDIATE8:
[9271]1001 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
[41738]1002 && ( (pOp->fParam1 >= OP_PARM_REG_GEN8_START && pOp->fParam1 <= OP_PARM_REG_GEN8_END)
1003 || (pOp->fParam2 >= OP_PARM_REG_GEN8_START && pOp->fParam2 <= OP_PARM_REG_GEN8_END))
[9271]1004 )
[9266]1005 PUT_SZ("strict byte ");
[41741]1006 PUT_NUM_8(pParam->uValue);
[9266]1007 break;
1008
[41676]1009 case DISUSE_IMMEDIATE16:
[101539]1010 if ( pDis->uCpuMode != pDis->x86.uOpMode
[9271]1011 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
[41741]1012 && ( (int8_t)pParam->uValue == (int16_t)pParam->uValue
[41738]1013 || (pOp->fParam1 >= OP_PARM_REG_GEN16_START && pOp->fParam1 <= OP_PARM_REG_GEN16_END)
1014 || (pOp->fParam2 >= OP_PARM_REG_GEN16_START && pOp->fParam2 <= OP_PARM_REG_GEN16_END))
[9271]1015 )
1016 )
[9266]1017 {
[101539]1018 if (OP_PARM_VSUBTYPE(pParam->x86.fParam) == OP_PARM_b)
[9271]1019 PUT_SZ_STRICT("strict byte ", "byte ");
[101539]1020 else if ( OP_PARM_VSUBTYPE(pParam->x86.fParam) == OP_PARM_v
1021 || OP_PARM_VSUBTYPE(pParam->x86.fParam) == OP_PARM_z)
[9271]1022 PUT_SZ_STRICT("strict word ", "word ");
[9266]1023 }
[41741]1024 PUT_NUM_16(pParam->uValue);
[9266]1025 break;
1026
[41676]1027 case DISUSE_IMMEDIATE16_SX8:
[101539]1028 if ( !(pDis->x86.fPrefix & DISPREFIX_OPSIZE)
[42050]1029 || pDis->pCurInstr->uOpcode != OP_PUSH)
1030 PUT_SZ_STRICT("strict byte ", "byte ");
1031 else
1032 PUT_SZ("word ");
[41741]1033 PUT_NUM_16(pParam->uValue);
[9266]1034 break;
1035
[41676]1036 case DISUSE_IMMEDIATE32:
[101539]1037 if ( pDis->x86.uOpMode != (pDis->uCpuMode == DISCPUMODE_16BIT ? DISCPUMODE_16BIT : DISCPUMODE_32BIT) /* not perfect */
[9271]1038 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
[41741]1039 && ( (int8_t)pParam->uValue == (int32_t)pParam->uValue
[41738]1040 || (pOp->fParam1 >= OP_PARM_REG_GEN32_START && pOp->fParam1 <= OP_PARM_REG_GEN32_END)
1041 || (pOp->fParam2 >= OP_PARM_REG_GEN32_START && pOp->fParam2 <= OP_PARM_REG_GEN32_END))
[9271]1042 )
[9266]1043 )
1044 {
[101539]1045 if (OP_PARM_VSUBTYPE(pParam->x86.fParam) == OP_PARM_b)
[9271]1046 PUT_SZ_STRICT("strict byte ", "byte ");
[101539]1047 else if ( OP_PARM_VSUBTYPE(pParam->x86.fParam) == OP_PARM_v
1048 || OP_PARM_VSUBTYPE(pParam->x86.fParam) == OP_PARM_z)
[9271]1049 PUT_SZ_STRICT("strict dword ", "dword ");
[9266]1050 }
[41741]1051 PUT_NUM_32(pParam->uValue);
[46177]1052 if (pDis->uCpuMode == DISCPUMODE_32BIT)
1053 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uValue, " (=", ')');
[9266]1054 break;
1055
[41676]1056 case DISUSE_IMMEDIATE32_SX8:
[101539]1057 if ( !(pDis->x86.fPrefix & DISPREFIX_OPSIZE)
[42050]1058 || pDis->pCurInstr->uOpcode != OP_PUSH)
1059 PUT_SZ_STRICT("strict byte ", "byte ");
1060 else
1061 PUT_SZ("dword ");
[41741]1062 PUT_NUM_32(pParam->uValue);
[9266]1063 break;
1064
[41676]1065 case DISUSE_IMMEDIATE64_SX8:
[101539]1066 if ( !(pDis->x86.fPrefix & DISPREFIX_OPSIZE)
[42050]1067 || pDis->pCurInstr->uOpcode != OP_PUSH)
1068 PUT_SZ_STRICT("strict byte ", "byte ");
1069 else
1070 PUT_SZ("qword ");
[41741]1071 PUT_NUM_64(pParam->uValue);
[10272]1072 break;
1073
[41676]1074 case DISUSE_IMMEDIATE64:
[41741]1075 PUT_NUM_64(pParam->uValue);
[9266]1076 break;
1077
1078 default:
1079 AssertFailed();
1080 break;
1081 }
1082 break;
1083
1084 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
1085 {
1086 int32_t offDisplacement;
1087 Assert(iParam == 1);
[9271]1088 bool fPrefix = (fFlags & DIS_FMT_FLAGS_STRICT)
[41737]1089 && pOp->uOpcode != OP_CALL
1090 && pOp->uOpcode != OP_LOOP
1091 && pOp->uOpcode != OP_LOOPE
1092 && pOp->uOpcode != OP_LOOPNE
1093 && pOp->uOpcode != OP_JECXZ;
1094 if (pOp->uOpcode == OP_CALL)
[9272]1095 fFlags &= ~DIS_FMT_FLAGS_RELATIVE_BRANCH;
[9266]1096
[41678]1097 if (pParam->fUse & DISUSE_IMMEDIATE8_REL)
[9266]1098 {
1099 if (fPrefix)
1100 PUT_SZ("short ");
[41741]1101 offDisplacement = (int8_t)pParam->uValue;
[9266]1102 Assert(*pszFmt == 'b'); pszFmt++;
[9271]1103
1104 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1105 PUT_NUM_S8(offDisplacement);
[9266]1106 }
[41678]1107 else if (pParam->fUse & DISUSE_IMMEDIATE16_REL)
[9266]1108 {
1109 if (fPrefix)
1110 PUT_SZ("near ");
[41741]1111 offDisplacement = (int16_t)pParam->uValue;
[9266]1112 Assert(*pszFmt == 'v'); pszFmt++;
[9271]1113
1114 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1115 PUT_NUM_S16(offDisplacement);
[9266]1116 }
1117 else
1118 {
1119 if (fPrefix)
1120 PUT_SZ("near ");
[41741]1121 offDisplacement = (int32_t)pParam->uValue;
[46177]1122 Assert(pParam->fUse & (DISUSE_IMMEDIATE32_REL | DISUSE_IMMEDIATE64_REL));
[9266]1123 Assert(*pszFmt == 'v'); pszFmt++;
[9271]1124
1125 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1126 PUT_NUM_S32(offDisplacement);
[9266]1127 }
[9271]1128 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
1129 PUT_SZ(" (");
[9266]1130
[41789]1131 RTUINTPTR uTrgAddr = pDis->uInstrAddr + pDis->cbInstr + offDisplacement;
1132 if (pDis->uCpuMode == DISCPUMODE_16BIT)
[9266]1133 PUT_NUM_16(uTrgAddr);
[41789]1134 else if (pDis->uCpuMode == DISCPUMODE_32BIT)
[9266]1135 PUT_NUM_32(uTrgAddr);
1136 else
1137 PUT_NUM_64(uTrgAddr);
[9271]1138
[46177]1139 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
[9271]1140 {
[46177]1141 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " = ", ' ');
1142 PUT_C(')');
[9271]1143 }
[46177]1144 else
1145 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " (", ')');
[9266]1146 break;
1147 }
1148
1149 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
[9271]1150 {
[9266]1151 Assert(*pszFmt == 'p'); pszFmt++;
1152 PUT_FAR();
1153 PUT_SIZE_OVERRIDE();
1154 PUT_SEGMENT_OVERRIDE();
[62452]1155 off = 0;
[10203]1156 int rc = VERR_SYMBOL_NOT_FOUND;
[41678]1157 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
[9266]1158 {
[41676]1159 case DISUSE_IMMEDIATE_ADDR_16_16:
[41741]1160 PUT_NUM_16(pParam->uValue >> 16);
[9266]1161 PUT_C(':');
[41741]1162 PUT_NUM_16(pParam->uValue);
[9271]1163 if (pfnGetSymbol)
[41789]1164 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1165 break;
[41676]1166 case DISUSE_IMMEDIATE_ADDR_16_32:
[41741]1167 PUT_NUM_16(pParam->uValue >> 32);
[9266]1168 PUT_C(':');
[41741]1169 PUT_NUM_32(pParam->uValue);
[9271]1170 if (pfnGetSymbol)
[41789]1171 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1172 break;
[41676]1173 case DISUSE_DISPLACEMENT16:
[41741]1174 PUT_NUM_16(pParam->uValue);
[9271]1175 if (pfnGetSymbol)
[41789]1176 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1177 break;
[41676]1178 case DISUSE_DISPLACEMENT32:
[41741]1179 PUT_NUM_32(pParam->uValue);
[9271]1180 if (pfnGetSymbol)
[41789]1181 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1182 break;
[41676]1183 case DISUSE_DISPLACEMENT64:
[41741]1184 PUT_NUM_64(pParam->uValue);
[9271]1185 if (pfnGetSymbol)
[41789]1186 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint64_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1187 break;
1188 default:
1189 AssertFailed();
1190 break;
1191 }
[9271]1192
[46177]1193 PUT_SYMBOL_TWO(rc, " [", ']');
[9266]1194 break;
[9271]1195 }
[9266]1196
1197 case 'O': /* No ModRM byte (ParseImmAddr). */
[9271]1198 {
[9266]1199 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1200 PUT_FAR();
1201 PUT_SIZE_OVERRIDE();
1202 PUT_C('[');
1203 PUT_SEGMENT_OVERRIDE();
[62452]1204 off = 0;
[10203]1205 int rc = VERR_SYMBOL_NOT_FOUND;
[41678]1206 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
[9266]1207 {
[41676]1208 case DISUSE_IMMEDIATE_ADDR_16_16:
[41741]1209 PUT_NUM_16(pParam->uValue >> 16);
[9266]1210 PUT_C(':');
[41741]1211 PUT_NUM_16(pParam->uValue);
[9271]1212 if (pfnGetSymbol)
[41789]1213 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1214 break;
[41676]1215 case DISUSE_IMMEDIATE_ADDR_16_32:
[41741]1216 PUT_NUM_16(pParam->uValue >> 32);
[9266]1217 PUT_C(':');
[41741]1218 PUT_NUM_32(pParam->uValue);
[9271]1219 if (pfnGetSymbol)
[41789]1220 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1221 break;
[41676]1222 case DISUSE_DISPLACEMENT16:
[101539]1223 PUT_NUM_16(pParam->x86.uDisp.i16);
[9271]1224 if (pfnGetSymbol)
[101539]1225 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->x86.uDisp.u16, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1226 break;
[41676]1227 case DISUSE_DISPLACEMENT32:
[101539]1228 PUT_NUM_32(pParam->x86.uDisp.i32);
[9271]1229 if (pfnGetSymbol)
[101539]1230 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->x86.uDisp.u32, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1231 break;
[41676]1232 case DISUSE_DISPLACEMENT64:
[101539]1233 PUT_NUM_64(pParam->x86.uDisp.i64);
[9271]1234 if (pfnGetSymbol)
[101539]1235 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->x86.uDisp.u64, szSymbol, sizeof(szSymbol), &off, pvUser);
[9266]1236 break;
1237 default:
1238 AssertFailed();
1239 break;
1240 }
1241 PUT_C(']');
[9271]1242
[46177]1243 PUT_SYMBOL_TWO(rc, " (", ')');
[9266]1244 break;
[9271]1245 }
[9266]1246
1247 case 'X': /* DS:SI (ParseXb, ParseXv). */
1248 case 'Y': /* ES:DI (ParseYb, ParseYv). */
1249 {
1250 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1251 PUT_FAR();
1252 PUT_SIZE_OVERRIDE();
1253 PUT_C('[');
[41678]1254 if (pParam->fUse & DISUSE_POINTER_DS_BASED)
[9266]1255 PUT_SZ("ds:");
1256 else
1257 PUT_SZ("es:");
1258
1259 size_t cchReg;
[41789]1260 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
[9266]1261 PUT_STR(pszReg, cchReg);
1262 PUT_C(']');
1263 break;
1264 }
1265
[46949]1266 case 'e': /* Register based on operand size (e.g. %eAX, %eAH) (ParseFixedReg). */
[9266]1267 {
[46949]1268 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2]));
1269 pszFmt += 2;
[9266]1270 size_t cchReg;
[41789]1271 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
[9266]1272 PUT_STR(pszReg, cchReg);
1273 break;
1274 }
1275
1276 default:
1277 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
1278 break;
1279 }
1280 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
1281 }
1282 else
1283 {
1284 PUT_C(ch);
1285 if (ch == ',')
1286 {
1287 Assert(*pszFmt != ' ');
1288 PUT_C(' ');
1289 switch (++iParam)
1290 {
[41789]1291 case 2: pParam = &pDis->Param2; break;
1292 case 3: pParam = &pDis->Param3; break;
[53094]1293 case 4: pParam = &pDis->Param4; break;
[9266]1294 default: pParam = NULL; break;
1295 }
1296 }
1297 }
1298 } /* while more to format */
1299 }
1300
1301 /*
1302 * Any additional output to the right of the instruction?
1303 */
1304 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1305 {
1306 /* some up front padding. */
1307 size_t cchPadding = cchOutput - offInstruction;
1308 cchPadding = cchPadding + 1 >= 42 ? 1 : 42 - cchPadding;
1309 PUT_STR(g_szSpaces, cchPadding);
1310
1311 /* comment? */
1312 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1313 PUT_SZ(";");
1314
1315 /*
1316 * The address?
1317 */
1318 if (fFlags & DIS_FMT_FLAGS_ADDR_RIGHT)
1319 {
1320 PUT_C(' ');
1321#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
[41789]1322 if (pDis->uInstrAddr >= _4G)
1323 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
[9266]1324#endif
[41789]1325 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
[9266]1326 }
1327
1328 /*
1329 * Opcode bytes?
1330 */
1331 if (fFlags & DIS_FMT_FLAGS_BYTES_RIGHT)
1332 {
1333 PUT_C(' ');
[41789]1334 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
[9266]1335 cchOutput += cchTmp;
1336 if (cchTmp >= cchDst)
1337 cchTmp = cchDst - (cchDst != 0);
1338 cchDst -= cchTmp;
1339 pszDst += cchTmp;
1340 }
1341 }
1342
1343 /*
1344 * Terminate it - on overflow we'll have reserved one byte for this.
1345 */
1346 if (cchDst > 0)
1347 *pszDst = '\0';
1348 else
1349 Assert(!cchBuf);
1350
1351 /* clean up macros */
1352#undef PUT_PSZ
1353#undef PUT_SZ
1354#undef PUT_STR
1355#undef PUT_C
1356 return cchOutput;
1357}
1358
1359
1360/**
1361 * Formats the current instruction in Yasm (/ Nasm) style.
1362 *
1363 * This is a simplified version of DISFormatYasmEx() provided for your convenience.
1364 *
1365 *
1366 * @returns The number of output characters. If this is >= cchBuf, then the content
1367 * of pszBuf will be truncated.
[41789]1368 * @param pDis Pointer to the disassembler state.
[9266]1369 * @param pszBuf The output buffer.
1370 * @param cchBuf The size of the output buffer.
1371 */
[41790]1372DISDECL(size_t) DISFormatYasm(PCDISSTATE pDis, char *pszBuf, size_t cchBuf)
[9266]1373{
[41789]1374 return DISFormatYasmEx(pDis, pszBuf, cchBuf, 0 /* fFlags */, NULL /* pfnGetSymbol */, NULL /* pvUser */);
[9266]1375}
1376
[41501]1377
1378/**
1379 * Checks if the encoding of the given disassembled instruction is something we
1380 * can never get YASM to produce.
1381 *
1382 * @returns true if it's odd, false if it isn't.
[41789]1383 * @param pDis The disassembler output. The byte fetcher callback will
[41501]1384 * be used if present as we might need to fetch opcode
1385 * bytes.
1386 */
[41790]1387DISDECL(bool) DISFormatYasmIsOddEncoding(PDISSTATE pDis)
[41501]1388{
1389 /*
1390 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
1391 */
[101539]1392 if ( pDis->x86.uAddrMode != DISCPUMODE_16BIT /// @todo correct?
1393 && pDis->x86.ModRM.Bits.Rm == 4
1394 && pDis->x86.ModRM.Bits.Mod != 3)
[41501]1395 {
1396 /* No scaled index SIB (index=4), except for ESP. */
[101539]1397 if ( pDis->x86.SIB.Bits.Index == 4
1398 && pDis->x86.SIB.Bits.Base != 4)
[41501]1399 return true;
1400
1401 /* EBP + displacement */
[101539]1402 if ( pDis->x86.ModRM.Bits.Mod != 0
1403 && pDis->x86.SIB.Bits.Base == 5
1404 && pDis->x86.SIB.Bits.Scale == 0)
[41501]1405 return true;
1406 }
1407
1408 /*
1409 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
1410 */
[41789]1411 if ( pDis->pCurInstr->uOpcode == OP_SHL
[101539]1412 && pDis->x86.ModRM.Bits.Reg == 6)
[41501]1413 return true;
1414
1415 /*
1416 * Check for multiple prefixes of the same kind.
1417 */
[42050]1418 uint8_t off1stSeg = UINT8_MAX;
1419 uint8_t offOpSize = UINT8_MAX;
1420 uint8_t offAddrSize = UINT8_MAX;
[41501]1421 uint32_t fPrefixes = 0;
[101539]1422 for (uint32_t offOpcode = 0; offOpcode < RT_ELEMENTS(pDis->Instr.ab); offOpcode++)
[41501]1423 {
1424 uint32_t f;
[101539]1425 switch (pDis->Instr.ab[offOpcode])
[41501]1426 {
1427 case 0xf0:
[41675]1428 f = DISPREFIX_LOCK;
[41501]1429 break;
1430
1431 case 0xf2:
1432 case 0xf3:
[41675]1433 f = DISPREFIX_REP; /* yes, both */
[41501]1434 break;
1435
1436 case 0x2e:
1437 case 0x3e:
1438 case 0x26:
1439 case 0x36:
1440 case 0x64:
1441 case 0x65:
[42050]1442 if (off1stSeg == UINT8_MAX)
1443 off1stSeg = offOpcode;
[41675]1444 f = DISPREFIX_SEG;
[41501]1445 break;
1446
1447 case 0x66:
[42050]1448 if (offOpSize == UINT8_MAX)
1449 offOpSize = offOpcode;
[41675]1450 f = DISPREFIX_OPSIZE;
[41501]1451 break;
1452
1453 case 0x67:
[42050]1454 if (offAddrSize == UINT8_MAX)
1455 offAddrSize = offOpcode;
[41675]1456 f = DISPREFIX_ADDRSIZE;
[41501]1457 break;
1458
1459 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1460 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
[41789]1461 f = pDis->uCpuMode == DISCPUMODE_64BIT ? DISPREFIX_REX : 0;
[41501]1462 break;
1463
1464 default:
1465 f = 0;
1466 break;
1467 }
1468 if (!f)
1469 break; /* done */
1470 if (fPrefixes & f)
1471 return true;
1472 fPrefixes |= f;
1473 }
1474
1475 /* segment overrides are fun */
[41675]1476 if (fPrefixes & DISPREFIX_SEG)
[41501]1477 {
1478 /* no effective address which it may apply to. */
[101539]1479 Assert((pDis->x86.fPrefix & DISPREFIX_SEG) || pDis->uCpuMode == DISCPUMODE_64BIT);
[41789]1480 if ( !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
1481 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
1482 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
[41501]1483 return true;
[42050]1484
1485 /* Yasm puts the segment prefixes before the operand prefix with no
1486 way of overriding it. */
1487 if (offOpSize < off1stSeg)
1488 return true;
[41501]1489 }
1490
1491 /* fixed register + addr override doesn't go down all that well. */
[41675]1492 if (fPrefixes & DISPREFIX_ADDRSIZE)
[41501]1493 {
[101539]1494 Assert(pDis->x86.fPrefix & DISPREFIX_ADDRSIZE);
[41789]1495 if ( pDis->pCurInstr->fParam3 == OP_PARM_NONE
1496 && pDis->pCurInstr->fParam2 == OP_PARM_NONE
1497 && ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1498 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END))
[41501]1499 return true;
1500 }
1501
[42050]1502 /* Almost all prefixes are bad for jumps. */
[41501]1503 if (fPrefixes)
1504 {
[41789]1505 switch (pDis->pCurInstr->uOpcode)
[41501]1506 {
1507 /* nop w/ prefix(es). */
1508 case OP_NOP:
1509 return true;
1510
1511 case OP_JMP:
[41789]1512 if ( pDis->pCurInstr->fParam1 != OP_PARM_Jb
1513 && pDis->pCurInstr->fParam1 != OP_PARM_Jv)
[41501]1514 break;
[69046]1515 RT_FALL_THRU();
[41501]1516 case OP_JO:
1517 case OP_JNO:
1518 case OP_JC:
1519 case OP_JNC:
1520 case OP_JE:
1521 case OP_JNE:
1522 case OP_JBE:
1523 case OP_JNBE:
1524 case OP_JS:
1525 case OP_JNS:
1526 case OP_JP:
1527 case OP_JNP:
1528 case OP_JL:
1529 case OP_JNL:
1530 case OP_JLE:
1531 case OP_JNLE:
1532 /** @todo branch hinting 0x2e/0x3e... */
1533 return true;
1534 }
1535
1536 }
1537
[42050]1538 /* All but the segment prefix is bad news for push/pop. */
[41675]1539 if (fPrefixes & ~DISPREFIX_SEG)
[41501]1540 {
[41789]1541 switch (pDis->pCurInstr->uOpcode)
[41501]1542 {
1543 case OP_POP:
1544 case OP_PUSH:
[41789]1545 if ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_SEG_START
1546 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_SEG_END)
[41501]1547 return true;
[41675]1548 if ( (fPrefixes & ~DISPREFIX_OPSIZE)
[41789]1549 && pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1550 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END)
[41501]1551 return true;
1552 break;
1553
1554 case OP_POPA:
1555 case OP_POPF:
1556 case OP_PUSHA:
1557 case OP_PUSHF:
[41675]1558 if (fPrefixes & ~DISPREFIX_OPSIZE)
[41501]1559 return true;
1560 break;
1561 }
1562 }
1563
1564 /* Implicit 8-bit register instructions doesn't mix with operand size. */
[41675]1565 if ( (fPrefixes & DISPREFIX_OPSIZE)
[41789]1566 && ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1567 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1568 || ( pDis->pCurInstr->fParam2 == OP_PARM_Gb /* r8 */
1569 && pDis->pCurInstr->fParam1 == OP_PARM_Eb /* r8/mem8 */))
[41501]1570 )
1571 {
[41789]1572 switch (pDis->pCurInstr->uOpcode)
[41501]1573 {
1574 case OP_ADD:
1575 case OP_OR:
1576 case OP_ADC:
1577 case OP_SBB:
1578 case OP_AND:
1579 case OP_SUB:
1580 case OP_XOR:
1581 case OP_CMP:
1582 return true;
1583 default:
1584 break;
1585 }
1586 }
1587
[42050]1588 /* Instructions taking no address or operand which thus may be annoyingly
1589 difficult to format for yasm. */
1590 if (fPrefixes)
1591 {
1592 switch (pDis->pCurInstr->uOpcode)
1593 {
1594 case OP_STI:
1595 case OP_STC:
1596 case OP_CLI:
1597 case OP_CLD:
1598 case OP_CLC:
1599 case OP_INT:
1600 case OP_INT3:
1601 case OP_INTO:
1602 case OP_HLT:
[42369]1603 /** @todo Many more to can be added here. */
[42050]1604 return true;
1605 default:
1606 break;
1607 }
1608 }
[41501]1609
[42369]1610 /* FPU and other instructions that ignores operand size override. */
1611 if (fPrefixes & DISPREFIX_OPSIZE)
1612 {
1613 switch (pDis->pCurInstr->uOpcode)
1614 {
1615 /* FPU: */
1616 case OP_FIADD:
1617 case OP_FIMUL:
1618 case OP_FISUB:
1619 case OP_FISUBR:
1620 case OP_FIDIV:
1621 case OP_FIDIVR:
1622 /** @todo there are many more. */
1623 return true;
[42050]1624
[42369]1625 case OP_MOV:
1626 /** @todo could be that we're not disassembling these correctly. */
1627 if (pDis->pCurInstr->fParam1 == OP_PARM_Sw)
1628 return true;
1629 /** @todo what about the other way? */
1630 break;
1631
1632 default:
1633 break;
1634 }
1635 }
1636
1637
[41501]1638 /*
1639 * Check for the version of xyz reg,reg instruction that the assembler doesn't use.
1640 *
1641 * For example:
1642 * expected: 1aee sbb ch, dh ; SBB r8, r/m8
1643 * yasm: 18F5 sbb ch, dh ; SBB r/m8, r8
1644 */
[101539]1645 if (pDis->x86.ModRM.Bits.Mod == 3 /* reg,reg */)
[41501]1646 {
[41789]1647 switch (pDis->pCurInstr->uOpcode)
[41501]1648 {
1649 case OP_ADD:
1650 case OP_OR:
1651 case OP_ADC:
1652 case OP_SBB:
1653 case OP_AND:
1654 case OP_SUB:
1655 case OP_XOR:
1656 case OP_CMP:
[41789]1657 if ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1658 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1659 || ( pDis->pCurInstr->fParam1 == OP_PARM_Gv /* rX */
1660 && pDis->pCurInstr->fParam2 == OP_PARM_Ev /* rX/memX */))
[41501]1661 return true;
1662
1663 /* 82 (see table A-6). */
[101539]1664 if (pDis->x86.bOpCode == 0x82)
[41501]1665 return true;
1666 break;
1667
1668 /* ff /0, fe /0, ff /1, fe /0 */
1669 case OP_DEC:
1670 case OP_INC:
1671 return true;
1672
1673 case OP_POP:
1674 case OP_PUSH:
[101539]1675 Assert(pDis->x86.bOpCode == 0x8f);
[41501]1676 return true;
1677
[41505]1678 case OP_MOV:
[101539]1679 if ( pDis->x86.bOpCode == 0x8a
1680 || pDis->x86.bOpCode == 0x8b)
[41505]1681 return true;
1682 break;
1683
[41501]1684 default:
1685 break;
1686 }
1687 }
1688
1689 /* shl eax,1 will be assembled to the form without the immediate byte. */
[41789]1690 if ( pDis->pCurInstr->fParam2 == OP_PARM_Ib
1691 && (uint8_t)pDis->Param2.uValue == 1)
[41501]1692 {
[41789]1693 switch (pDis->pCurInstr->uOpcode)
[41501]1694 {
1695 case OP_SHL:
1696 case OP_SHR:
1697 case OP_SAR:
1698 case OP_RCL:
1699 case OP_RCR:
1700 case OP_ROL:
1701 case OP_ROR:
1702 return true;
1703 }
1704 }
1705
1706 /* And some more - see table A-6. */
[101539]1707 if (pDis->x86.bOpCode == 0x82)
[41501]1708 {
[41789]1709 switch (pDis->pCurInstr->uOpcode)
[41501]1710 {
1711 case OP_ADD:
1712 case OP_OR:
1713 case OP_ADC:
1714 case OP_SBB:
1715 case OP_AND:
1716 case OP_SUB:
1717 case OP_XOR:
1718 case OP_CMP:
1719 return true;
1720 break;
1721 }
1722 }
1723
1724
1725 /* check for REX.X = 1 without SIB. */
1726
1727 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1728 says (intel doesn't appear to care). */
[41789]1729 switch (pDis->pCurInstr->uOpcode)
[41501]1730 {
1731 case OP_SETO:
1732 case OP_SETNO:
1733 case OP_SETC:
1734 case OP_SETNC:
1735 case OP_SETE:
1736 case OP_SETNE:
1737 case OP_SETBE:
1738 case OP_SETNBE:
1739 case OP_SETS:
1740 case OP_SETNS:
1741 case OP_SETP:
1742 case OP_SETNP:
1743 case OP_SETL:
1744 case OP_SETNL:
1745 case OP_SETLE:
1746 case OP_SETNLE:
[101539]1747 AssertMsg(pDis->x86.bOpCode >= 0x90 && pDis->x86.bOpCode <= 0x9f, ("%#x\n", pDis->x86.bOpCode));
1748 if (pDis->x86.ModRM.Bits.Reg != 2)
[41501]1749 return true;
1750 break;
1751 }
1752
1753 /*
1754 * The MOVZX reg32,mem16 instruction without an operand size prefix
1755 * doesn't quite make sense...
1756 */
[41789]1757 if ( pDis->pCurInstr->uOpcode == OP_MOVZX
[101539]1758 && pDis->x86.bOpCode == 0xB7
[41789]1759 && (pDis->uCpuMode == DISCPUMODE_16BIT) != !!(fPrefixes & DISPREFIX_OPSIZE))
[41501]1760 return true;
1761
[101426]1762 /*
1763 * YASM doesn't do ICEBP/INT1/INT01, unlike NASM.
1764 */
[101539]1765 if (pDis->x86.bOpCode == 0xF1)
[101426]1766 return true;
1767
[41501]1768 return false;
1769}
1770
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle
ContactPrivacy/Do Not Sell My InfoTerms of Use