VirtualBox

source: vbox/trunk/src/VBox/Disassembler/DisasmFormatYasm.cpp@ 42050

Last change on this file since 42050 was 42050, checked in by vboxsync, 12 years ago

DisasmFormatYasm.cpp: Fixed formatting of PUSH Ib with an operand size override. Extended DISFormatYasmIsOddEncoding a little bit more.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 62.2 KB
Line 
1/* $Id: DisasmFormatYasm.cpp 42050 2012-07-09 12:41:24Z vboxsync $ */
2/** @file
3 * VBox Disassembler - Yasm(/Nasm) Style Formatter.
4 */
5
6/*
7 * Copyright (C) 2008-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include <VBox/dis.h>
23#include "DisasmInternal.h"
24#include <iprt/string.h>
25#include <iprt/assert.h>
26#include <iprt/ctype.h>
27
28
29/*******************************************************************************
30* Global Variables *
31*******************************************************************************/
/**
 * Run of blanks used as the copy source when padding output columns.
 *
 * The opcode-byte alignment in DISFormatYasmEx copies up to 25 characters
 * from this string (7 * (2 + 1) + 2 + 2), so the literal must never be shorter
 * than the largest padding any caller requests.  It is kept comfortably longer
 * than that.
 */
static const char g_szSpaces[] =
    "                    "
    "                    "
    "                    "
    "                    ";
/*
 * Register name tables.
 *
 * Every row is a fixed-width char array; bytes not covered by the string
 * literal are zero filled by the initialiser, which is what the lookup
 * helpers rely on when they probe the third/fourth/fifth byte of a row to
 * work out the name length.
 */

/** 8-bit general register names. */
static const char g_aszYasmRegGen8[20][5] =
{
    "al",   "cl",   "dl",   "bl",   "ah",   "ch",   "dh",   "bh",
    "r8b",  "r9b",  "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",
    "spl",  "bpl",  "sil",  "dil"
};

/** 16-bit general register names. */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax",   "cx",   "dx",   "bx",   "sp",   "bp",   "si",   "di",
    "r8w",  "r9w",  "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
};

/** Register (pair) names; NOTE(review): looks like the 16-bit addressing forms - confirm against users. */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di", "si",    "di",    "bp",    "bx"
};

/** 32-bit general register names. */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax",  "ecx",  "edx",  "ebx",  "esp",  "ebp",  "esi",  "edi",
    "r8d",  "r9d",  "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
};

/** 64-bit general register names. */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax",  "rcx",  "rdx",  "rbx",  "rsp",  "rbp",  "rsi",  "rdi",
    "r8",   "r9",   "r10",  "r11",  "r12",  "r13",  "r14",  "r15"
};

/** Segment register names. */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};

/** Floating point stack register names. */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"
};

/** MMX register names. */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
};

/** XMM register names. */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0",  "xmm1",  "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8",  "xmm9",  "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};

/** Control register names. */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0",  "cr1",  "cr2",  "cr3",  "cr4",  "cr5",  "cr6",  "cr7",
    "cr8",  "cr9",  "cr10", "cr11", "cr12", "cr13", "cr14", "cr15"
};

/** Debug register names. */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0",  "dr1",  "dr2",  "dr3",  "dr4",  "dr5",  "dr6",  "dr7",
    "dr8",  "dr9",  "dr10", "dr11", "dr12", "dr13", "dr14", "dr15"
};

/** Test register names. */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0",  "tr1",  "tr2",  "tr3",  "tr4",  "tr5",  "tr6",  "tr7",
    "tr8",  "tr9",  "tr10", "tr11", "tr12", "tr13", "tr14", "tr15"
};
82
83
84
85/**
86 * Gets the base register name for the given parameter.
87 *
88 * @returns Pointer to the register name.
89 * @param pDis The disassembler state.
90 * @param pParam The parameter.
91 * @param pcchReg Where to store the length of the name.
92 */
93static const char *disasmFormatYasmBaseReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
94{
95 switch (pParam->fUse & ( DISUSE_REG_GEN8 | DISUSE_REG_GEN16 | DISUSE_REG_GEN32 | DISUSE_REG_GEN64
96 | DISUSE_REG_FP | DISUSE_REG_MMX | DISUSE_REG_XMM | DISUSE_REG_CR
97 | DISUSE_REG_DBG | DISUSE_REG_SEG | DISUSE_REG_TEST))
98
99 {
100 case DISUSE_REG_GEN8:
101 {
102 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen8));
103 const char *psz = g_aszYasmRegGen8[pParam->Base.idxGenReg];
104 *pcchReg = 2 + !!psz[2] + !!psz[3];
105 return psz;
106 }
107
108 case DISUSE_REG_GEN16:
109 {
110 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
111 const char *psz = g_aszYasmRegGen16[pParam->Base.idxGenReg];
112 *pcchReg = 2 + !!psz[2] + !!psz[3];
113 return psz;
114 }
115
116 case DISUSE_REG_GEN32:
117 {
118 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
119 const char *psz = g_aszYasmRegGen32[pParam->Base.idxGenReg];
120 *pcchReg = 2 + !!psz[2] + !!psz[3];
121 return psz;
122 }
123
124 case DISUSE_REG_GEN64:
125 {
126 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
127 const char *psz = g_aszYasmRegGen64[pParam->Base.idxGenReg];
128 *pcchReg = 2 + !!psz[2] + !!psz[3];
129 return psz;
130 }
131
132 case DISUSE_REG_FP:
133 {
134 Assert(pParam->Base.idxFpuReg < RT_ELEMENTS(g_aszYasmRegFP));
135 const char *psz = g_aszYasmRegFP[pParam->Base.idxFpuReg];
136 *pcchReg = 3;
137 return psz;
138 }
139
140 case DISUSE_REG_MMX:
141 {
142 Assert(pParam->Base.idxMmxReg < RT_ELEMENTS(g_aszYasmRegMMX));
143 const char *psz = g_aszYasmRegMMX[pParam->Base.idxMmxReg];
144 *pcchReg = 3;
145 return psz;
146 }
147
148 case DISUSE_REG_XMM:
149 {
150 Assert(pParam->Base.idxXmmReg < RT_ELEMENTS(g_aszYasmRegXMM));
151 const char *psz = g_aszYasmRegXMM[pParam->Base.idxMmxReg];
152 *pcchReg = 4 + !!psz[4];
153 return psz;
154 }
155
156 case DISUSE_REG_CR:
157 {
158 Assert(pParam->Base.idxCtrlReg < RT_ELEMENTS(g_aszYasmRegCRx));
159 const char *psz = g_aszYasmRegCRx[pParam->Base.idxCtrlReg];
160 *pcchReg = 3;
161 return psz;
162 }
163
164 case DISUSE_REG_DBG:
165 {
166 Assert(pParam->Base.idxDbgReg < RT_ELEMENTS(g_aszYasmRegDRx));
167 const char *psz = g_aszYasmRegDRx[pParam->Base.idxDbgReg];
168 *pcchReg = 3;
169 return psz;
170 }
171
172 case DISUSE_REG_SEG:
173 {
174 Assert(pParam->Base.idxSegReg < RT_ELEMENTS(g_aszYasmRegCRx));
175 const char *psz = g_aszYasmRegSeg[pParam->Base.idxSegReg];
176 *pcchReg = 2;
177 return psz;
178 }
179
180 case DISUSE_REG_TEST:
181 {
182 Assert(pParam->Base.idxTestReg < RT_ELEMENTS(g_aszYasmRegTRx));
183 const char *psz = g_aszYasmRegTRx[pParam->Base.idxTestReg];
184 *pcchReg = 3;
185 return psz;
186 }
187
188 default:
189 AssertMsgFailed(("%#x\n", pParam->fUse));
190 *pcchReg = 3;
191 return "r??";
192 }
193}
194
195
196/**
197 * Gets the index register name for the given parameter.
198 *
199 * @returns The index register name.
200 * @param pDis The disassembler state.
201 * @param pParam The parameter.
202 * @param pcchReg Where to store the length of the name.
203 */
204static const char *disasmFormatYasmIndexReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
205{
206 switch (pDis->uAddrMode)
207 {
208 case DISCPUMODE_16BIT:
209 {
210 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
211 const char *psz = g_aszYasmRegGen16[pParam->Index.idxGenReg];
212 *pcchReg = 2 + !!psz[2] + !!psz[3];
213 return psz;
214 }
215
216 case DISCPUMODE_32BIT:
217 {
218 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
219 const char *psz = g_aszYasmRegGen32[pParam->Index.idxGenReg];
220 *pcchReg = 2 + !!psz[2] + !!psz[3];
221 return psz;
222 }
223
224 case DISCPUMODE_64BIT:
225 {
226 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
227 const char *psz = g_aszYasmRegGen64[pParam->Index.idxGenReg];
228 *pcchReg = 2 + !!psz[2] + !!psz[3];
229 return psz;
230 }
231
232 default:
233 AssertMsgFailed(("%#x %#x\n", pParam->fUse, pDis->uAddrMode));
234 *pcchReg = 3;
235 return "r??";
236 }
237}
238
239
240/**
241 * Formats the current instruction in Yasm (/ Nasm) style.
242 *
243 *
244 * @returns The number of output characters. If this is >= cchBuf, then the content
245 * of pszBuf will be truncated.
246 * @param pDis Pointer to the disassembler state.
247 * @param pszBuf The output buffer.
248 * @param cchBuf The size of the output buffer.
249 * @param fFlags Format flags, see DIS_FORMAT_FLAGS_*.
250 * @param pfnGetSymbol Get symbol name for a jmp or call target address. Optional.
251 * @param pvUser User argument for pfnGetSymbol.
252 */
253DISDECL(size_t) DISFormatYasmEx(PCDISSTATE pDis, char *pszBuf, size_t cchBuf, uint32_t fFlags,
254 PFNDISGETSYMBOL pfnGetSymbol, void *pvUser)
255{
256 /*
257 * Input validation and massaging.
258 */
259 AssertPtr(pDis);
260 AssertPtrNull(pszBuf);
261 Assert(pszBuf || !cchBuf);
262 AssertPtrNull(pfnGetSymbol);
263 AssertMsg(DIS_FMT_FLAGS_IS_VALID(fFlags), ("%#x\n", fFlags));
264 if (fFlags & DIS_FMT_FLAGS_ADDR_COMMENT)
265 fFlags = (fFlags & ~DIS_FMT_FLAGS_ADDR_LEFT) | DIS_FMT_FLAGS_ADDR_RIGHT;
266 if (fFlags & DIS_FMT_FLAGS_BYTES_COMMENT)
267 fFlags = (fFlags & ~DIS_FMT_FLAGS_BYTES_LEFT) | DIS_FMT_FLAGS_BYTES_RIGHT;
268
269 PCDISOPCODE const pOp = pDis->pCurInstr;
270
271 /*
272 * Output macros
273 */
274 char *pszDst = pszBuf;
275 size_t cchDst = cchBuf;
276 size_t cchOutput = 0;
277#define PUT_C(ch) \
278 do { \
279 cchOutput++; \
280 if (cchDst > 1) \
281 { \
282 cchDst--; \
283 *pszDst++ = (ch); \
284 } \
285 } while (0)
286#define PUT_STR(pszSrc, cchSrc) \
287 do { \
288 cchOutput += (cchSrc); \
289 if (cchDst > (cchSrc)) \
290 { \
291 memcpy(pszDst, (pszSrc), (cchSrc)); \
292 pszDst += (cchSrc); \
293 cchDst -= (cchSrc); \
294 } \
295 else if (cchDst > 1) \
296 { \
297 memcpy(pszDst, (pszSrc), cchDst - 1); \
298 pszDst += cchDst - 1; \
299 cchDst = 1; \
300 } \
301 } while (0)
302#define PUT_SZ(sz) \
303 PUT_STR((sz), sizeof(sz) - 1)
304#define PUT_SZ_STRICT(szStrict, szRelaxed) \
305 do { if (fFlags & DIS_FMT_FLAGS_STRICT) PUT_SZ(szStrict); else PUT_SZ(szRelaxed); } while (0)
306#define PUT_PSZ(psz) \
307 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
308#define PUT_NUM(cch, fmt, num) \
309 do { \
310 cchOutput += (cch); \
311 if (cchDst > 1) \
312 { \
313 const size_t cchTmp = RTStrPrintf(pszDst, cchDst, fmt, (num)); \
314 pszDst += cchTmp; \
315 cchDst -= cchTmp; \
316 Assert(cchTmp == (cch) || cchDst == 1); \
317 } \
318 } while (0)
319/** @todo add two flags for choosing between %X / %x and h / 0x. */
320#define PUT_NUM_8(num) PUT_NUM(4, "0%02xh", (uint8_t)(num))
321#define PUT_NUM_16(num) PUT_NUM(6, "0%04xh", (uint16_t)(num))
322#define PUT_NUM_32(num) PUT_NUM(10, "0%08xh", (uint32_t)(num))
323#define PUT_NUM_64(num) PUT_NUM(18, "0%016RX64h", (uint64_t)(num))
324
325#define PUT_NUM_SIGN(cch, fmt, num, stype, utype) \
326 do { \
327 if ((stype)(num) >= 0) \
328 { \
329 PUT_C('+'); \
330 PUT_NUM(cch, fmt, (utype)(num)); \
331 } \
332 else \
333 { \
334 PUT_C('-'); \
335 PUT_NUM(cch, fmt, (utype)-(stype)(num)); \
336 } \
337 } while (0)
338#define PUT_NUM_S8(num) PUT_NUM_SIGN(4, "0%02xh", num, int8_t, uint8_t)
339#define PUT_NUM_S16(num) PUT_NUM_SIGN(6, "0%04xh", num, int16_t, uint16_t)
340#define PUT_NUM_S32(num) PUT_NUM_SIGN(10, "0%08xh", num, int32_t, uint32_t)
341#define PUT_NUM_S64(num) PUT_NUM_SIGN(18, "0%016RX64h", num, int64_t, uint64_t)
342
343
344 /*
345 * The address?
346 */
347 if (fFlags & DIS_FMT_FLAGS_ADDR_LEFT)
348 {
349#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
350 if (pDis->uInstrAddr >= _4G)
351 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
352#endif
353 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
354 PUT_C(' ');
355 }
356
357 /*
358 * The opcode bytes?
359 */
360 if (fFlags & DIS_FMT_FLAGS_BYTES_LEFT)
361 {
362 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
363 cchOutput += cchTmp;
364 if (cchDst > 1)
365 {
366 if (cchTmp <= cchDst)
367 {
368 cchDst -= cchTmp;
369 pszDst += cchTmp;
370 }
371 else
372 {
373 pszDst += cchDst - 1;
374 cchDst = 1;
375 }
376 }
377
378 /* Some padding to align the instruction. */
379 size_t cchPadding = (7 * (2 + !!(fFlags & DIS_FMT_FLAGS_BYTES_SPACED)))
380 + !!(fFlags & DIS_FMT_FLAGS_BYTES_BRACKETS) * 2
381 + 2;
382 cchPadding = cchTmp + 1 >= cchPadding ? 1 : cchPadding - cchTmp;
383 PUT_STR(g_szSpaces, cchPadding);
384 }
385
386
387 /*
388 * Filter out invalid opcodes first as they need special
389 * treatment. UD2 is an exception and should be handled normally.
390 */
391 size_t const offInstruction = cchOutput;
392 if ( pOp->uOpcode == OP_INVALID
393 || ( pOp->uOpcode == OP_ILLUD2
394 && (pDis->fPrefix & DISPREFIX_LOCK)))
395 PUT_SZ("Illegal opcode");
396 else
397 {
398 /*
399 * Prefixes
400 */
401 if (pDis->fPrefix & DISPREFIX_LOCK)
402 PUT_SZ("lock ");
403 if(pDis->fPrefix & DISPREFIX_REP)
404 PUT_SZ("rep ");
405 else if(pDis->fPrefix & DISPREFIX_REPNE)
406 PUT_SZ("repne ");
407
408 /*
409 * Adjust the format string to the correct mnemonic
410 * or to avoid things the assembler cannot handle correctly.
411 */
412 char szTmpFmt[48];
413 const char *pszFmt = pOp->pszOpcode;
414 switch (pOp->uOpcode)
415 {
416 case OP_JECXZ:
417 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "jcxz %Jb" : pDis->uOpMode == DISCPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
418 break;
419 case OP_PUSHF:
420 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "pushfw" : pDis->uOpMode == DISCPUMODE_32BIT ? "pushfd" : "pushfq";
421 break;
422 case OP_POPF:
423 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "popfw" : pDis->uOpMode == DISCPUMODE_32BIT ? "popfd" : "popfq";
424 break;
425 case OP_PUSHA:
426 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "pushaw" : "pushad";
427 break;
428 case OP_POPA:
429 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "popaw" : "popad";
430 break;
431 case OP_INSB:
432 pszFmt = "insb";
433 break;
434 case OP_INSWD:
435 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "insw" : pDis->uOpMode == DISCPUMODE_32BIT ? "insd" : "insq";
436 break;
437 case OP_OUTSB:
438 pszFmt = "outsb";
439 break;
440 case OP_OUTSWD:
441 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "outsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "outsd" : "outsq";
442 break;
443 case OP_MOVSB:
444 pszFmt = "movsb";
445 break;
446 case OP_MOVSWD:
447 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "movsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "movsd" : "movsq";
448 break;
449 case OP_CMPSB:
450 pszFmt = "cmpsb";
451 break;
452 case OP_CMPWD:
453 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cmpsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "cmpsd" : "cmpsq";
454 break;
455 case OP_SCASB:
456 pszFmt = "scasb";
457 break;
458 case OP_SCASWD:
459 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "scasw" : pDis->uOpMode == DISCPUMODE_32BIT ? "scasd" : "scasq";
460 break;
461 case OP_LODSB:
462 pszFmt = "lodsb";
463 break;
464 case OP_LODSWD:
465 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "lodsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "lodsd" : "lodsq";
466 break;
467 case OP_STOSB:
468 pszFmt = "stosb";
469 break;
470 case OP_STOSWD:
471 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "stosw" : pDis->uOpMode == DISCPUMODE_32BIT ? "stosd" : "stosq";
472 break;
473 case OP_CBW:
474 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cbw" : pDis->uOpMode == DISCPUMODE_32BIT ? "cwde" : "cdqe";
475 break;
476 case OP_CWD:
477 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cwd" : pDis->uOpMode == DISCPUMODE_32BIT ? "cdq" : "cqo";
478 break;
479 case OP_SHL:
480 Assert(pszFmt[3] == '/');
481 pszFmt += 4;
482 break;
483 case OP_XLAT:
484 pszFmt = "xlatb";
485 break;
486 case OP_INT3:
487 pszFmt = "int3";
488 break;
489
490 /*
491 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
492 */
493 case OP_NOP:
494 if (pDis->bOpCode == 0x90)
495 /* fine, fine */;
496 else if (pszFmt[sizeof("nop %Ev") - 1] == '/' && pszFmt[sizeof("nop %Ev")] == 'p')
497 pszFmt = "prefetch %Eb";
498 else if (pDis->bOpCode == 0x1f)
499 {
500 Assert(pDis->cbInstr >= 3);
501 PUT_SZ("db 00fh, 01fh,");
502 PUT_NUM_8(MAKE_MODRM(pDis->ModRM.Bits.Mod, pDis->ModRM.Bits.Reg, pDis->ModRM.Bits.Rm));
503 for (unsigned i = 3; i < pDis->cbInstr; i++)
504 {
505 PUT_C(',');
506 PUT_NUM_8(0x90); ///@todo fixme.
507 }
508 pszFmt = "";
509 }
510 break;
511
512 default:
513 /* ST(X) -> stX (floating point) */
514 if (*pszFmt == 'f' && strchr(pszFmt, '('))
515 {
516 char *pszFmtDst = szTmpFmt;
517 char ch;
518 do
519 {
520 ch = *pszFmt++;
521 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
522 {
523 *pszFmtDst++ = 's';
524 *pszFmtDst++ = 't';
525 pszFmt += 2;
526 ch = *pszFmt;
527 Assert(pszFmt[1] == ')');
528 pszFmt += 2;
529 *pszFmtDst++ = ch;
530 }
531 else
532 *pszFmtDst++ = ch;
533 } while (ch != '\0');
534 pszFmt = szTmpFmt;
535 }
536 break;
537
538 /*
539 * Horrible hacks.
540 */
541 case OP_FLD:
542 if (pDis->bOpCode == 0xdb) /* m80fp workaround. */
543 *(int *)&pDis->Param1.fParam &= ~0x1f; /* make it pure OP_PARM_M */
544 break;
545 case OP_LAR: /* hack w -> v, probably not correct. */
546 *(int *)&pDis->Param2.fParam &= ~0x1f;
547 *(int *)&pDis->Param2.fParam |= OP_PARM_v;
548 break;
549 }
550
551 /*
552 * Formatting context and associated macros.
553 */
554 PCDISOPPARAM pParam = &pDis->Param1;
555 int iParam = 1;
556
557#define PUT_FAR() \
558 do { \
559 if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_p \
560 && pOp->uOpcode != OP_LDS /* table bugs? */ \
561 && pOp->uOpcode != OP_LES \
562 && pOp->uOpcode != OP_LFS \
563 && pOp->uOpcode != OP_LGS \
564 && pOp->uOpcode != OP_LSS ) \
565 PUT_SZ("far "); \
566 } while (0)
567 /** @todo mov ah,ch ends up with a byte 'override'... - check if this wasn't fixed. */
568 /** @todo drop the work/dword/qword override when the src/dst is a register (except for movsx/movzx). */
569#define PUT_SIZE_OVERRIDE() \
570 do { \
571 switch (OP_PARM_VSUBTYPE(pParam->fParam)) \
572 { \
573 case OP_PARM_v: \
574 switch (pDis->uOpMode) \
575 { \
576 case DISCPUMODE_16BIT: PUT_SZ("word "); break; \
577 case DISCPUMODE_32BIT: PUT_SZ("dword "); break; \
578 case DISCPUMODE_64BIT: PUT_SZ("qword "); break; \
579 default: break; \
580 } \
581 break; \
582 case OP_PARM_b: PUT_SZ("byte "); break; \
583 case OP_PARM_w: PUT_SZ("word "); break; \
584 case OP_PARM_d: PUT_SZ("dword "); break; \
585 case OP_PARM_q: PUT_SZ("qword "); break; \
586 case OP_PARM_dq: \
587 if (OP_PARM_VTYPE(pParam->fParam) != OP_PARM_W) /* these are 128 bit, pray they are all unambiguous.. */ \
588 PUT_SZ("qword "); \
589 break; \
590 case OP_PARM_p: break; /* see PUT_FAR */ \
591 case OP_PARM_s: if (pParam->fUse & DISUSE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
592 case OP_PARM_z: break; \
593 case OP_PARM_NONE: \
594 if ( OP_PARM_VTYPE(pParam->fParam) == OP_PARM_M \
595 && ((pParam->fUse & DISUSE_REG_FP) || pOp->uOpcode == OP_FLD)) \
596 PUT_SZ("tword "); \
597 break; \
598 default: break; /*no pointer type specified/necessary*/ \
599 } \
600 } while (0)
601 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
602#define PUT_SEGMENT_OVERRIDE() \
603 do { \
604 if (pDis->fPrefix & DISPREFIX_SEG) \
605 PUT_STR(s_szSegPrefix[pDis->idxSegPrefix], 3); \
606 } while (0)
607
608
609 /*
610 * Segment prefixing for instructions that doesn't do memory access.
611 */
612 if ( (pDis->fPrefix & DISPREFIX_SEG)
613 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
614 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
615 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
616 {
617 PUT_STR(s_szSegPrefix[pDis->idxSegPrefix], 2);
618 PUT_C(' ');
619 }
620
621
622 /*
623 * The formatting loop.
624 */
625 RTINTPTR off;
626 char szSymbol[128];
627 char ch;
628 while ((ch = *pszFmt++) != '\0')
629 {
630 if (ch == '%')
631 {
632 ch = *pszFmt++;
633 switch (ch)
634 {
635 /*
636 * ModRM - Register only.
637 */
638 case 'C': /* Control register (ParseModRM / UseModRM). */
639 case 'D': /* Debug register (ParseModRM / UseModRM). */
640 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
641 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
642 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
643 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
644 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
645 {
646 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
647 Assert(!(pParam->fUse & (DISUSE_INDEX | DISUSE_SCALE) /* No SIB here... */));
648 Assert(!(pParam->fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)));
649
650 size_t cchReg;
651 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
652 PUT_STR(pszReg, cchReg);
653 break;
654 }
655
656 /*
657 * ModRM - Register or memory.
658 */
659 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
660 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
661 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
662 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
663 case 'M': /* ModRM may only refer to memory (ParseModRM / UseModRM). */
664 {
665 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
666
667 PUT_FAR();
668 uint32_t const fUse = pParam->fUse;
669 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
670 {
671 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
672 while the register variants deals with 16, 32 & 64 in the normal fashion. */
673 if ( pParam->fParam != OP_PARM_Ev
674 || pOp->uOpcode != OP_MOV
675 || ( pOp->fParam1 != OP_PARM_Sw
676 && pOp->fParam2 != OP_PARM_Sw))
677 PUT_SIZE_OVERRIDE();
678 PUT_C('[');
679 }
680 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
681 && (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)))
682 {
683 if ( (fUse & DISUSE_DISPLACEMENT8)
684 && !pParam->uDisp.i8)
685 PUT_SZ("byte ");
686 else if ( (fUse & DISUSE_DISPLACEMENT16)
687 && (int8_t)pParam->uDisp.i16 == (int16_t)pParam->uDisp.i16)
688 PUT_SZ("word ");
689 else if ( (fUse & DISUSE_DISPLACEMENT32)
690 && (int16_t)pParam->uDisp.i32 == (int32_t)pParam->uDisp.i32) //??
691 PUT_SZ("dword ");
692 else if ( (fUse & DISUSE_DISPLACEMENT64)
693 && (pDis->SIB.Bits.Base != 5 || pDis->ModRM.Bits.Mod != 0)
694 && (int32_t)pParam->uDisp.i64 == (int64_t)pParam->uDisp.i64) //??
695 PUT_SZ("qword ");
696 }
697 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
698 PUT_SEGMENT_OVERRIDE();
699
700 bool fBase = (fUse & DISUSE_BASE) /* When exactly is DISUSE_BASE supposed to be set? disasmModRMReg doesn't set it. */
701 || ( (fUse & ( DISUSE_REG_GEN8
702 | DISUSE_REG_GEN16
703 | DISUSE_REG_GEN32
704 | DISUSE_REG_GEN64
705 | DISUSE_REG_FP
706 | DISUSE_REG_MMX
707 | DISUSE_REG_XMM
708 | DISUSE_REG_CR
709 | DISUSE_REG_DBG
710 | DISUSE_REG_SEG
711 | DISUSE_REG_TEST ))
712 && !DISUSE_IS_EFFECTIVE_ADDR(fUse));
713 if (fBase)
714 {
715 size_t cchReg;
716 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
717 PUT_STR(pszReg, cchReg);
718 }
719
720 if (fUse & DISUSE_INDEX)
721 {
722 if (fBase)
723 PUT_C('+');
724
725 size_t cchReg;
726 const char *pszReg = disasmFormatYasmIndexReg(pDis, pParam, &cchReg);
727 PUT_STR(pszReg, cchReg);
728
729 if (fUse & DISUSE_SCALE)
730 {
731 PUT_C('*');
732 PUT_C('0' + pParam->uScale);
733 }
734 }
735 else
736 Assert(!(fUse & DISUSE_SCALE));
737
738 if (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32))
739 {
740 int64_t off2;
741 if (fUse & DISUSE_DISPLACEMENT8)
742 off2 = pParam->uDisp.i8;
743 else if (fUse & DISUSE_DISPLACEMENT16)
744 off2 = pParam->uDisp.i16;
745 else if (fUse & (DISUSE_DISPLACEMENT32 | DISUSE_RIPDISPLACEMENT32))
746 off2 = pParam->uDisp.i32;
747 else if (fUse & DISUSE_DISPLACEMENT64)
748 off2 = pParam->uDisp.i64;
749 else
750 {
751 AssertFailed();
752 off2 = 0;
753 }
754
755 if (fBase || (fUse & DISUSE_INDEX))
756 {
757 PUT_C(off2 >= 0 ? '+' : '-');
758 if (off2 < 0)
759 off2 = -off2;
760 }
761 if (fUse & DISUSE_DISPLACEMENT8)
762 PUT_NUM_8( off2);
763 else if (fUse & DISUSE_DISPLACEMENT16)
764 PUT_NUM_16(off2);
765 else if (fUse & DISUSE_DISPLACEMENT32)
766 PUT_NUM_32(off2);
767 else if (fUse & DISUSE_DISPLACEMENT64)
768 PUT_NUM_64(off2);
769 else
770 {
771 PUT_NUM_32(off2);
772 PUT_SZ(" wrt rip"); //??
773 }
774 }
775
776 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
777 PUT_C(']');
778 break;
779 }
780
781 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
782 AssertFailed();
783 break;
784
785 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
786 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
787 switch (pParam->fUse & ( DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64
788 | DISUSE_IMMEDIATE16_SX8 | DISUSE_IMMEDIATE32_SX8 | DISUSE_IMMEDIATE64_SX8))
789 {
790 case DISUSE_IMMEDIATE8:
791 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
792 && ( (pOp->fParam1 >= OP_PARM_REG_GEN8_START && pOp->fParam1 <= OP_PARM_REG_GEN8_END)
793 || (pOp->fParam2 >= OP_PARM_REG_GEN8_START && pOp->fParam2 <= OP_PARM_REG_GEN8_END))
794 )
795 PUT_SZ("strict byte ");
796 PUT_NUM_8(pParam->uValue);
797 break;
798
799 case DISUSE_IMMEDIATE16:
800 if ( pDis->uCpuMode != pDis->uOpMode
801 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
802 && ( (int8_t)pParam->uValue == (int16_t)pParam->uValue
803 || (pOp->fParam1 >= OP_PARM_REG_GEN16_START && pOp->fParam1 <= OP_PARM_REG_GEN16_END)
804 || (pOp->fParam2 >= OP_PARM_REG_GEN16_START && pOp->fParam2 <= OP_PARM_REG_GEN16_END))
805 )
806 )
807 {
808 if (OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_b)
809 PUT_SZ_STRICT("strict byte ", "byte ");
810 else if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_v
811 || OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_z)
812 PUT_SZ_STRICT("strict word ", "word ");
813 }
814 PUT_NUM_16(pParam->uValue);
815 break;
816
817 case DISUSE_IMMEDIATE16_SX8:
818 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
819 || pDis->pCurInstr->uOpcode != OP_PUSH)
820 PUT_SZ_STRICT("strict byte ", "byte ");
821 else
822 PUT_SZ("word ");
823 PUT_NUM_16(pParam->uValue);
824 break;
825
826 case DISUSE_IMMEDIATE32:
827 if ( pDis->uOpMode != (pDis->uCpuMode == DISCPUMODE_16BIT ? DISCPUMODE_16BIT : DISCPUMODE_32BIT) /* not perfect */
828 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
829 && ( (int8_t)pParam->uValue == (int32_t)pParam->uValue
830 || (pOp->fParam1 >= OP_PARM_REG_GEN32_START && pOp->fParam1 <= OP_PARM_REG_GEN32_END)
831 || (pOp->fParam2 >= OP_PARM_REG_GEN32_START && pOp->fParam2 <= OP_PARM_REG_GEN32_END))
832 )
833 )
834 {
835 if (OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_b)
836 PUT_SZ_STRICT("strict byte ", "byte ");
837 else if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_v
838 || OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_z)
839 PUT_SZ_STRICT("strict dword ", "dword ");
840 }
841 PUT_NUM_32(pParam->uValue);
842 break;
843
844 case DISUSE_IMMEDIATE32_SX8:
845 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
846 || pDis->pCurInstr->uOpcode != OP_PUSH)
847 PUT_SZ_STRICT("strict byte ", "byte ");
848 else
849 PUT_SZ("dword ");
850 PUT_NUM_32(pParam->uValue);
851 break;
852
853 case DISUSE_IMMEDIATE64_SX8:
854 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
855 || pDis->pCurInstr->uOpcode != OP_PUSH)
856 PUT_SZ_STRICT("strict byte ", "byte ");
857 else
858 PUT_SZ("qword ");
859 PUT_NUM_64(pParam->uValue);
860 break;
861
862 case DISUSE_IMMEDIATE64:
863 PUT_NUM_64(pParam->uValue);
864 break;
865
866 default:
867 AssertFailed();
868 break;
869 }
870 break;
871
872 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
873 {
874 int32_t offDisplacement;
875 Assert(iParam == 1);
876 bool fPrefix = (fFlags & DIS_FMT_FLAGS_STRICT)
877 && pOp->uOpcode != OP_CALL
878 && pOp->uOpcode != OP_LOOP
879 && pOp->uOpcode != OP_LOOPE
880 && pOp->uOpcode != OP_LOOPNE
881 && pOp->uOpcode != OP_JECXZ;
882 if (pOp->uOpcode == OP_CALL)
883 fFlags &= ~DIS_FMT_FLAGS_RELATIVE_BRANCH;
884
885 if (pParam->fUse & DISUSE_IMMEDIATE8_REL)
886 {
887 if (fPrefix)
888 PUT_SZ("short ");
889 offDisplacement = (int8_t)pParam->uValue;
890 Assert(*pszFmt == 'b'); pszFmt++;
891
892 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
893 PUT_NUM_S8(offDisplacement);
894 }
895 else if (pParam->fUse & DISUSE_IMMEDIATE16_REL)
896 {
897 if (fPrefix)
898 PUT_SZ("near ");
899 offDisplacement = (int16_t)pParam->uValue;
900 Assert(*pszFmt == 'v'); pszFmt++;
901
902 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
903 PUT_NUM_S16(offDisplacement);
904 }
905 else
906 {
907 if (fPrefix)
908 PUT_SZ("near ");
909 offDisplacement = (int32_t)pParam->uValue;
910 Assert(pParam->fUse & (DISUSE_IMMEDIATE32_REL|DISUSE_IMMEDIATE64_REL));
911 Assert(*pszFmt == 'v'); pszFmt++;
912
913 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
914 PUT_NUM_S32(offDisplacement);
915 }
916 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
917 PUT_SZ(" (");
918
919 RTUINTPTR uTrgAddr = pDis->uInstrAddr + pDis->cbInstr + offDisplacement;
920 if (pDis->uCpuMode == DISCPUMODE_16BIT)
921 PUT_NUM_16(uTrgAddr);
922 else if (pDis->uCpuMode == DISCPUMODE_32BIT)
923 PUT_NUM_32(uTrgAddr);
924 else
925 PUT_NUM_64(uTrgAddr);
926
927 if (pfnGetSymbol)
928 {
929 int rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, szSymbol, sizeof(szSymbol), &off, pvUser);
930 if (RT_SUCCESS(rc))
931 {
932 PUT_SZ(" [");
933 PUT_PSZ(szSymbol);
934 if (off != 0)
935 {
936 if ((int8_t)off == off)
937 PUT_NUM_S8(off);
938 else if ((int16_t)off == off)
939 PUT_NUM_S16(off);
940 else if ((int32_t)off == off)
941 PUT_NUM_S32(off);
942 else
943 PUT_NUM_S64(off);
944 }
945 PUT_C(']');
946 }
947 }
948
949 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
950 PUT_C(')');
951 break;
952 }
953
954 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
955 {
956 Assert(*pszFmt == 'p'); pszFmt++;
957 PUT_FAR();
958 PUT_SIZE_OVERRIDE();
959 PUT_SEGMENT_OVERRIDE();
960 int rc = VERR_SYMBOL_NOT_FOUND;
961 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
962 {
963 case DISUSE_IMMEDIATE_ADDR_16_16:
964 PUT_NUM_16(pParam->uValue >> 16);
965 PUT_C(':');
966 PUT_NUM_16(pParam->uValue);
967 if (pfnGetSymbol)
968 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
969 break;
970 case DISUSE_IMMEDIATE_ADDR_16_32:
971 PUT_NUM_16(pParam->uValue >> 32);
972 PUT_C(':');
973 PUT_NUM_32(pParam->uValue);
974 if (pfnGetSymbol)
975 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
976 break;
977 case DISUSE_DISPLACEMENT16:
978 PUT_NUM_16(pParam->uValue);
979 if (pfnGetSymbol)
980 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
981 break;
982 case DISUSE_DISPLACEMENT32:
983 PUT_NUM_32(pParam->uValue);
984 if (pfnGetSymbol)
985 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
986 break;
987 case DISUSE_DISPLACEMENT64:
988 PUT_NUM_64(pParam->uValue);
989 if (pfnGetSymbol)
990 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint64_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
991 break;
992 default:
993 AssertFailed();
994 break;
995 }
996
997 if (RT_SUCCESS(rc))
998 {
999 PUT_SZ(" [");
1000 PUT_PSZ(szSymbol);
1001 if (off != 0)
1002 {
1003 if ((int8_t)off == off)
1004 PUT_NUM_S8(off);
1005 else if ((int16_t)off == off)
1006 PUT_NUM_S16(off);
1007 else if ((int32_t)off == off)
1008 PUT_NUM_S32(off);
1009 else
1010 PUT_NUM_S64(off);
1011 }
1012 PUT_C(']');
1013 }
1014 break;
1015 }
1016
1017 case 'O': /* No ModRM byte (ParseImmAddr). */
1018 {
1019 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1020 PUT_FAR();
1021 PUT_SIZE_OVERRIDE();
1022 PUT_C('[');
1023 PUT_SEGMENT_OVERRIDE();
1024 int rc = VERR_SYMBOL_NOT_FOUND;
1025 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
1026 {
1027 case DISUSE_IMMEDIATE_ADDR_16_16:
1028 PUT_NUM_16(pParam->uValue >> 16);
1029 PUT_C(':');
1030 PUT_NUM_16(pParam->uValue);
1031 if (pfnGetSymbol)
1032 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1033 break;
1034 case DISUSE_IMMEDIATE_ADDR_16_32:
1035 PUT_NUM_16(pParam->uValue >> 32);
1036 PUT_C(':');
1037 PUT_NUM_32(pParam->uValue);
1038 if (pfnGetSymbol)
1039 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1040 break;
1041 case DISUSE_DISPLACEMENT16:
1042 PUT_NUM_16(pParam->uDisp.i16);
1043 if (pfnGetSymbol)
1044 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u16, szSymbol, sizeof(szSymbol), &off, pvUser);
1045 break;
1046 case DISUSE_DISPLACEMENT32:
1047 PUT_NUM_32(pParam->uDisp.i32);
1048 if (pfnGetSymbol)
1049 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u32, szSymbol, sizeof(szSymbol), &off, pvUser);
1050 break;
1051 case DISUSE_DISPLACEMENT64:
1052 PUT_NUM_64(pParam->uDisp.i64);
1053 if (pfnGetSymbol)
1054 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u64, szSymbol, sizeof(szSymbol), &off, pvUser);
1055 break;
1056 default:
1057 AssertFailed();
1058 break;
1059 }
1060 PUT_C(']');
1061
1062 if (RT_SUCCESS(rc))
1063 {
1064 PUT_SZ(" (");
1065 PUT_PSZ(szSymbol);
1066 if (off != 0)
1067 {
1068 if ((int8_t)off == off)
1069 PUT_NUM_S8(off);
1070 else if ((int16_t)off == off)
1071 PUT_NUM_S16(off);
1072 else if ((int32_t)off == off)
1073 PUT_NUM_S32(off);
1074 else
1075 PUT_NUM_S64(off);
1076 }
1077 PUT_C(')');
1078 }
1079 break;
1080 }
1081
1082 case 'X': /* DS:SI (ParseXb, ParseXv). */
1083 case 'Y': /* ES:DI (ParseYb, ParseYv). */
1084 {
1085 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1086 PUT_FAR();
1087 PUT_SIZE_OVERRIDE();
1088 PUT_C('[');
1089 if (pParam->fUse & DISUSE_POINTER_DS_BASED)
1090 PUT_SZ("ds:");
1091 else
1092 PUT_SZ("es:");
1093
1094 size_t cchReg;
1095 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
1096 PUT_STR(pszReg, cchReg);
1097 PUT_C(']');
1098 break;
1099 }
1100
1101 case 'e': /* Register based on operand size (e.g. %eAX) (ParseFixedReg). */
1102 {
1103 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2])); pszFmt += 2;
1104 size_t cchReg;
1105 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
1106 PUT_STR(pszReg, cchReg);
1107 break;
1108 }
1109
1110 default:
1111 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
1112 break;
1113 }
1114 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
1115 }
1116 else
1117 {
1118 PUT_C(ch);
1119 if (ch == ',')
1120 {
1121 Assert(*pszFmt != ' ');
1122 PUT_C(' ');
1123 switch (++iParam)
1124 {
1125 case 2: pParam = &pDis->Param2; break;
1126 case 3: pParam = &pDis->Param3; break;
1127 default: pParam = NULL; break;
1128 }
1129 }
1130 }
1131 } /* while more to format */
1132 }
1133
1134 /*
1135 * Any additional output to the right of the instruction?
1136 */
1137 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1138 {
1139 /* some up front padding. */
1140 size_t cchPadding = cchOutput - offInstruction;
1141 cchPadding = cchPadding + 1 >= 42 ? 1 : 42 - cchPadding;
1142 PUT_STR(g_szSpaces, cchPadding);
1143
1144 /* comment? */
1145 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1146 PUT_SZ(";");
1147
1148 /*
1149 * The address?
1150 */
1151 if (fFlags & DIS_FMT_FLAGS_ADDR_RIGHT)
1152 {
1153 PUT_C(' ');
1154#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
1155 if (pDis->uInstrAddr >= _4G)
1156 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
1157#endif
1158 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
1159 }
1160
1161 /*
1162 * Opcode bytes?
1163 */
1164 if (fFlags & DIS_FMT_FLAGS_BYTES_RIGHT)
1165 {
1166 PUT_C(' ');
1167 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
1168 cchOutput += cchTmp;
1169 if (cchTmp >= cchDst)
1170 cchTmp = cchDst - (cchDst != 0);
1171 cchDst -= cchTmp;
1172 pszDst += cchTmp;
1173 }
1174 }
1175
1176 /*
1177 * Terminate it - on overflow we'll have reserved one byte for this.
1178 */
1179 if (cchDst > 0)
1180 *pszDst = '\0';
1181 else
1182 Assert(!cchBuf);
1183
1184 /* clean up macros */
1185#undef PUT_PSZ
1186#undef PUT_SZ
1187#undef PUT_STR
1188#undef PUT_C
1189 return cchOutput;
1190}
1191
1192
1193/**
1194 * Formats the current instruction in Yasm (/ Nasm) style.
1195 *
1196 * This is a simplified version of DISFormatYasmEx() provided for your convenience.
1197 *
1198 *
1199 * @returns The number of output characters. If this is >= cchBuf, then the content
1200 * of pszBuf will be truncated.
1201 * @param pDis Pointer to the disassembler state.
1202 * @param pszBuf The output buffer.
1203 * @param cchBuf The size of the output buffer.
1204 */
1205DISDECL(size_t) DISFormatYasm(PCDISSTATE pDis, char *pszBuf, size_t cchBuf)
1206{
1207 return DISFormatYasmEx(pDis, pszBuf, cchBuf, 0 /* fFlags */, NULL /* pfnGetSymbol */, NULL /* pvUser */);
1208}
1209
1210
1211/**
1212 * Checks if the encoding of the given disassembled instruction is something we
1213 * can never get YASM to produce.
1214 *
1215 * @returns true if it's odd, false if it isn't.
1216 * @param pDis The disassembler output. The byte fetcher callback will
1217 * be used if present as we might need to fetch opcode
1218 * bytes.
1219 */
1220DISDECL(bool) DISFormatYasmIsOddEncoding(PDISSTATE pDis)
1221{
1222 /*
1223 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
1224 */
1225 if ( pDis->uAddrMode != DISCPUMODE_16BIT ///@todo correct?
1226 && pDis->ModRM.Bits.Rm == 4
1227 && pDis->ModRM.Bits.Mod != 3)
1228 {
1229 /* No scaled index SIB (index=4), except for ESP. */
1230 if ( pDis->SIB.Bits.Index == 4
1231 && pDis->SIB.Bits.Base != 4)
1232 return true;
1233
1234 /* EBP + displacement */
1235 if ( pDis->ModRM.Bits.Mod != 0
1236 && pDis->SIB.Bits.Base == 5
1237 && pDis->SIB.Bits.Scale == 0)
1238 return true;
1239 }
1240
1241 /*
1242 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
1243 */
1244 if ( pDis->pCurInstr->uOpcode == OP_SHL
1245 && pDis->ModRM.Bits.Reg == 6)
1246 return true;
1247
1248 /*
1249 * Check for multiple prefixes of the same kind.
1250 */
1251 uint8_t off1stSeg = UINT8_MAX;
1252 uint8_t offOpSize = UINT8_MAX;
1253 uint8_t offAddrSize = UINT8_MAX;
1254 uint32_t fPrefixes = 0;
1255 for (uint32_t offOpcode = 0; offOpcode < RT_ELEMENTS(pDis->abInstr); offOpcode++)
1256 {
1257 uint32_t f;
1258 switch (pDis->abInstr[offOpcode])
1259 {
1260 case 0xf0:
1261 f = DISPREFIX_LOCK;
1262 break;
1263
1264 case 0xf2:
1265 case 0xf3:
1266 f = DISPREFIX_REP; /* yes, both */
1267 break;
1268
1269 case 0x2e:
1270 case 0x3e:
1271 case 0x26:
1272 case 0x36:
1273 case 0x64:
1274 case 0x65:
1275 if (off1stSeg == UINT8_MAX)
1276 off1stSeg = offOpcode;
1277 f = DISPREFIX_SEG;
1278 break;
1279
1280 case 0x66:
1281 if (offOpSize == UINT8_MAX)
1282 offOpSize = offOpcode;
1283 f = DISPREFIX_OPSIZE;
1284 break;
1285
1286 case 0x67:
1287 if (offAddrSize == UINT8_MAX)
1288 offAddrSize = offOpcode;
1289 f = DISPREFIX_ADDRSIZE;
1290 break;
1291
1292 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1293 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1294 f = pDis->uCpuMode == DISCPUMODE_64BIT ? DISPREFIX_REX : 0;
1295 break;
1296
1297 default:
1298 f = 0;
1299 break;
1300 }
1301 if (!f)
1302 break; /* done */
1303 if (fPrefixes & f)
1304 return true;
1305 fPrefixes |= f;
1306 }
1307
1308 /* segment overrides are fun */
1309 if (fPrefixes & DISPREFIX_SEG)
1310 {
1311 /* no effective address which it may apply to. */
1312 Assert((pDis->fPrefix & DISPREFIX_SEG) || pDis->uCpuMode == DISCPUMODE_64BIT);
1313 if ( !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
1314 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
1315 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
1316 return true;
1317
1318 /* Yasm puts the segment prefixes before the operand prefix with no
1319 way of overriding it. */
1320 if (offOpSize < off1stSeg)
1321 return true;
1322 }
1323
1324 /* fixed register + addr override doesn't go down all that well. */
1325 if (fPrefixes & DISPREFIX_ADDRSIZE)
1326 {
1327 Assert(pDis->fPrefix & DISPREFIX_ADDRSIZE);
1328 if ( pDis->pCurInstr->fParam3 == OP_PARM_NONE
1329 && pDis->pCurInstr->fParam2 == OP_PARM_NONE
1330 && ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1331 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END))
1332 return true;
1333 }
1334
1335 /* Almost all prefixes are bad for jumps. */
1336 if (fPrefixes)
1337 {
1338 switch (pDis->pCurInstr->uOpcode)
1339 {
1340 /* nop w/ prefix(es). */
1341 case OP_NOP:
1342 return true;
1343
1344 case OP_JMP:
1345 if ( pDis->pCurInstr->fParam1 != OP_PARM_Jb
1346 && pDis->pCurInstr->fParam1 != OP_PARM_Jv)
1347 break;
1348 /* fall thru */
1349 case OP_JO:
1350 case OP_JNO:
1351 case OP_JC:
1352 case OP_JNC:
1353 case OP_JE:
1354 case OP_JNE:
1355 case OP_JBE:
1356 case OP_JNBE:
1357 case OP_JS:
1358 case OP_JNS:
1359 case OP_JP:
1360 case OP_JNP:
1361 case OP_JL:
1362 case OP_JNL:
1363 case OP_JLE:
1364 case OP_JNLE:
1365 /** @todo branch hinting 0x2e/0x3e... */
1366 return true;
1367 }
1368
1369 }
1370
1371 /* All but the segment prefix is bad news for push/pop. */
1372 if (fPrefixes & ~DISPREFIX_SEG)
1373 {
1374 switch (pDis->pCurInstr->uOpcode)
1375 {
1376 case OP_POP:
1377 case OP_PUSH:
1378 if ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_SEG_START
1379 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_SEG_END)
1380 return true;
1381 if ( (fPrefixes & ~DISPREFIX_OPSIZE)
1382 && pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1383 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END)
1384 return true;
1385 break;
1386
1387 case OP_POPA:
1388 case OP_POPF:
1389 case OP_PUSHA:
1390 case OP_PUSHF:
1391 if (fPrefixes & ~DISPREFIX_OPSIZE)
1392 return true;
1393 break;
1394 }
1395 }
1396
1397 /* Implicit 8-bit register instructions doesn't mix with operand size. */
1398 if ( (fPrefixes & DISPREFIX_OPSIZE)
1399 && ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1400 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1401 || ( pDis->pCurInstr->fParam2 == OP_PARM_Gb /* r8 */
1402 && pDis->pCurInstr->fParam1 == OP_PARM_Eb /* r8/mem8 */))
1403 )
1404 {
1405 switch (pDis->pCurInstr->uOpcode)
1406 {
1407 case OP_ADD:
1408 case OP_OR:
1409 case OP_ADC:
1410 case OP_SBB:
1411 case OP_AND:
1412 case OP_SUB:
1413 case OP_XOR:
1414 case OP_CMP:
1415 return true;
1416 default:
1417 break;
1418 }
1419 }
1420
1421 /* Instructions taking no address or operand which thus may be annoyingly
1422 difficult to format for yasm. */
1423 if (fPrefixes)
1424 {
1425 switch (pDis->pCurInstr->uOpcode)
1426 {
1427 case OP_STI:
1428 case OP_STC:
1429 case OP_CLI:
1430 case OP_CLD:
1431 case OP_CLC:
1432 case OP_INT:
1433 case OP_INT3:
1434 case OP_INTO:
1435 case OP_HLT:
1436 /* Many more to can be added... */
1437 return true;
1438 default:
1439 break;
1440 }
1441 }
1442
1443
1444 /*
1445 * Check for the version of xyz reg,reg instruction that the assembler doesn't use.
1446 *
1447 * For example:
1448 * expected: 1aee sbb ch, dh ; SBB r8, r/m8
1449 * yasm: 18F5 sbb ch, dh ; SBB r/m8, r8
1450 */
1451 if (pDis->ModRM.Bits.Mod == 3 /* reg,reg */)
1452 {
1453 switch (pDis->pCurInstr->uOpcode)
1454 {
1455 case OP_ADD:
1456 case OP_OR:
1457 case OP_ADC:
1458 case OP_SBB:
1459 case OP_AND:
1460 case OP_SUB:
1461 case OP_XOR:
1462 case OP_CMP:
1463 if ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1464 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1465 || ( pDis->pCurInstr->fParam1 == OP_PARM_Gv /* rX */
1466 && pDis->pCurInstr->fParam2 == OP_PARM_Ev /* rX/memX */))
1467 return true;
1468
1469 /* 82 (see table A-6). */
1470 if (pDis->bOpCode == 0x82)
1471 return true;
1472 break;
1473
1474 /* ff /0, fe /0, ff /1, fe /0 */
1475 case OP_DEC:
1476 case OP_INC:
1477 return true;
1478
1479 case OP_POP:
1480 case OP_PUSH:
1481 Assert(pDis->bOpCode == 0x8f);
1482 return true;
1483
1484 case OP_MOV:
1485 if ( pDis->bOpCode == 0x8a
1486 || pDis->bOpCode == 0x8b)
1487 return true;
1488 break;
1489
1490 default:
1491 break;
1492 }
1493 }
1494
1495 /* shl eax,1 will be assembled to the form without the immediate byte. */
1496 if ( pDis->pCurInstr->fParam2 == OP_PARM_Ib
1497 && (uint8_t)pDis->Param2.uValue == 1)
1498 {
1499 switch (pDis->pCurInstr->uOpcode)
1500 {
1501 case OP_SHL:
1502 case OP_SHR:
1503 case OP_SAR:
1504 case OP_RCL:
1505 case OP_RCR:
1506 case OP_ROL:
1507 case OP_ROR:
1508 return true;
1509 }
1510 }
1511
1512 /* And some more - see table A-6. */
1513 if (pDis->bOpCode == 0x82)
1514 {
1515 switch (pDis->pCurInstr->uOpcode)
1516 {
1517 case OP_ADD:
1518 case OP_OR:
1519 case OP_ADC:
1520 case OP_SBB:
1521 case OP_AND:
1522 case OP_SUB:
1523 case OP_XOR:
1524 case OP_CMP:
1525 return true;
1526 break;
1527 }
1528 }
1529
1530
1531 /* check for REX.X = 1 without SIB. */
1532
1533 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1534 says (intel doesn't appear to care). */
1535 switch (pDis->pCurInstr->uOpcode)
1536 {
1537 case OP_SETO:
1538 case OP_SETNO:
1539 case OP_SETC:
1540 case OP_SETNC:
1541 case OP_SETE:
1542 case OP_SETNE:
1543 case OP_SETBE:
1544 case OP_SETNBE:
1545 case OP_SETS:
1546 case OP_SETNS:
1547 case OP_SETP:
1548 case OP_SETNP:
1549 case OP_SETL:
1550 case OP_SETNL:
1551 case OP_SETLE:
1552 case OP_SETNLE:
1553 AssertMsg(pDis->bOpCode >= 0x90 && pDis->bOpCode <= 0x9f, ("%#x\n", pDis->bOpCode));
1554 if (pDis->ModRM.Bits.Reg != 2)
1555 return true;
1556 break;
1557 }
1558
1559 /*
1560 * The MOVZX reg32,mem16 instruction without an operand size prefix
1561 * doesn't quite make sense...
1562 */
1563 if ( pDis->pCurInstr->uOpcode == OP_MOVZX
1564 && pDis->bOpCode == 0xB7
1565 && (pDis->uCpuMode == DISCPUMODE_16BIT) != !!(fPrefixes & DISPREFIX_OPSIZE))
1566 return true;
1567
1568 return false;
1569}
1570
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette