VirtualBox

source: vbox/trunk/src/VBox/Disassembler/DisasmFormatYasm.cpp@ 53007

Last change on this file since 53007 was 53007, checked in by vboxsync, 10 years ago

DIS: Fixed VMREAD/VMWRITE disassembling, added support for MOVBE, POPCNT, TZCNT, LZCNT, ADCX, ADOX and CRC32 (only CRC32 Gd, Eb & CRC32 Gd, Ey forms).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 62.9 KB
Line 
1/* $Id: DisasmFormatYasm.cpp 53007 2014-10-09 09:13:24Z vboxsync $ */
2/** @file
3 * VBox Disassembler - Yasm(/Nasm) Style Formatter.
4 */
5
6/*
7 * Copyright (C) 2008-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#include <VBox/dis.h>
23#include "DisasmInternal.h"
24#include <iprt/string.h>
25#include <iprt/assert.h>
26#include <iprt/ctype.h>
27
28
29/*******************************************************************************
30* Global Variables *
31*******************************************************************************/
/** Run of blanks used as the source for alignment padding.
 *  It must hold at least as many characters as the largest padding request
 *  copied out of it; the opcode-byte alignment in this file asks for up to
 *  7 * 3 + 2 + 2 = 25 characters. */
static const char g_szSpaces[] =
"                                                                               ";
/*
 * Register name tables.
 *
 * Every entry is a fixed-width, zero-terminated slot.  Names shorter than the
 * slot are zero filled by the initializer, so the bytes following a short
 * name always read as '\0'.
 */

/** 8-bit general registers. */
static const char g_aszYasmRegGen8[20][5] =
{
    "al",   "cl",   "dl",   "bl",
    "ah",   "ch",   "dh",   "bh",
    "r8b",  "r9b",  "r10b", "r11b",
    "r12b", "r13b", "r14b", "r15b",
    "spl",  "bpl",  "sil",  "dil"
};

/** 16-bit general registers. */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax",   "cx",   "dx",   "bx",
    "sp",   "bp",   "si",   "di",
    "r8w",  "r9w",  "r10w", "r11w",
    "r12w", "r13w", "r14w", "r15w"
};

/** 16-bit register combinations and single registers. */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di",
    "si",    "di",    "bp",    "bx"
};

/** 32-bit general registers. */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax",  "ecx",  "edx",  "ebx",
    "esp",  "ebp",  "esi",  "edi",
    "r8d",  "r9d",  "r10d", "r11d",
    "r12d", "r13d", "r14d", "r15d"
};

/** 64-bit general registers. */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax", "rcx", "rdx", "rbx",
    "rsp", "rbp", "rsi", "rdi",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "r15"
};

/** Segment registers. */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};

/** Floating point stack registers. */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3",
    "st4", "st5", "st6", "st7"
};

/** MMX registers. */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3",
    "mm4", "mm5", "mm6", "mm7"
};

/** XMM registers. */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0",  "xmm1",  "xmm2",  "xmm3",
    "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8",  "xmm9",  "xmm10", "xmm11",
    "xmm12", "xmm13", "xmm14", "xmm15"
};

/** Control registers. */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0",  "cr1",  "cr2",  "cr3",
    "cr4",  "cr5",  "cr6",  "cr7",
    "cr8",  "cr9",  "cr10", "cr11",
    "cr12", "cr13", "cr14", "cr15"
};

/** Debug registers. */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0",  "dr1",  "dr2",  "dr3",
    "dr4",  "dr5",  "dr6",  "dr7",
    "dr8",  "dr9",  "dr10", "dr11",
    "dr12", "dr13", "dr14", "dr15"
};

/** Test registers. */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0",  "tr1",  "tr2",  "tr3",
    "tr4",  "tr5",  "tr6",  "tr7",
    "tr8",  "tr9",  "tr10", "tr11",
    "tr12", "tr13", "tr14", "tr15"
};
82
83
84
85/**
86 * Gets the base register name for the given parameter.
87 *
88 * @returns Pointer to the register name.
89 * @param pDis The disassembler state.
90 * @param pParam The parameter.
91 * @param pcchReg Where to store the length of the name.
92 */
93static const char *disasmFormatYasmBaseReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
94{
95 switch (pParam->fUse & ( DISUSE_REG_GEN8 | DISUSE_REG_GEN16 | DISUSE_REG_GEN32 | DISUSE_REG_GEN64
96 | DISUSE_REG_FP | DISUSE_REG_MMX | DISUSE_REG_XMM | DISUSE_REG_CR
97 | DISUSE_REG_DBG | DISUSE_REG_SEG | DISUSE_REG_TEST))
98
99 {
100 case DISUSE_REG_GEN8:
101 {
102 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen8));
103 const char *psz = g_aszYasmRegGen8[pParam->Base.idxGenReg];
104 *pcchReg = 2 + !!psz[2] + !!psz[3];
105 return psz;
106 }
107
108 case DISUSE_REG_GEN16:
109 {
110 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
111 const char *psz = g_aszYasmRegGen16[pParam->Base.idxGenReg];
112 *pcchReg = 2 + !!psz[2] + !!psz[3];
113 return psz;
114 }
115
116 case DISUSE_REG_GEN32:
117 {
118 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
119 const char *psz = g_aszYasmRegGen32[pParam->Base.idxGenReg];
120 *pcchReg = 2 + !!psz[2] + !!psz[3];
121 return psz;
122 }
123
124 case DISUSE_REG_GEN64:
125 {
126 Assert(pParam->Base.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
127 const char *psz = g_aszYasmRegGen64[pParam->Base.idxGenReg];
128 *pcchReg = 2 + !!psz[2] + !!psz[3];
129 return psz;
130 }
131
132 case DISUSE_REG_FP:
133 {
134 Assert(pParam->Base.idxFpuReg < RT_ELEMENTS(g_aszYasmRegFP));
135 const char *psz = g_aszYasmRegFP[pParam->Base.idxFpuReg];
136 *pcchReg = 3;
137 return psz;
138 }
139
140 case DISUSE_REG_MMX:
141 {
142 Assert(pParam->Base.idxMmxReg < RT_ELEMENTS(g_aszYasmRegMMX));
143 const char *psz = g_aszYasmRegMMX[pParam->Base.idxMmxReg];
144 *pcchReg = 3;
145 return psz;
146 }
147
148 case DISUSE_REG_XMM:
149 {
150 Assert(pParam->Base.idxXmmReg < RT_ELEMENTS(g_aszYasmRegXMM));
151 const char *psz = g_aszYasmRegXMM[pParam->Base.idxMmxReg];
152 *pcchReg = 4 + !!psz[4];
153 return psz;
154 }
155
156 case DISUSE_REG_CR:
157 {
158 Assert(pParam->Base.idxCtrlReg < RT_ELEMENTS(g_aszYasmRegCRx));
159 const char *psz = g_aszYasmRegCRx[pParam->Base.idxCtrlReg];
160 *pcchReg = 3;
161 return psz;
162 }
163
164 case DISUSE_REG_DBG:
165 {
166 Assert(pParam->Base.idxDbgReg < RT_ELEMENTS(g_aszYasmRegDRx));
167 const char *psz = g_aszYasmRegDRx[pParam->Base.idxDbgReg];
168 *pcchReg = 3;
169 return psz;
170 }
171
172 case DISUSE_REG_SEG:
173 {
174 Assert(pParam->Base.idxSegReg < RT_ELEMENTS(g_aszYasmRegCRx));
175 const char *psz = g_aszYasmRegSeg[pParam->Base.idxSegReg];
176 *pcchReg = 2;
177 return psz;
178 }
179
180 case DISUSE_REG_TEST:
181 {
182 Assert(pParam->Base.idxTestReg < RT_ELEMENTS(g_aszYasmRegTRx));
183 const char *psz = g_aszYasmRegTRx[pParam->Base.idxTestReg];
184 *pcchReg = 3;
185 return psz;
186 }
187
188 default:
189 AssertMsgFailed(("%#x\n", pParam->fUse));
190 *pcchReg = 3;
191 return "r??";
192 }
193}
194
195
196/**
197 * Gets the index register name for the given parameter.
198 *
199 * @returns The index register name.
200 * @param pDis The disassembler state.
201 * @param pParam The parameter.
202 * @param pcchReg Where to store the length of the name.
203 */
204static const char *disasmFormatYasmIndexReg(PCDISSTATE pDis, PCDISOPPARAM pParam, size_t *pcchReg)
205{
206 switch (pDis->uAddrMode)
207 {
208 case DISCPUMODE_16BIT:
209 {
210 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen16));
211 const char *psz = g_aszYasmRegGen16[pParam->Index.idxGenReg];
212 *pcchReg = 2 + !!psz[2] + !!psz[3];
213 return psz;
214 }
215
216 case DISCPUMODE_32BIT:
217 {
218 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen32));
219 const char *psz = g_aszYasmRegGen32[pParam->Index.idxGenReg];
220 *pcchReg = 2 + !!psz[2] + !!psz[3];
221 return psz;
222 }
223
224 case DISCPUMODE_64BIT:
225 {
226 Assert(pParam->Index.idxGenReg < RT_ELEMENTS(g_aszYasmRegGen64));
227 const char *psz = g_aszYasmRegGen64[pParam->Index.idxGenReg];
228 *pcchReg = 2 + !!psz[2] + !!psz[3];
229 return psz;
230 }
231
232 default:
233 AssertMsgFailed(("%#x %#x\n", pParam->fUse, pDis->uAddrMode));
234 *pcchReg = 3;
235 return "r??";
236 }
237}
238
239
240/**
241 * Formats the current instruction in Yasm (/ Nasm) style.
242 *
243 *
244 * @returns The number of output characters. If this is >= cchBuf, then the content
245 * of pszBuf will be truncated.
246 * @param pDis Pointer to the disassembler state.
247 * @param pszBuf The output buffer.
248 * @param cchBuf The size of the output buffer.
249 * @param fFlags Format flags, see DIS_FORMAT_FLAGS_*.
250 * @param pfnGetSymbol Get symbol name for a jmp or call target address. Optional.
251 * @param pvUser User argument for pfnGetSymbol.
252 */
253DISDECL(size_t) DISFormatYasmEx(PCDISSTATE pDis, char *pszBuf, size_t cchBuf, uint32_t fFlags,
254 PFNDISGETSYMBOL pfnGetSymbol, void *pvUser)
255{
256/** @todo monitor and mwait aren't formatted correctly in 64-bit mode. */
257 /*
258 * Input validation and massaging.
259 */
260 AssertPtr(pDis);
261 AssertPtrNull(pszBuf);
262 Assert(pszBuf || !cchBuf);
263 AssertPtrNull(pfnGetSymbol);
264 AssertMsg(DIS_FMT_FLAGS_IS_VALID(fFlags), ("%#x\n", fFlags));
265 if (fFlags & DIS_FMT_FLAGS_ADDR_COMMENT)
266 fFlags = (fFlags & ~DIS_FMT_FLAGS_ADDR_LEFT) | DIS_FMT_FLAGS_ADDR_RIGHT;
267 if (fFlags & DIS_FMT_FLAGS_BYTES_COMMENT)
268 fFlags = (fFlags & ~DIS_FMT_FLAGS_BYTES_LEFT) | DIS_FMT_FLAGS_BYTES_RIGHT;
269
270 PCDISOPCODE const pOp = pDis->pCurInstr;
271
272 /*
273 * Output macros
274 */
275 char *pszDst = pszBuf;
276 size_t cchDst = cchBuf;
277 size_t cchOutput = 0;
278#define PUT_C(ch) \
279 do { \
280 cchOutput++; \
281 if (cchDst > 1) \
282 { \
283 cchDst--; \
284 *pszDst++ = (ch); \
285 } \
286 } while (0)
287#define PUT_STR(pszSrc, cchSrc) \
288 do { \
289 cchOutput += (cchSrc); \
290 if (cchDst > (cchSrc)) \
291 { \
292 memcpy(pszDst, (pszSrc), (cchSrc)); \
293 pszDst += (cchSrc); \
294 cchDst -= (cchSrc); \
295 } \
296 else if (cchDst > 1) \
297 { \
298 memcpy(pszDst, (pszSrc), cchDst - 1); \
299 pszDst += cchDst - 1; \
300 cchDst = 1; \
301 } \
302 } while (0)
303#define PUT_SZ(sz) \
304 PUT_STR((sz), sizeof(sz) - 1)
305#define PUT_SZ_STRICT(szStrict, szRelaxed) \
306 do { if (fFlags & DIS_FMT_FLAGS_STRICT) PUT_SZ(szStrict); else PUT_SZ(szRelaxed); } while (0)
307#define PUT_PSZ(psz) \
308 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
309#define PUT_NUM(cch, fmt, num) \
310 do { \
311 cchOutput += (cch); \
312 if (cchDst > 1) \
313 { \
314 const size_t cchTmp = RTStrPrintf(pszDst, cchDst, fmt, (num)); \
315 pszDst += cchTmp; \
316 cchDst -= cchTmp; \
317 Assert(cchTmp == (cch) || cchDst == 1); \
318 } \
319 } while (0)
320/** @todo add two flags for choosing between %X / %x and h / 0x. */
321#define PUT_NUM_8(num) PUT_NUM(4, "0%02xh", (uint8_t)(num))
322#define PUT_NUM_16(num) PUT_NUM(6, "0%04xh", (uint16_t)(num))
323#define PUT_NUM_32(num) PUT_NUM(10, "0%08xh", (uint32_t)(num))
324#define PUT_NUM_64(num) PUT_NUM(18, "0%016RX64h", (uint64_t)(num))
325
326#define PUT_NUM_SIGN(cch, fmt, num, stype, utype) \
327 do { \
328 if ((stype)(num) >= 0) \
329 { \
330 PUT_C('+'); \
331 PUT_NUM(cch, fmt, (utype)(num)); \
332 } \
333 else \
334 { \
335 PUT_C('-'); \
336 PUT_NUM(cch, fmt, (utype)-(stype)(num)); \
337 } \
338 } while (0)
339#define PUT_NUM_S8(num) PUT_NUM_SIGN(4, "0%02xh", num, int8_t, uint8_t)
340#define PUT_NUM_S16(num) PUT_NUM_SIGN(6, "0%04xh", num, int16_t, uint16_t)
341#define PUT_NUM_S32(num) PUT_NUM_SIGN(10, "0%08xh", num, int32_t, uint32_t)
342#define PUT_NUM_S64(num) PUT_NUM_SIGN(18, "0%016RX64h", num, int64_t, uint64_t)
343
344#define PUT_SYMBOL_TWO(a_rcSym, a_szStart, a_chEnd) \
345 do { \
346 if (RT_SUCCESS(a_rcSym)) \
347 { \
348 PUT_SZ(a_szStart); \
349 PUT_PSZ(szSymbol); \
350 if (off != 0) \
351 { \
352 if ((int8_t)off == off) \
353 PUT_NUM_S8(off); \
354 else if ((int16_t)off == off) \
355 PUT_NUM_S16(off); \
356 else if ((int32_t)off == off) \
357 PUT_NUM_S32(off); \
358 else \
359 PUT_NUM_S64(off); \
360 } \
361 PUT_C(a_chEnd); \
362 } \
363 } while (0)
364
365#define PUT_SYMBOL(a_uSeg, a_uAddr, a_szStart, a_chEnd) \
366 do { \
367 if (pfnGetSymbol) \
368 { \
369 int rcSym = pfnGetSymbol(pDis, a_uSeg, a_uAddr, szSymbol, sizeof(szSymbol), &off, pvUser); \
370 PUT_SYMBOL_TWO(rcSym, a_szStart, a_chEnd); \
371 } \
372 } while (0)
373
374
375 /*
376 * The address?
377 */
378 if (fFlags & DIS_FMT_FLAGS_ADDR_LEFT)
379 {
380#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
381 if (pDis->uInstrAddr >= _4G)
382 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
383#endif
384 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
385 PUT_C(' ');
386 }
387
388 /*
389 * The opcode bytes?
390 */
391 if (fFlags & DIS_FMT_FLAGS_BYTES_LEFT)
392 {
393 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
394 cchOutput += cchTmp;
395 if (cchDst > 1)
396 {
397 if (cchTmp <= cchDst)
398 {
399 cchDst -= cchTmp;
400 pszDst += cchTmp;
401 }
402 else
403 {
404 pszDst += cchDst - 1;
405 cchDst = 1;
406 }
407 }
408
409 /* Some padding to align the instruction. */
410 size_t cchPadding = (7 * (2 + !!(fFlags & DIS_FMT_FLAGS_BYTES_SPACED)))
411 + !!(fFlags & DIS_FMT_FLAGS_BYTES_BRACKETS) * 2
412 + 2;
413 cchPadding = cchTmp + 1 >= cchPadding ? 1 : cchPadding - cchTmp;
414 PUT_STR(g_szSpaces, cchPadding);
415 }
416
417
418 /*
419 * Filter out invalid opcodes first as they need special
420 * treatment. UD2 is an exception and should be handled normally.
421 */
422 size_t const offInstruction = cchOutput;
423 if ( pOp->uOpcode == OP_INVALID
424 || ( pOp->uOpcode == OP_ILLUD2
425 && (pDis->fPrefix & DISPREFIX_LOCK)))
426 PUT_SZ("Illegal opcode");
427 else
428 {
429 /*
430 * Prefixes
431 */
432 if (pDis->fPrefix & DISPREFIX_LOCK)
433 PUT_SZ("lock ");
434 if(pDis->fPrefix & DISPREFIX_REP)
435 PUT_SZ("rep ");
436 else if(pDis->fPrefix & DISPREFIX_REPNE)
437 PUT_SZ("repne ");
438
439 /*
440 * Adjust the format string to the correct mnemonic
441 * or to avoid things the assembler cannot handle correctly.
442 */
443 char szTmpFmt[48];
444 const char *pszFmt = pOp->pszOpcode;
445 switch (pOp->uOpcode)
446 {
447 case OP_JECXZ:
448 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "jcxz %Jb" : pDis->uOpMode == DISCPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
449 break;
450 case OP_PUSHF:
451 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "pushfw" : pDis->uOpMode == DISCPUMODE_32BIT ? "pushfd" : "pushfq";
452 break;
453 case OP_POPF:
454 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "popfw" : pDis->uOpMode == DISCPUMODE_32BIT ? "popfd" : "popfq";
455 break;
456 case OP_PUSHA:
457 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "pushaw" : "pushad";
458 break;
459 case OP_POPA:
460 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "popaw" : "popad";
461 break;
462 case OP_INSB:
463 pszFmt = "insb";
464 break;
465 case OP_INSWD:
466 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "insw" : pDis->uOpMode == DISCPUMODE_32BIT ? "insd" : "insq";
467 break;
468 case OP_OUTSB:
469 pszFmt = "outsb";
470 break;
471 case OP_OUTSWD:
472 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "outsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "outsd" : "outsq";
473 break;
474 case OP_MOVSB:
475 pszFmt = "movsb";
476 break;
477 case OP_MOVSWD:
478 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "movsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "movsd" : "movsq";
479 break;
480 case OP_CMPSB:
481 pszFmt = "cmpsb";
482 break;
483 case OP_CMPWD:
484 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cmpsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "cmpsd" : "cmpsq";
485 break;
486 case OP_SCASB:
487 pszFmt = "scasb";
488 break;
489 case OP_SCASWD:
490 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "scasw" : pDis->uOpMode == DISCPUMODE_32BIT ? "scasd" : "scasq";
491 break;
492 case OP_LODSB:
493 pszFmt = "lodsb";
494 break;
495 case OP_LODSWD:
496 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "lodsw" : pDis->uOpMode == DISCPUMODE_32BIT ? "lodsd" : "lodsq";
497 break;
498 case OP_STOSB:
499 pszFmt = "stosb";
500 break;
501 case OP_STOSWD:
502 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "stosw" : pDis->uOpMode == DISCPUMODE_32BIT ? "stosd" : "stosq";
503 break;
504 case OP_CBW:
505 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cbw" : pDis->uOpMode == DISCPUMODE_32BIT ? "cwde" : "cdqe";
506 break;
507 case OP_CWD:
508 pszFmt = pDis->uOpMode == DISCPUMODE_16BIT ? "cwd" : pDis->uOpMode == DISCPUMODE_32BIT ? "cdq" : "cqo";
509 break;
510 case OP_SHL:
511 Assert(pszFmt[3] == '/');
512 pszFmt += 4;
513 break;
514 case OP_XLAT:
515 pszFmt = "xlatb";
516 break;
517 case OP_INT3:
518 pszFmt = "int3";
519 break;
520
521 /*
522 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
523 */
524 case OP_NOP:
525 if (pDis->bOpCode == 0x90)
526 /* fine, fine */;
527 else if (pszFmt[sizeof("nop %Ev") - 1] == '/' && pszFmt[sizeof("nop %Ev")] == 'p')
528 pszFmt = "prefetch %Eb";
529 else if (pDis->bOpCode == 0x1f)
530 {
531 Assert(pDis->cbInstr >= 3);
532 PUT_SZ("db 00fh, 01fh,");
533 PUT_NUM_8(MAKE_MODRM(pDis->ModRM.Bits.Mod, pDis->ModRM.Bits.Reg, pDis->ModRM.Bits.Rm));
534 for (unsigned i = 3; i < pDis->cbInstr; i++)
535 {
536 PUT_C(',');
537 PUT_NUM_8(0x90); ///@todo fixme.
538 }
539 pszFmt = "";
540 }
541 break;
542
543 default:
544 /* ST(X) -> stX (floating point) */
545 if (*pszFmt == 'f' && strchr(pszFmt, '('))
546 {
547 char *pszFmtDst = szTmpFmt;
548 char ch;
549 do
550 {
551 ch = *pszFmt++;
552 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
553 {
554 *pszFmtDst++ = 's';
555 *pszFmtDst++ = 't';
556 pszFmt += 2;
557 ch = *pszFmt;
558 Assert(pszFmt[1] == ')');
559 pszFmt += 2;
560 *pszFmtDst++ = ch;
561 }
562 else
563 *pszFmtDst++ = ch;
564 } while (ch != '\0');
565 pszFmt = szTmpFmt;
566 }
567 break;
568
569 /*
570 * Horrible hacks.
571 */
572 case OP_FLD:
573 if (pDis->bOpCode == 0xdb) /* m80fp workaround. */
574 *(int *)&pDis->Param1.fParam &= ~0x1f; /* make it pure OP_PARM_M */
575 break;
576 case OP_LAR: /* hack w -> v, probably not correct. */
577 *(int *)&pDis->Param2.fParam &= ~0x1f;
578 *(int *)&pDis->Param2.fParam |= OP_PARM_v;
579 break;
580 }
581
582 /*
583 * Formatting context and associated macros.
584 */
585 PCDISOPPARAM pParam = &pDis->Param1;
586 int iParam = 1;
587
588#define PUT_FAR() \
589 do { \
590 if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_p \
591 && pOp->uOpcode != OP_LDS /* table bugs? */ \
592 && pOp->uOpcode != OP_LES \
593 && pOp->uOpcode != OP_LFS \
594 && pOp->uOpcode != OP_LGS \
595 && pOp->uOpcode != OP_LSS ) \
596 PUT_SZ("far "); \
597 } while (0)
598 /** @todo mov ah,ch ends up with a byte 'override'... - check if this wasn't fixed. */
        /** @todo drop the word/dword/qword override when the src/dst is a register (except for movsx/movzx). */
600#define PUT_SIZE_OVERRIDE() \
601 do { \
602 switch (OP_PARM_VSUBTYPE(pParam->fParam)) \
603 { \
604 case OP_PARM_v: \
605 case OP_PARM_y: \
606 switch (pDis->uOpMode) \
607 { \
608 case DISCPUMODE_16BIT: PUT_SZ("word "); break; \
609 case DISCPUMODE_32BIT: PUT_SZ("dword "); break; \
610 case DISCPUMODE_64BIT: PUT_SZ("qword "); break; \
611 default: break; \
612 } \
613 break; \
614 case OP_PARM_b: PUT_SZ("byte "); break; \
615 case OP_PARM_w: PUT_SZ("word "); break; \
616 case OP_PARM_d: PUT_SZ("dword "); break; \
617 case OP_PARM_q: PUT_SZ("qword "); break; \
618 case OP_PARM_dq: \
619 if (OP_PARM_VTYPE(pParam->fParam) != OP_PARM_W) /* these are 128 bit, pray they are all unambiguous.. */ \
620 PUT_SZ("dqword "); \
621 break; \
622 case OP_PARM_p: break; /* see PUT_FAR */ \
623 case OP_PARM_s: if (pParam->fUse & DISUSE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
624 case OP_PARM_z: break; \
625 case OP_PARM_NONE: \
626 if ( OP_PARM_VTYPE(pParam->fParam) == OP_PARM_M \
627 && ((pParam->fUse & DISUSE_REG_FP) || pOp->uOpcode == OP_FLD)) \
628 PUT_SZ("tword "); \
629 break; \
630 default: break; /*no pointer type specified/necessary*/ \
631 } \
632 } while (0)
633 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
634#define PUT_SEGMENT_OVERRIDE() \
635 do { \
636 if (pDis->fPrefix & DISPREFIX_SEG) \
637 PUT_STR(s_szSegPrefix[pDis->idxSegPrefix], 3); \
638 } while (0)
639
640
641 /*
         * Segment prefixing for instructions that don't do memory access.
643 */
644 if ( (pDis->fPrefix & DISPREFIX_SEG)
645 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
646 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
647 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
648 {
649 PUT_STR(s_szSegPrefix[pDis->idxSegPrefix], 2);
650 PUT_C(' ');
651 }
652
653
654 /*
655 * The formatting loop.
656 */
657 RTINTPTR off;
658 char szSymbol[128];
659 char ch;
660 while ((ch = *pszFmt++) != '\0')
661 {
662 if (ch == '%')
663 {
664 ch = *pszFmt++;
665 switch (ch)
666 {
667 /*
668 * ModRM - Register only.
669 */
670 case 'C': /* Control register (ParseModRM / UseModRM). */
671 case 'D': /* Debug register (ParseModRM / UseModRM). */
672 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
673 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
674 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
675 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
676 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
677 {
678 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
679 Assert(!(pParam->fUse & (DISUSE_INDEX | DISUSE_SCALE) /* No SIB here... */));
680 Assert(!(pParam->fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)));
681
682 size_t cchReg;
683 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
684 PUT_STR(pszReg, cchReg);
685 break;
686 }
687
688 /*
689 * ModRM - Register or memory.
690 */
691 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
692 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
693 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
694 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
695 case 'M': /* ModRM may only refer to memory (ParseModRM / UseModRM). */
696 {
697 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
698
699 PUT_FAR();
700 uint32_t const fUse = pParam->fUse;
701 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
702 {
703 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
704 while the register variants deals with 16, 32 & 64 in the normal fashion. */
705 if ( pParam->fParam != OP_PARM_Ev
706 || pOp->uOpcode != OP_MOV
707 || ( pOp->fParam1 != OP_PARM_Sw
708 && pOp->fParam2 != OP_PARM_Sw))
709 PUT_SIZE_OVERRIDE();
710 PUT_C('[');
711 }
712 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
713 && (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32)))
714 {
715 if ( (fUse & DISUSE_DISPLACEMENT8)
716 && !pParam->uDisp.i8)
717 PUT_SZ("byte ");
718 else if ( (fUse & DISUSE_DISPLACEMENT16)
719 && (int8_t)pParam->uDisp.i16 == (int16_t)pParam->uDisp.i16)
720 PUT_SZ("word ");
721 else if ( (fUse & DISUSE_DISPLACEMENT32)
722 && (int16_t)pParam->uDisp.i32 == (int32_t)pParam->uDisp.i32) //??
723 PUT_SZ("dword ");
724 else if ( (fUse & DISUSE_DISPLACEMENT64)
725 && (pDis->SIB.Bits.Base != 5 || pDis->ModRM.Bits.Mod != 0)
726 && (int32_t)pParam->uDisp.i64 == (int64_t)pParam->uDisp.i64) //??
727 PUT_SZ("qword ");
728 }
729 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
730 PUT_SEGMENT_OVERRIDE();
731
732 bool fBase = (fUse & DISUSE_BASE) /* When exactly is DISUSE_BASE supposed to be set? disasmModRMReg doesn't set it. */
733 || ( (fUse & ( DISUSE_REG_GEN8
734 | DISUSE_REG_GEN16
735 | DISUSE_REG_GEN32
736 | DISUSE_REG_GEN64
737 | DISUSE_REG_FP
738 | DISUSE_REG_MMX
739 | DISUSE_REG_XMM
740 | DISUSE_REG_CR
741 | DISUSE_REG_DBG
742 | DISUSE_REG_SEG
743 | DISUSE_REG_TEST ))
744 && !DISUSE_IS_EFFECTIVE_ADDR(fUse));
745 if (fBase)
746 {
747 size_t cchReg;
748 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
749 PUT_STR(pszReg, cchReg);
750 }
751
752 if (fUse & DISUSE_INDEX)
753 {
754 if (fBase)
755 PUT_C('+');
756
757 size_t cchReg;
758 const char *pszReg = disasmFormatYasmIndexReg(pDis, pParam, &cchReg);
759 PUT_STR(pszReg, cchReg);
760
761 if (fUse & DISUSE_SCALE)
762 {
763 PUT_C('*');
764 PUT_C('0' + pParam->uScale);
765 }
766 }
767 else
768 Assert(!(fUse & DISUSE_SCALE));
769
770 int64_t off2 = 0;
771 if (fUse & (DISUSE_DISPLACEMENT8 | DISUSE_DISPLACEMENT16 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT64 | DISUSE_RIPDISPLACEMENT32))
772 {
773 if (fUse & DISUSE_DISPLACEMENT8)
774 off2 = pParam->uDisp.i8;
775 else if (fUse & DISUSE_DISPLACEMENT16)
776 off2 = pParam->uDisp.i16;
777 else if (fUse & (DISUSE_DISPLACEMENT32 | DISUSE_RIPDISPLACEMENT32))
778 off2 = pParam->uDisp.i32;
779 else if (fUse & DISUSE_DISPLACEMENT64)
780 off2 = pParam->uDisp.i64;
781 else
782 {
783 AssertFailed();
784 off2 = 0;
785 }
786
787 if (fBase || (fUse & DISUSE_INDEX))
788 {
789 PUT_C(off2 >= 0 ? '+' : '-');
790 if (off2 < 0)
791 off2 = -off2;
792 }
793 if (fUse & DISUSE_DISPLACEMENT8)
794 PUT_NUM_8( off2);
795 else if (fUse & DISUSE_DISPLACEMENT16)
796 PUT_NUM_16(off2);
797 else if (fUse & DISUSE_DISPLACEMENT32)
798 PUT_NUM_32(off2);
799 else if (fUse & DISUSE_DISPLACEMENT64)
800 PUT_NUM_64(off2);
801 else
802 {
803 PUT_NUM_32(off2);
804 PUT_SZ(" wrt rip"); //??
805 }
806 }
807
808 if (DISUSE_IS_EFFECTIVE_ADDR(fUse))
809 {
810 if (pfnGetSymbol && !fBase && !(fUse & DISUSE_INDEX) && off2 != 0)
811 PUT_SYMBOL((pDis->fPrefix & DISPREFIX_SEG)
812 ? DIS_FMT_SEL_FROM_REG(pDis->idxSegPrefix)
813 : DIS_FMT_SEL_FROM_REG(DISSELREG_DS),
814 pDis->uAddrMode == DISCPUMODE_64BIT
815 ? (uint64_t)off2
816 : pDis->uAddrMode == DISCPUMODE_32BIT
817 ? (uint32_t)off2
818 : (uint16_t)off2,
819 " (=", ')');
820 PUT_C(']');
821 }
822 break;
823 }
824
825 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
826 AssertFailed();
827 break;
828
829 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
830 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
831 switch (pParam->fUse & ( DISUSE_IMMEDIATE8 | DISUSE_IMMEDIATE16 | DISUSE_IMMEDIATE32 | DISUSE_IMMEDIATE64
832 | DISUSE_IMMEDIATE16_SX8 | DISUSE_IMMEDIATE32_SX8 | DISUSE_IMMEDIATE64_SX8))
833 {
834 case DISUSE_IMMEDIATE8:
835 if ( (fFlags & DIS_FMT_FLAGS_STRICT)
836 && ( (pOp->fParam1 >= OP_PARM_REG_GEN8_START && pOp->fParam1 <= OP_PARM_REG_GEN8_END)
837 || (pOp->fParam2 >= OP_PARM_REG_GEN8_START && pOp->fParam2 <= OP_PARM_REG_GEN8_END))
838 )
839 PUT_SZ("strict byte ");
840 PUT_NUM_8(pParam->uValue);
841 break;
842
843 case DISUSE_IMMEDIATE16:
844 if ( pDis->uCpuMode != pDis->uOpMode
845 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
846 && ( (int8_t)pParam->uValue == (int16_t)pParam->uValue
847 || (pOp->fParam1 >= OP_PARM_REG_GEN16_START && pOp->fParam1 <= OP_PARM_REG_GEN16_END)
848 || (pOp->fParam2 >= OP_PARM_REG_GEN16_START && pOp->fParam2 <= OP_PARM_REG_GEN16_END))
849 )
850 )
851 {
852 if (OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_b)
853 PUT_SZ_STRICT("strict byte ", "byte ");
854 else if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_v
855 || OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_z)
856 PUT_SZ_STRICT("strict word ", "word ");
857 }
858 PUT_NUM_16(pParam->uValue);
859 break;
860
861 case DISUSE_IMMEDIATE16_SX8:
862 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
863 || pDis->pCurInstr->uOpcode != OP_PUSH)
864 PUT_SZ_STRICT("strict byte ", "byte ");
865 else
866 PUT_SZ("word ");
867 PUT_NUM_16(pParam->uValue);
868 break;
869
870 case DISUSE_IMMEDIATE32:
871 if ( pDis->uOpMode != (pDis->uCpuMode == DISCPUMODE_16BIT ? DISCPUMODE_16BIT : DISCPUMODE_32BIT) /* not perfect */
872 || ( (fFlags & DIS_FMT_FLAGS_STRICT)
873 && ( (int8_t)pParam->uValue == (int32_t)pParam->uValue
874 || (pOp->fParam1 >= OP_PARM_REG_GEN32_START && pOp->fParam1 <= OP_PARM_REG_GEN32_END)
875 || (pOp->fParam2 >= OP_PARM_REG_GEN32_START && pOp->fParam2 <= OP_PARM_REG_GEN32_END))
876 )
877 )
878 {
879 if (OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_b)
880 PUT_SZ_STRICT("strict byte ", "byte ");
881 else if ( OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_v
882 || OP_PARM_VSUBTYPE(pParam->fParam) == OP_PARM_z)
883 PUT_SZ_STRICT("strict dword ", "dword ");
884 }
885 PUT_NUM_32(pParam->uValue);
886 if (pDis->uCpuMode == DISCPUMODE_32BIT)
887 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uValue, " (=", ')');
888 break;
889
890 case DISUSE_IMMEDIATE32_SX8:
891 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
892 || pDis->pCurInstr->uOpcode != OP_PUSH)
893 PUT_SZ_STRICT("strict byte ", "byte ");
894 else
895 PUT_SZ("dword ");
896 PUT_NUM_32(pParam->uValue);
897 break;
898
899 case DISUSE_IMMEDIATE64_SX8:
900 if ( !(pDis->fPrefix & DISPREFIX_OPSIZE)
901 || pDis->pCurInstr->uOpcode != OP_PUSH)
902 PUT_SZ_STRICT("strict byte ", "byte ");
903 else
904 PUT_SZ("qword ");
905 PUT_NUM_64(pParam->uValue);
906 break;
907
908 case DISUSE_IMMEDIATE64:
909 PUT_NUM_64(pParam->uValue);
910 break;
911
912 default:
913 AssertFailed();
914 break;
915 }
916 break;
917
918 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
919 {
920 int32_t offDisplacement;
921 Assert(iParam == 1);
922 bool fPrefix = (fFlags & DIS_FMT_FLAGS_STRICT)
923 && pOp->uOpcode != OP_CALL
924 && pOp->uOpcode != OP_LOOP
925 && pOp->uOpcode != OP_LOOPE
926 && pOp->uOpcode != OP_LOOPNE
927 && pOp->uOpcode != OP_JECXZ;
928 if (pOp->uOpcode == OP_CALL)
929 fFlags &= ~DIS_FMT_FLAGS_RELATIVE_BRANCH;
930
931 if (pParam->fUse & DISUSE_IMMEDIATE8_REL)
932 {
933 if (fPrefix)
934 PUT_SZ("short ");
935 offDisplacement = (int8_t)pParam->uValue;
936 Assert(*pszFmt == 'b'); pszFmt++;
937
938 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
939 PUT_NUM_S8(offDisplacement);
940 }
941 else if (pParam->fUse & DISUSE_IMMEDIATE16_REL)
942 {
943 if (fPrefix)
944 PUT_SZ("near ");
945 offDisplacement = (int16_t)pParam->uValue;
946 Assert(*pszFmt == 'v'); pszFmt++;
947
948 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
949 PUT_NUM_S16(offDisplacement);
950 }
951 else
952 {
953 if (fPrefix)
954 PUT_SZ("near ");
955 offDisplacement = (int32_t)pParam->uValue;
956 Assert(pParam->fUse & (DISUSE_IMMEDIATE32_REL | DISUSE_IMMEDIATE64_REL));
957 Assert(*pszFmt == 'v'); pszFmt++;
958
959 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
960 PUT_NUM_S32(offDisplacement);
961 }
962 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
963 PUT_SZ(" (");
964
965 RTUINTPTR uTrgAddr = pDis->uInstrAddr + pDis->cbInstr + offDisplacement;
966 if (pDis->uCpuMode == DISCPUMODE_16BIT)
967 PUT_NUM_16(uTrgAddr);
968 else if (pDis->uCpuMode == DISCPUMODE_32BIT)
969 PUT_NUM_32(uTrgAddr);
970 else
971 PUT_NUM_64(uTrgAddr);
972
973 if (fFlags & DIS_FMT_FLAGS_RELATIVE_BRANCH)
974 {
975 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " = ", ' ');
976 PUT_C(')');
977 }
978 else
979 PUT_SYMBOL(DIS_FMT_SEL_FROM_REG(DISSELREG_CS), uTrgAddr, " (", ')');
980 break;
981 }
982
983 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
984 {
985 Assert(*pszFmt == 'p'); pszFmt++;
986 PUT_FAR();
987 PUT_SIZE_OVERRIDE();
988 PUT_SEGMENT_OVERRIDE();
989 int rc = VERR_SYMBOL_NOT_FOUND;
990 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
991 {
992 case DISUSE_IMMEDIATE_ADDR_16_16:
993 PUT_NUM_16(pParam->uValue >> 16);
994 PUT_C(':');
995 PUT_NUM_16(pParam->uValue);
996 if (pfnGetSymbol)
997 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
998 break;
999 case DISUSE_IMMEDIATE_ADDR_16_32:
1000 PUT_NUM_16(pParam->uValue >> 32);
1001 PUT_C(':');
1002 PUT_NUM_32(pParam->uValue);
1003 if (pfnGetSymbol)
1004 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1005 break;
1006 case DISUSE_DISPLACEMENT16:
1007 PUT_NUM_16(pParam->uValue);
1008 if (pfnGetSymbol)
1009 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1010 break;
1011 case DISUSE_DISPLACEMENT32:
1012 PUT_NUM_32(pParam->uValue);
1013 if (pfnGetSymbol)
1014 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1015 break;
1016 case DISUSE_DISPLACEMENT64:
1017 PUT_NUM_64(pParam->uValue);
1018 if (pfnGetSymbol)
1019 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), (uint64_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1020 break;
1021 default:
1022 AssertFailed();
1023 break;
1024 }
1025
1026 PUT_SYMBOL_TWO(rc, " [", ']');
1027 break;
1028 }
1029
1030 case 'O': /* No ModRM byte (ParseImmAddr). */
1031 {
1032 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1033 PUT_FAR();
1034 PUT_SIZE_OVERRIDE();
1035 PUT_C('[');
1036 PUT_SEGMENT_OVERRIDE();
1037 int rc = VERR_SYMBOL_NOT_FOUND;
1038 switch (pParam->fUse & (DISUSE_IMMEDIATE_ADDR_16_16 | DISUSE_IMMEDIATE_ADDR_16_32 | DISUSE_DISPLACEMENT64 | DISUSE_DISPLACEMENT32 | DISUSE_DISPLACEMENT16))
1039 {
1040 case DISUSE_IMMEDIATE_ADDR_16_16:
1041 PUT_NUM_16(pParam->uValue >> 16);
1042 PUT_C(':');
1043 PUT_NUM_16(pParam->uValue);
1044 if (pfnGetSymbol)
1045 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint16_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1046 break;
1047 case DISUSE_IMMEDIATE_ADDR_16_32:
1048 PUT_NUM_16(pParam->uValue >> 32);
1049 PUT_C(':');
1050 PUT_NUM_32(pParam->uValue);
1051 if (pfnGetSymbol)
1052 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_VALUE(pParam->uValue >> 16), (uint32_t)pParam->uValue, szSymbol, sizeof(szSymbol), &off, pvUser);
1053 break;
1054 case DISUSE_DISPLACEMENT16:
1055 PUT_NUM_16(pParam->uDisp.i16);
1056 if (pfnGetSymbol)
1057 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u16, szSymbol, sizeof(szSymbol), &off, pvUser);
1058 break;
1059 case DISUSE_DISPLACEMENT32:
1060 PUT_NUM_32(pParam->uDisp.i32);
1061 if (pfnGetSymbol)
1062 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u32, szSymbol, sizeof(szSymbol), &off, pvUser);
1063 break;
1064 case DISUSE_DISPLACEMENT64:
1065 PUT_NUM_64(pParam->uDisp.i64);
1066 if (pfnGetSymbol)
1067 rc = pfnGetSymbol(pDis, DIS_FMT_SEL_FROM_REG(DISSELREG_CS), pParam->uDisp.u64, szSymbol, sizeof(szSymbol), &off, pvUser);
1068 break;
1069 default:
1070 AssertFailed();
1071 break;
1072 }
1073 PUT_C(']');
1074
1075 PUT_SYMBOL_TWO(rc, " (", ')');
1076 break;
1077 }
1078
1079 case 'X': /* DS:SI (ParseXb, ParseXv). */
1080 case 'Y': /* ES:DI (ParseYb, ParseYv). */
1081 {
1082 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
1083 PUT_FAR();
1084 PUT_SIZE_OVERRIDE();
1085 PUT_C('[');
1086 if (pParam->fUse & DISUSE_POINTER_DS_BASED)
1087 PUT_SZ("ds:");
1088 else
1089 PUT_SZ("es:");
1090
1091 size_t cchReg;
1092 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
1093 PUT_STR(pszReg, cchReg);
1094 PUT_C(']');
1095 break;
1096 }
1097
1098 case 'e': /* Register based on operand size (e.g. %eAX, %eAH) (ParseFixedReg). */
1099 {
1100 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2]));
1101 pszFmt += 2;
1102 size_t cchReg;
1103 const char *pszReg = disasmFormatYasmBaseReg(pDis, pParam, &cchReg);
1104 PUT_STR(pszReg, cchReg);
1105 break;
1106 }
1107
1108 default:
1109 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
1110 break;
1111 }
1112 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
1113 }
1114 else
1115 {
1116 PUT_C(ch);
1117 if (ch == ',')
1118 {
1119 Assert(*pszFmt != ' ');
1120 PUT_C(' ');
1121 switch (++iParam)
1122 {
1123 case 2: pParam = &pDis->Param2; break;
1124 case 3: pParam = &pDis->Param3; break;
1125 default: pParam = NULL; break;
1126 }
1127 }
1128 }
1129 } /* while more to format */
1130 }
1131
1132 /*
1133 * Any additional output to the right of the instruction?
1134 */
1135 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1136 {
1137 /* some up front padding. */
1138 size_t cchPadding = cchOutput - offInstruction;
1139 cchPadding = cchPadding + 1 >= 42 ? 1 : 42 - cchPadding;
1140 PUT_STR(g_szSpaces, cchPadding);
1141
1142 /* comment? */
1143 if (fFlags & (DIS_FMT_FLAGS_BYTES_RIGHT | DIS_FMT_FLAGS_ADDR_RIGHT))
1144 PUT_SZ(";");
1145
1146 /*
1147 * The address?
1148 */
1149 if (fFlags & DIS_FMT_FLAGS_ADDR_RIGHT)
1150 {
1151 PUT_C(' ');
1152#if HC_ARCH_BITS == 64 || GC_ARCH_BITS == 64
1153 if (pDis->uInstrAddr >= _4G)
1154 PUT_NUM(9, "%08x`", (uint32_t)(pDis->uInstrAddr >> 32));
1155#endif
1156 PUT_NUM(8, "%08x", (uint32_t)pDis->uInstrAddr);
1157 }
1158
1159 /*
1160 * Opcode bytes?
1161 */
1162 if (fFlags & DIS_FMT_FLAGS_BYTES_RIGHT)
1163 {
1164 PUT_C(' ');
1165 size_t cchTmp = disFormatBytes(pDis, pszDst, cchDst, fFlags);
1166 cchOutput += cchTmp;
1167 if (cchTmp >= cchDst)
1168 cchTmp = cchDst - (cchDst != 0);
1169 cchDst -= cchTmp;
1170 pszDst += cchTmp;
1171 }
1172 }
1173
1174 /*
1175 * Terminate it - on overflow we'll have reserved one byte for this.
1176 */
1177 if (cchDst > 0)
1178 *pszDst = '\0';
1179 else
1180 Assert(!cchBuf);
1181
1182 /* clean up macros */
1183#undef PUT_PSZ
1184#undef PUT_SZ
1185#undef PUT_STR
1186#undef PUT_C
1187 return cchOutput;
1188}
1189
1190
1191/**
1192 * Formats the current instruction in Yasm (/ Nasm) style.
1193 *
1194 * This is a simplified version of DISFormatYasmEx() provided for your convenience.
1195 *
1196 *
1197 * @returns The number of output characters. If this is >= cchBuf, then the content
1198 * of pszBuf will be truncated.
1199 * @param pDis Pointer to the disassembler state.
1200 * @param pszBuf The output buffer.
1201 * @param cchBuf The size of the output buffer.
1202 */
1203DISDECL(size_t) DISFormatYasm(PCDISSTATE pDis, char *pszBuf, size_t cchBuf)
1204{
1205 return DISFormatYasmEx(pDis, pszBuf, cchBuf, 0 /* fFlags */, NULL /* pfnGetSymbol */, NULL /* pvUser */);
1206}
1207
1208
1209/**
1210 * Checks if the encoding of the given disassembled instruction is something we
1211 * can never get YASM to produce.
1212 *
1213 * @returns true if it's odd, false if it isn't.
1214 * @param pDis The disassembler output. The byte fetcher callback will
1215 * be used if present as we might need to fetch opcode
1216 * bytes.
1217 */
1218DISDECL(bool) DISFormatYasmIsOddEncoding(PDISSTATE pDis)
1219{
1220 /*
1221 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
1222 */
1223 if ( pDis->uAddrMode != DISCPUMODE_16BIT ///@todo correct?
1224 && pDis->ModRM.Bits.Rm == 4
1225 && pDis->ModRM.Bits.Mod != 3)
1226 {
1227 /* No scaled index SIB (index=4), except for ESP. */
1228 if ( pDis->SIB.Bits.Index == 4
1229 && pDis->SIB.Bits.Base != 4)
1230 return true;
1231
1232 /* EBP + displacement */
1233 if ( pDis->ModRM.Bits.Mod != 0
1234 && pDis->SIB.Bits.Base == 5
1235 && pDis->SIB.Bits.Scale == 0)
1236 return true;
1237 }
1238
1239 /*
1240 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
1241 */
1242 if ( pDis->pCurInstr->uOpcode == OP_SHL
1243 && pDis->ModRM.Bits.Reg == 6)
1244 return true;
1245
1246 /*
1247 * Check for multiple prefixes of the same kind.
1248 */
1249 uint8_t off1stSeg = UINT8_MAX;
1250 uint8_t offOpSize = UINT8_MAX;
1251 uint8_t offAddrSize = UINT8_MAX;
1252 uint32_t fPrefixes = 0;
1253 for (uint32_t offOpcode = 0; offOpcode < RT_ELEMENTS(pDis->abInstr); offOpcode++)
1254 {
1255 uint32_t f;
1256 switch (pDis->abInstr[offOpcode])
1257 {
1258 case 0xf0:
1259 f = DISPREFIX_LOCK;
1260 break;
1261
1262 case 0xf2:
1263 case 0xf3:
1264 f = DISPREFIX_REP; /* yes, both */
1265 break;
1266
1267 case 0x2e:
1268 case 0x3e:
1269 case 0x26:
1270 case 0x36:
1271 case 0x64:
1272 case 0x65:
1273 if (off1stSeg == UINT8_MAX)
1274 off1stSeg = offOpcode;
1275 f = DISPREFIX_SEG;
1276 break;
1277
1278 case 0x66:
1279 if (offOpSize == UINT8_MAX)
1280 offOpSize = offOpcode;
1281 f = DISPREFIX_OPSIZE;
1282 break;
1283
1284 case 0x67:
1285 if (offAddrSize == UINT8_MAX)
1286 offAddrSize = offOpcode;
1287 f = DISPREFIX_ADDRSIZE;
1288 break;
1289
1290 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1291 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1292 f = pDis->uCpuMode == DISCPUMODE_64BIT ? DISPREFIX_REX : 0;
1293 break;
1294
1295 default:
1296 f = 0;
1297 break;
1298 }
1299 if (!f)
1300 break; /* done */
1301 if (fPrefixes & f)
1302 return true;
1303 fPrefixes |= f;
1304 }
1305
1306 /* segment overrides are fun */
1307 if (fPrefixes & DISPREFIX_SEG)
1308 {
1309 /* no effective address which it may apply to. */
1310 Assert((pDis->fPrefix & DISPREFIX_SEG) || pDis->uCpuMode == DISCPUMODE_64BIT);
1311 if ( !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param1.fUse)
1312 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param2.fUse)
1313 && !DISUSE_IS_EFFECTIVE_ADDR(pDis->Param3.fUse))
1314 return true;
1315
1316 /* Yasm puts the segment prefixes before the operand prefix with no
1317 way of overriding it. */
1318 if (offOpSize < off1stSeg)
1319 return true;
1320 }
1321
1322 /* fixed register + addr override doesn't go down all that well. */
1323 if (fPrefixes & DISPREFIX_ADDRSIZE)
1324 {
1325 Assert(pDis->fPrefix & DISPREFIX_ADDRSIZE);
1326 if ( pDis->pCurInstr->fParam3 == OP_PARM_NONE
1327 && pDis->pCurInstr->fParam2 == OP_PARM_NONE
1328 && ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1329 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END))
1330 return true;
1331 }
1332
1333 /* Almost all prefixes are bad for jumps. */
1334 if (fPrefixes)
1335 {
1336 switch (pDis->pCurInstr->uOpcode)
1337 {
1338 /* nop w/ prefix(es). */
1339 case OP_NOP:
1340 return true;
1341
1342 case OP_JMP:
1343 if ( pDis->pCurInstr->fParam1 != OP_PARM_Jb
1344 && pDis->pCurInstr->fParam1 != OP_PARM_Jv)
1345 break;
1346 /* fall thru */
1347 case OP_JO:
1348 case OP_JNO:
1349 case OP_JC:
1350 case OP_JNC:
1351 case OP_JE:
1352 case OP_JNE:
1353 case OP_JBE:
1354 case OP_JNBE:
1355 case OP_JS:
1356 case OP_JNS:
1357 case OP_JP:
1358 case OP_JNP:
1359 case OP_JL:
1360 case OP_JNL:
1361 case OP_JLE:
1362 case OP_JNLE:
1363 /** @todo branch hinting 0x2e/0x3e... */
1364 return true;
1365 }
1366
1367 }
1368
1369 /* All but the segment prefix is bad news for push/pop. */
1370 if (fPrefixes & ~DISPREFIX_SEG)
1371 {
1372 switch (pDis->pCurInstr->uOpcode)
1373 {
1374 case OP_POP:
1375 case OP_PUSH:
1376 if ( pDis->pCurInstr->fParam1 >= OP_PARM_REG_SEG_START
1377 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_SEG_END)
1378 return true;
1379 if ( (fPrefixes & ~DISPREFIX_OPSIZE)
1380 && pDis->pCurInstr->fParam1 >= OP_PARM_REG_GEN32_START
1381 && pDis->pCurInstr->fParam1 <= OP_PARM_REG_GEN32_END)
1382 return true;
1383 break;
1384
1385 case OP_POPA:
1386 case OP_POPF:
1387 case OP_PUSHA:
1388 case OP_PUSHF:
1389 if (fPrefixes & ~DISPREFIX_OPSIZE)
1390 return true;
1391 break;
1392 }
1393 }
1394
1395 /* Implicit 8-bit register instructions doesn't mix with operand size. */
1396 if ( (fPrefixes & DISPREFIX_OPSIZE)
1397 && ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1398 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1399 || ( pDis->pCurInstr->fParam2 == OP_PARM_Gb /* r8 */
1400 && pDis->pCurInstr->fParam1 == OP_PARM_Eb /* r8/mem8 */))
1401 )
1402 {
1403 switch (pDis->pCurInstr->uOpcode)
1404 {
1405 case OP_ADD:
1406 case OP_OR:
1407 case OP_ADC:
1408 case OP_SBB:
1409 case OP_AND:
1410 case OP_SUB:
1411 case OP_XOR:
1412 case OP_CMP:
1413 return true;
1414 default:
1415 break;
1416 }
1417 }
1418
1419 /* Instructions taking no address or operand which thus may be annoyingly
1420 difficult to format for yasm. */
1421 if (fPrefixes)
1422 {
1423 switch (pDis->pCurInstr->uOpcode)
1424 {
1425 case OP_STI:
1426 case OP_STC:
1427 case OP_CLI:
1428 case OP_CLD:
1429 case OP_CLC:
1430 case OP_INT:
1431 case OP_INT3:
1432 case OP_INTO:
1433 case OP_HLT:
1434 /** @todo Many more to can be added here. */
1435 return true;
1436 default:
1437 break;
1438 }
1439 }
1440
1441 /* FPU and other instructions that ignores operand size override. */
1442 if (fPrefixes & DISPREFIX_OPSIZE)
1443 {
1444 switch (pDis->pCurInstr->uOpcode)
1445 {
1446 /* FPU: */
1447 case OP_FIADD:
1448 case OP_FIMUL:
1449 case OP_FISUB:
1450 case OP_FISUBR:
1451 case OP_FIDIV:
1452 case OP_FIDIVR:
1453 /** @todo there are many more. */
1454 return true;
1455
1456 case OP_MOV:
1457 /** @todo could be that we're not disassembling these correctly. */
1458 if (pDis->pCurInstr->fParam1 == OP_PARM_Sw)
1459 return true;
1460 /** @todo what about the other way? */
1461 break;
1462
1463 default:
1464 break;
1465 }
1466 }
1467
1468
1469 /*
1470 * Check for the version of xyz reg,reg instruction that the assembler doesn't use.
1471 *
1472 * For example:
1473 * expected: 1aee sbb ch, dh ; SBB r8, r/m8
1474 * yasm: 18F5 sbb ch, dh ; SBB r/m8, r8
1475 */
1476 if (pDis->ModRM.Bits.Mod == 3 /* reg,reg */)
1477 {
1478 switch (pDis->pCurInstr->uOpcode)
1479 {
1480 case OP_ADD:
1481 case OP_OR:
1482 case OP_ADC:
1483 case OP_SBB:
1484 case OP_AND:
1485 case OP_SUB:
1486 case OP_XOR:
1487 case OP_CMP:
1488 if ( ( pDis->pCurInstr->fParam1 == OP_PARM_Gb /* r8 */
1489 && pDis->pCurInstr->fParam2 == OP_PARM_Eb /* r8/mem8 */)
1490 || ( pDis->pCurInstr->fParam1 == OP_PARM_Gv /* rX */
1491 && pDis->pCurInstr->fParam2 == OP_PARM_Ev /* rX/memX */))
1492 return true;
1493
1494 /* 82 (see table A-6). */
1495 if (pDis->bOpCode == 0x82)
1496 return true;
1497 break;
1498
1499 /* ff /0, fe /0, ff /1, fe /0 */
1500 case OP_DEC:
1501 case OP_INC:
1502 return true;
1503
1504 case OP_POP:
1505 case OP_PUSH:
1506 Assert(pDis->bOpCode == 0x8f);
1507 return true;
1508
1509 case OP_MOV:
1510 if ( pDis->bOpCode == 0x8a
1511 || pDis->bOpCode == 0x8b)
1512 return true;
1513 break;
1514
1515 default:
1516 break;
1517 }
1518 }
1519
1520 /* shl eax,1 will be assembled to the form without the immediate byte. */
1521 if ( pDis->pCurInstr->fParam2 == OP_PARM_Ib
1522 && (uint8_t)pDis->Param2.uValue == 1)
1523 {
1524 switch (pDis->pCurInstr->uOpcode)
1525 {
1526 case OP_SHL:
1527 case OP_SHR:
1528 case OP_SAR:
1529 case OP_RCL:
1530 case OP_RCR:
1531 case OP_ROL:
1532 case OP_ROR:
1533 return true;
1534 }
1535 }
1536
1537 /* And some more - see table A-6. */
1538 if (pDis->bOpCode == 0x82)
1539 {
1540 switch (pDis->pCurInstr->uOpcode)
1541 {
1542 case OP_ADD:
1543 case OP_OR:
1544 case OP_ADC:
1545 case OP_SBB:
1546 case OP_AND:
1547 case OP_SUB:
1548 case OP_XOR:
1549 case OP_CMP:
1550 return true;
1551 break;
1552 }
1553 }
1554
1555
1556 /* check for REX.X = 1 without SIB. */
1557
1558 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1559 says (intel doesn't appear to care). */
1560 switch (pDis->pCurInstr->uOpcode)
1561 {
1562 case OP_SETO:
1563 case OP_SETNO:
1564 case OP_SETC:
1565 case OP_SETNC:
1566 case OP_SETE:
1567 case OP_SETNE:
1568 case OP_SETBE:
1569 case OP_SETNBE:
1570 case OP_SETS:
1571 case OP_SETNS:
1572 case OP_SETP:
1573 case OP_SETNP:
1574 case OP_SETL:
1575 case OP_SETNL:
1576 case OP_SETLE:
1577 case OP_SETNLE:
1578 AssertMsg(pDis->bOpCode >= 0x90 && pDis->bOpCode <= 0x9f, ("%#x\n", pDis->bOpCode));
1579 if (pDis->ModRM.Bits.Reg != 2)
1580 return true;
1581 break;
1582 }
1583
1584 /*
1585 * The MOVZX reg32,mem16 instruction without an operand size prefix
1586 * doesn't quite make sense...
1587 */
1588 if ( pDis->pCurInstr->uOpcode == OP_MOVZX
1589 && pDis->bOpCode == 0xB7
1590 && (pDis->uCpuMode == DISCPUMODE_16BIT) != !!(fPrefixes & DISPREFIX_OPSIZE))
1591 return true;
1592
1593 return false;
1594}
1595
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette