VirtualBox

source: vbox/trunk/src/VBox/Disassembler/testcase/tstDisasm-2.cpp@ 9100

最後變更 在這個檔案從9100是 9099,由 vboxsync 提交於 17 年 前

More instruction filtering and hacking.

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Id
檔案大小: 61.4 KB
 
1/* $Id: tstDisasm-2.cpp 9099 2008-05-25 23:54:49Z vboxsync $ */
2/** @file
3 * Testcase - Generic Disassembler Tool.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.alldomusa.eu.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#include <VBox/dis.h>
26#include <iprt/stream.h>
27#include <iprt/getopt.h>
28#include <iprt/file.h>
29#include <iprt/string.h>
30#include <iprt/runtime.h>
31#include <VBox/err.h>
32#include <iprt/ctype.h>
33
34
35/*******************************************************************************
36* Structures and Typedefs *
37*******************************************************************************/
/** Assembly style identifiers; kAsmStyle_invalid is the last enumerator. */
typedef enum
{
    kAsmStyle_Default,
    kAsmStyle_yasm,
    kAsmStyle_masm,
    kAsmStyle_gas,
    kAsmStyle_invalid
} ASMSTYLE;

/** How to treat undefined opcodes; kUndefOp_End is the last enumerator. */
typedef enum
{
    kUndefOp_Fail,
    kUndefOp_All,
    kUndefOp_DefineByte,
    kUndefOp_End
} UNDEFOPHANDLING;
40
41typedef struct MYDISSTATE
42{
43 DISCPUSTATE Cpu;
44 uint64_t uAddress; /**< The current instruction address. */
45 uint8_t *pbInstr; /**< The current instruction (pointer). */
46 uint32_t cbInstr; /**< The size of the current instruction. */
47 bool fUndefOp; /**< Whether the current instruction is really an undefined opcode.*/
48 UNDEFOPHANDLING enmUndefOp; /**< How to treat undefined opcodes. */
49 int rc; /**< Set if we hit EOF. */
50 size_t cbLeft; /**< The number of bytes left. (read) */
51 uint8_t *pbNext; /**< The next byte. (read) */
52 uint64_t uNextAddr; /**< The address of the next byte. (read) */
53 char szLine[256]; /**< The disassembler text output. */
54} MYDISSTATE;
55typedef MYDISSTATE *PMYDISSTATE;
56
57
/*
 * Non-logging builds don't do full formatting, so we must do it on our own.
 * This should probably be moved into the disassembler later as it's needed for
 * the vbox debugger as well.
 *
 * Define USE_MY_FORMATTER (i.e. leave the line below uncommented) to enable it.
 */
65#define USE_MY_FORMATTER
66
67#ifdef USE_MY_FORMATTER
/*
 * Register name tables, indexed by register number.
 *
 * Every row is a fixed-size, zero-padded char array, so names shorter than
 * the row leave trailing NUL bytes.  The formatter relies on this when it
 * probes fixed positions of a row (e.g. psz[2], psz[3]) to derive the length.
 */

/** 8-bit general registers, used when not in 64-bit mode. */
static const char g_aszYasmRegGen8x86[8][4] =
{
    "al", "cl", "dl", "bl",
    "ah", "ch", "dh", "bh"
};

/** 8-bit general registers, used in 64-bit mode. */
static const char g_aszYasmRegGen8Amd64[16][5] =
{
    "al",   "cl",   "dl",   "bl",
    "spb",  "bpb",  "sib",  "dib",
    "r8b",  "r9b",  "r10b", "r11b",
    "r12b", "r13b", "r14b", "r15b"
};

/** 16-bit general registers. */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax",   "cx",   "dx",   "bx",
    "sp",   "bp",   "si",   "di",
    "r8w",  "r9w",  "r10w", "r11w",
    "r12w", "r13w", "r14w", "r15w"
};

/** 16-bit addressing register combinations. */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di",
    "si",    "di",    "bp",    "bx"
};

/** 32-bit general registers. */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax",  "ecx",  "edx",  "ebx",
    "esp",  "ebp",  "esi",  "edi",
    "r8d",  "r9d",  "r10d", "r11d",
    "r12d", "r13d", "r14d", "r15d"
};

/** 64-bit general registers. */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax", "rcx", "rdx", "rbx",
    "rsp", "rbp", "rsi", "rdi",
    "r8",  "r9",  "r10", "r11",
    "r12", "r13", "r14", "r15"
};

/** Segment registers. */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};

/** Floating point stack registers. */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3",
    "st4", "st5", "st6", "st7"
};

/** MMX registers. */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3",
    "mm4", "mm5", "mm6", "mm7"
};

/** XMM registers. */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0",  "xmm1",  "xmm2",  "xmm3",
    "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8",  "xmm9",  "xmm10", "xmm11",
    "xmm12", "xmm13", "xmm14", "xmm15"
};

/** Control registers. */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0",  "cr1",  "cr2",  "cr3",
    "cr4",  "cr5",  "cr6",  "cr7",
    "cr8",  "cr9",  "cr10", "cr11",
    "cr12", "cr13", "cr14", "cr15"
};

/** Debug registers. */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0",  "dr1",  "dr2",  "dr3",
    "dr4",  "dr5",  "dr6",  "dr7",
    "dr8",  "dr9",  "dr10", "dr11",
    "dr12", "dr13", "dr14", "dr15"
};

/** Test registers. */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0",  "tr1",  "tr2",  "tr3",
    "tr4",  "tr5",  "tr6",  "tr7",
    "tr8",  "tr9",  "tr10", "tr11",
    "tr12", "tr13", "tr14", "tr15"
};
120
121
122DECLINLINE(const char *) MyDisasYasmFormatBaseReg(DISCPUSTATE const *pCpu, PCOP_PARAMETER pParam, size_t *pcchReg, bool fReg1616)
123{
124 switch (pParam->flags & ( USE_REG_GEN8 | USE_REG_GEN16 | USE_REG_GEN32 | USE_REG_GEN64
125 | USE_REG_FP | USE_REG_MMX | USE_REG_XMM | USE_REG_CR
126 | USE_REG_DBG | USE_REG_SEG | USE_REG_TEST))
127
128 {
129 case USE_REG_GEN8:
130 if (pCpu->opmode == CPUMODE_64BIT)
131 {
132 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen8Amd64));
133 const char *psz = g_aszYasmRegGen8Amd64[pParam->base.reg_gen];
134 *pcchReg = 2 + !!psz[2] + !!psz[3];
135 return psz;
136 }
137 *pcchReg = 2;
138 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen8x86));
139 return g_aszYasmRegGen8x86[pParam->base.reg_gen];
140
141 case USE_REG_GEN16:
142 {
143 if (fReg1616)
144 {
145 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen1616));
146 const char *psz = g_aszYasmRegGen1616[pParam->base.reg_gen];
147 *pcchReg = psz[2] ? 5 : 2;
148 return psz;
149 }
150
151 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen16));
152 const char *psz = g_aszYasmRegGen16[pParam->base.reg_gen];
153 *pcchReg = 2 + !!psz[2] + !!psz[3];
154 return psz;
155 }
156
157 case USE_REG_GEN32:
158 {
159 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen32));
160 const char *psz = g_aszYasmRegGen32[pParam->base.reg_gen];
161 *pcchReg = 2 + !!psz[2] + !!psz[3];
162 return psz;
163 }
164
165 case USE_REG_GEN64:
166 {
167 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen64));
168 const char *psz = g_aszYasmRegGen64[pParam->base.reg_gen];
169 *pcchReg = 2 + !!psz[2] + !!psz[3];
170 return psz;
171 }
172
173 case USE_REG_FP:
174 {
175 Assert(pParam->base.reg_fp < RT_ELEMENTS(g_aszYasmRegFP));
176 const char *psz = g_aszYasmRegFP[pParam->base.reg_fp];
177 *pcchReg = 3;
178 return psz;
179 }
180
181 case USE_REG_MMX:
182 {
183 Assert(pParam->base.reg_mmx < RT_ELEMENTS(g_aszYasmRegMMX));
184 const char *psz = g_aszYasmRegMMX[pParam->base.reg_mmx];
185 *pcchReg = 3;
186 return psz;
187 }
188
189 case USE_REG_XMM:
190 {
191 Assert(pParam->base.reg_xmm < RT_ELEMENTS(g_aszYasmRegXMM));
192 const char *psz = g_aszYasmRegXMM[pParam->base.reg_mmx];
193 *pcchReg = 4 + !!psz[4];
194 return psz;
195 }
196
197 case USE_REG_CR:
198 {
199 Assert(pParam->base.reg_ctrl < RT_ELEMENTS(g_aszYasmRegCRx));
200 const char *psz = g_aszYasmRegCRx[pParam->base.reg_ctrl];
201 *pcchReg = 3;
202 return psz;
203 }
204
205 case USE_REG_DBG:
206 {
207 Assert(pParam->base.reg_dbg < RT_ELEMENTS(g_aszYasmRegDRx));
208 const char *psz = g_aszYasmRegDRx[pParam->base.reg_dbg];
209 *pcchReg = 3;
210 return psz;
211 }
212
213 case USE_REG_SEG:
214 {
215 Assert(pParam->base.reg_seg < RT_ELEMENTS(g_aszYasmRegCRx));
216 const char *psz = g_aszYasmRegSeg[pParam->base.reg_seg];
217 *pcchReg = 2;
218 return psz;
219 }
220
221 case USE_REG_TEST:
222 {
223 Assert(pParam->base.reg_test < RT_ELEMENTS(g_aszYasmRegTRx));
224 const char *psz = g_aszYasmRegTRx[pParam->base.reg_test];
225 *pcchReg = 3;
226 return psz;
227 }
228
229 default:
230 AssertMsgFailed(("%#x\n", pParam->flags));
231 *pcchReg = 3;
232 return "r??";
233 }
234}
235
236DECLINLINE(const char *) MyDisasYasmFormatIndexReg(DISCPUSTATE const *pCpu, PCOP_PARAMETER pParam, size_t *pcchReg)
237{
238 switch (pCpu->addrmode)
239 {
240 case CPUMODE_16BIT:
241 {
242 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen16));
243 const char *psz = g_aszYasmRegGen16[pParam->index.reg_gen];
244 *pcchReg = 2 + !!psz[2] + !!psz[3];
245 return psz;
246 }
247
248 case CPUMODE_32BIT:
249 {
250 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen32));
251 const char *psz = g_aszYasmRegGen32[pParam->index.reg_gen];
252 *pcchReg = 2 + !!psz[2] + !!psz[3];
253 return psz;
254 }
255
256 case CPUMODE_64BIT:
257 {
258 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen64));
259 const char *psz = g_aszYasmRegGen64[pParam->index.reg_gen];
260 *pcchReg = 2 + !!psz[2] + !!psz[3];
261 return psz;
262 }
263
264 default:
265 AssertMsgFailed(("%#x %#x\n", pParam->flags, pCpu->addrmode));
266 *pcchReg = 3;
267 return "r??";
268 }
269}
270
271static size_t MyDisasYasmFormat(DISCPUSTATE const *pCpu, char *pszBuf, size_t cchBuf)
272{
273 PCOPCODE const pOp = pCpu->pCurInstr;
274 size_t cchOutput = 0;
275 char *pszDst = pszBuf;
276 size_t cchDst = cchBuf;
277
278 /* output macros */
279#define PUT_C(ch) \
280 do { \
281 cchOutput++; \
282 if (cchDst > 1) \
283 { \
284 cchDst--; \
285 *pszDst++ = (ch); \
286 } \
287 } while (0)
288#define PUT_STR(pszSrc, cchSrc) \
289 do { \
290 cchOutput += (cchSrc); \
291 if (cchDst > (cchSrc)) \
292 { \
293 memcpy(pszDst, (pszSrc), (cchSrc)); \
294 pszDst += (cchSrc); \
295 cchDst -= (cchSrc); \
296 } \
297 else if (cchDst > 1) \
298 { \
299 memcpy(pszDst, (pszSrc), cchDst - 1); \
300 pszDst += cchDst - 1; \
301 cchDst = 1; \
302 } \
303 } while (0)
304#define PUT_SZ(sz) \
305 PUT_STR((sz), sizeof(sz) - 1)
306#define PUT_PSZ(psz) \
307 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
308#define PUT_NUM(cch, fmt, num) \
309 do { \
310 cchOutput += (cch); \
311 if (cchBuf > 1) \
312 { \
313 const size_t cchTmp = RTStrPrintf(pszDst, cchBuf, fmt, (num)); \
314 pszDst += cchTmp; \
315 cchBuf -= cchTmp; \
316 Assert(cchTmp == (cch) || cchBuf == 1); \
317 } \
318 } while (0)
319#define PUT_NUM_8(num) PUT_NUM(4, "0%02xh", (uint8_t)(num))
320#define PUT_NUM_16(num) PUT_NUM(6, "0%04xh", (uint16_t)(num))
321#define PUT_NUM_32(num) PUT_NUM(10, "0%08xh", (uint32_t)(num))
322#define PUT_NUM_64(num) PUT_NUM(18, "0%08xh", (uint64_t)(num))
323
324 /*
325 * Filter out invalid opcodes first as they need special
326 * treatment. UD2 is an exception and should be handled normally.
327 */
328 if ( pOp->opcode == OP_INVALID
329 || ( pOp->opcode == OP_ILLUD2
330 && (pCpu->prefix & PREFIX_LOCK)))
331 {
332
333 }
334 else
335 {
336 /*
337 * Prefixes
338 */
339 if (pCpu->prefix & PREFIX_LOCK)
340 PUT_SZ("lock ");
341 if(pCpu->prefix & PREFIX_REP)
342 PUT_SZ("rep ");
343 else if(pCpu->prefix & PREFIX_REPNE)
344 PUT_SZ("repne ");
345
346 /*
347 * Adjust the format string to avoid stuff the assembler cannot handle.
348 */
349 char szTmpFmt[48];
350 const char *pszFmt = pOp->pszOpcode;
351 switch (pOp->opcode)
352 {
353 case OP_JECXZ:
354 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "jcxz %Jb" : pCpu->opmode == CPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
355 break;
356 case OP_PUSHF:
357 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "pushfw" : pCpu->opmode == CPUMODE_32BIT ? "pushfd" : "pushfq";
358 break;
359 case OP_POPF:
360 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "popfw" : pCpu->opmode == CPUMODE_32BIT ? "popfd" : "popfq";
361 break;
362 case OP_INSB:
363 pszFmt = "insb";
364 break;
365 case OP_INSWD:
366 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "insw" : pCpu->opmode == CPUMODE_32BIT ? "insd" : "insq";
367 break;
368 case OP_OUTSB:
369 pszFmt = "outsb";
370 break;
371 case OP_OUTSWD:
372 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "outsw" : pCpu->opmode == CPUMODE_32BIT ? "outsd" : "outsq";
373 break;
374 case OP_MOVSB:
375 pszFmt = "movsb";
376 break;
377 case OP_MOVSWD:
378 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "movsw" : pCpu->opmode == CPUMODE_32BIT ? "movsd" : "movsq";
379 break;
380 case OP_CMPSB:
381 pszFmt = "cmpsb";
382 break;
383 case OP_CMPWD:
384 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cmpsw" : pCpu->opmode == CPUMODE_32BIT ? "cmpsd" : "cmpsq";
385 break;
386 case OP_SCASB:
387 pszFmt = "scasb";
388 break;
389 case OP_SCASWD:
390 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "scasw" : pCpu->opmode == CPUMODE_32BIT ? "scasd" : "scasq";
391 break;
392 case OP_LODSB:
393 pszFmt = "lodsb";
394 break;
395 case OP_LODSWD:
396 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "lodsw" : pCpu->opmode == CPUMODE_32BIT ? "lodsd" : "lodsq";
397 break;
398 case OP_STOSB:
399 pszFmt = "stosb";
400 break;
401 case OP_STOSWD:
402 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "stosw" : pCpu->opmode == CPUMODE_32BIT ? "stosd" : "stosq";
403 break;
404 case OP_CBW:
405 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cbw" : pCpu->opmode == CPUMODE_32BIT ? "cwde" : "cdqe";
406 break;
407 case OP_CWD:
408 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cwd" : pCpu->opmode == CPUMODE_32BIT ? "cdq" : "cqo";
409 break;
410 case OP_SHL:
411 Assert(pszFmt[3] == '/');
412 pszFmt += 4;
413 break;
414 case OP_XLAT:
415 pszFmt = "xlatb";
416 break;
417 case OP_INT3:
418 pszFmt = "int3";
419 break;
420
421 /*
422 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
423 */
424 case OP_NOP:
425 if (pCpu->opcode == 0x90)
426 /* fine, fine */;
427 else if (pszFmt[sizeof("nop %Ev")] == '/' && pszFmt[sizeof("nop %Ev") + 1] == 'p')
428 pszFmt = "prefetch %Eb";
429 else if (pCpu->opcode == 0x1f)
430 {
431 Assert(pCpu->opsize >= 3);
432 PUT_SZ("db 00fh, 01fh,");
433 PUT_NUM_8(pCpu->ModRM.u);
434 for (unsigned i = 3; i < pCpu->opsize; i++)
435 {
436 PUT_C(',');
437 PUT_NUM_8(0x90); ///@todo fixme.
438 }
439 pszFmt = "";
440 }
441 break;
442
443 default:
444 /* ST(X) -> stX (floating point) */
445 if (*pszFmt == 'f' && strchr(pszFmt, '('))
446 {
447 char *pszFmtDst = szTmpFmt;
448 char ch;
449 do
450 {
451 ch = *pszFmt++;
452 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
453 {
454 *pszFmtDst++ = 's';
455 *pszFmtDst++ = 't';
456 pszFmt += 2;
457 ch = *pszFmt;
458 Assert(pszFmt[1] == ')');
459 pszFmt += 2;
460 *pszFmtDst++ = ch;
461 }
462 else
463 *pszFmtDst++ = ch;
464 } while (ch != '\0');
465 pszFmt = szTmpFmt;
466 }
467 break;
468
469 /*
470 * Horrible hacks.
471 */
472 case OP_FLD:
473 if (pCpu->opcode == 0xdb) /* m80fp workaround. */
474 *(int *)&pCpu->param1.param &= ~0x1f; /* make it pure OP_PARM_M */
475 break;
476 case OP_LAR: /* hack w -> v, probably not correct. */
477 *(int *)&pCpu->param2.param &= ~0x1f;
478 *(int *)&pCpu->param2.param |= OP_PARM_v;
479 break;
480 }
481
482 /*
483 * Formatting context and associated macros.
484 */
485 PCOP_PARAMETER pParam = &pCpu->param1;
486 int iParam = 1;
487
488#define PUT_FAR() \
489 do { \
490 if ( OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_p \
491 && pOp->opcode != OP_LDS /* table bugs? */ \
492 && pOp->opcode != OP_LES \
493 && pOp->opcode != OP_LFS \
494 && pOp->opcode != OP_LGS \
495 && pOp->opcode != OP_LSS ) \
496 PUT_SZ("far "); \
497 } while (0)
498 /** @todo mov ah,ch ends up with a byte 'override'... */
499#define PUT_SIZE_OVERRIDE() \
500 do { \
501 switch (OP_PARM_VSUBTYPE(pParam->param)) \
502 { \
503 case OP_PARM_v: \
504 switch (pCpu->opmode) \
505 { \
506 case CPUMODE_16BIT: PUT_SZ("word "); break; \
507 case CPUMODE_32BIT: PUT_SZ("dword "); break; \
508 case CPUMODE_64BIT: PUT_SZ("qword "); break; \
509 default: break; \
510 } \
511 break; \
512 case OP_PARM_b: PUT_SZ("byte "); break; \
513 case OP_PARM_w: PUT_SZ("word "); break; \
514 case OP_PARM_d: PUT_SZ("dword "); break; \
515 case OP_PARM_q: PUT_SZ("qword "); break; \
516 case OP_PARM_dq: \
517 if (OP_PARM_VTYPE(pParam->param) != OP_PARM_W) /* these are 128 bit, pray they are all unambiguous.. */ \
518 PUT_SZ("qword "); \
519 break; \
520 case OP_PARM_p: break; /* see PUT_FAR */ \
521 case OP_PARM_s: if (pParam->flags & USE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
522 case OP_PARM_z: break; \
523 case OP_PARM_NONE: \
524 if ( OP_PARM_VTYPE(pParam->param) == OP_PARM_M \
525 && ((pParam->flags & USE_REG_FP) || pOp->opcode == OP_FLD)) \
526 PUT_SZ("tword "); \
527 break; \
528 default: break; /*no pointer type specified/necessary*/ \
529 } \
530 } while (0)
531 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
532#define PUT_SEGMENT_OVERRIDE() \
533 do { \
534 if (pCpu->prefix & PREFIX_SEG) \
535 PUT_STR(s_szSegPrefix[pCpu->prefix_seg], 3); \
536 } while (0)
537
538
539 /*
540 * Segment prefixing for instructions that doesn't do memory access.
541 */
542 if ( (pCpu->prefix & PREFIX_SEG)
543 && !(pCpu->param1.flags & USE_EFFICIENT_ADDRESS)
544 && !(pCpu->param2.flags & USE_EFFICIENT_ADDRESS)
545 && !(pCpu->param3.flags & USE_EFFICIENT_ADDRESS))
546 {
547 PUT_STR(s_szSegPrefix[pCpu->prefix_seg], 2);
548 PUT_C(' ');
549 }
550
551
552 /*
553 * The formatting loop.
554 */
555 char ch;
556 while ((ch = *pszFmt++) != '\0')
557 {
558 if (ch == '%')
559 {
560 ch = *pszFmt++;
561 switch (ch)
562 {
563 /*
564 * ModRM - Register only.
565 */
566 case 'C': /* Control register (ParseModRM / UseModRM). */
567 case 'D': /* Debug register (ParseModRM / UseModRM). */
568 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
569 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
570 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
571 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
572 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
573 {
574 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
575 Assert(!(pParam->flags & (USE_INDEX | USE_SCALE) /* No SIB here... */));
576 Assert(!(pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32)));
577
578 size_t cchReg;
579 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0 /* pCpu->addrmode == CPUMODE_16BIT */);
580 PUT_STR(pszReg, cchReg);
581 break;
582 }
583
584 /*
585 * ModRM - Register or memory.
586 */
587 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
588 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
589 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
590 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
591 case 'M': /* ModRM may only refer to memory (ParseModRM / UseModRM). */
592 {
593 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
594
595 PUT_FAR();
596 if (pParam->flags & USE_EFFICIENT_ADDRESS)
597 {
598 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
599 while the register variants deals with 16, 32 & 64 in the normal fashion. */
600 if ( pParam->param != OP_PARM_Ev
601 || pOp->opcode != OP_MOV
602 || ( pOp->param1 != OP_PARM_Sw
603 && pOp->param2 != OP_PARM_Sw))
604 PUT_SIZE_OVERRIDE();
605 PUT_C('[');
606 }
607 if (pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32))
608 {
609 if ( (pParam->flags & USE_DISPLACEMENT8)
610 && !pParam->disp8)
611 PUT_SZ("byte ");
612 else if ( (pParam->flags & USE_DISPLACEMENT16)
613 && (int8_t)pParam->disp16 == (int16_t)pParam->disp16)
614 PUT_SZ("word ");
615 else if ( (pParam->flags & USE_DISPLACEMENT32)
616 && (int8_t)pParam->disp32 == (int32_t)pParam->disp32)
617 PUT_SZ("dword ");
618 }
619 if (pParam->flags & USE_EFFICIENT_ADDRESS)
620 PUT_SEGMENT_OVERRIDE();
621
622 bool fBase = (pParam->flags & USE_BASE) /* When exactly is USE_BASE supposed to be set? disasmModRMReg doesn't set it. */
623 || ( (pParam->flags & (USE_REG_GEN8 | USE_REG_GEN16 | USE_REG_GEN32 | USE_REG_GEN64))
624 && !(pParam->flags & USE_EFFICIENT_ADDRESS));
625 if (fBase)
626 {
627 size_t cchReg;
628 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0 /*pCpu->addrmode == CPUMODE_16BIT*/);
629 PUT_STR(pszReg, cchReg);
630 }
631
632 if (pParam->flags & USE_INDEX)
633 {
634 if (fBase)
635 PUT_C('+');
636
637 size_t cchReg;
638 const char *pszReg = MyDisasYasmFormatIndexReg(pCpu, pParam, &cchReg);
639 PUT_STR(pszReg, cchReg);
640
641 if (pParam->flags & USE_SCALE)
642 {
643 PUT_C('*');
644 PUT_C('0' + pParam->scale);
645 }
646 }
647 else
648 Assert(!(pParam->flags & USE_SCALE));
649
650 if (pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32))
651 {
652 Assert(!(pParam->flags & USE_DISPLACEMENT64));
653 int32_t off;
654 if (pParam->flags & USE_DISPLACEMENT8)
655 off = pParam->disp8;
656 else if (pParam->flags & USE_DISPLACEMENT16)
657 off = pParam->disp16;
658 else if (pParam->flags & (USE_DISPLACEMENT32 | USE_RIPDISPLACEMENT32))
659 off = pParam->disp32;
660
661 if (fBase || (pParam->flags & USE_INDEX))
662 PUT_C(off >= 0 ? '+' : '-');
663
664 if (off < 0)
665 off = -off;
666 if (pParam->flags & USE_DISPLACEMENT8)
667 PUT_NUM_8( off);
668 else if (pParam->flags & USE_DISPLACEMENT16)
669 PUT_NUM_16(off);
670 else if (pParam->flags & USE_DISPLACEMENT32)
671 PUT_NUM_32(off);
672 else
673 {
674 PUT_NUM_32(off);
675 PUT_SZ(" wrt rip"); //??
676 }
677 }
678
679 if (pParam->flags & USE_EFFICIENT_ADDRESS)
680 PUT_C(']');
681 break;
682 }
683
684 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
685 AssertFailed();
686 break;
687
688 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
689 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
690 switch (pParam->flags & ( USE_IMMEDIATE8 | USE_IMMEDIATE16 | USE_IMMEDIATE32 | USE_IMMEDIATE64
691 | USE_IMMEDIATE16_SX8 | USE_IMMEDIATE32_SX8))
692 {
693 case USE_IMMEDIATE8:
694 if ( (pOp->param1 >= OP_PARM_REG_GEN8_START && pOp->param1 <= OP_PARM_REG_GEN8_END)
695 || (pOp->param2 >= OP_PARM_REG_GEN8_START && pOp->param2 <= OP_PARM_REG_GEN8_END)
696 )
697 PUT_SZ("strict byte ");
698 PUT_NUM_8(pParam->parval);
699 break;
700
701 case USE_IMMEDIATE16:
702 if ( (int8_t)pParam->parval == (int16_t)pParam->parval
703 || (pOp->param1 >= OP_PARM_REG_GEN16_START && pOp->param1 <= OP_PARM_REG_GEN16_END)
704 || (pOp->param2 >= OP_PARM_REG_GEN16_START && pOp->param2 <= OP_PARM_REG_GEN16_END)
705 )
706 {
707 if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_b)
708 PUT_SZ("strict byte ");
709 else if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_v)
710 PUT_SZ("strict word ");
711 }
712 PUT_NUM_16(pParam->parval);
713 break;
714
715 case USE_IMMEDIATE16_SX8:
716 PUT_SZ("strict byte ");
717 PUT_NUM_16(pParam->parval);
718 break;
719
720 case USE_IMMEDIATE32:
721 if ( (int8_t)pParam->parval == (int32_t)pParam->parval
722 || (pOp->param1 >= OP_PARM_REG_GEN32_START && pOp->param1 <= OP_PARM_REG_GEN32_END)
723 || (pOp->param2 >= OP_PARM_REG_GEN32_START && pOp->param2 <= OP_PARM_REG_GEN32_END)
724 )
725 {
726 if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_b)
727 PUT_SZ("strict byte ");
728 else if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_v)
729 PUT_SZ("strict dword ");
730 }
731 PUT_NUM_32(pParam->parval);
732 break;
733
734 case USE_IMMEDIATE32_SX8:
735 PUT_SZ("strict byte ");
736 PUT_NUM_32(pParam->parval);
737 break;
738
739 case USE_IMMEDIATE64:
740 PUT_NUM_64(pParam->parval);
741 break;
742
743 default:
744 AssertFailed();
745 break;
746 }
747 break;
748
749 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
750 {
751 int32_t offDisplacement;
752 Assert(iParam == 1);
753 bool fPrefix = pOp->opcode != OP_CALL
754 && pOp->opcode != OP_LOOP
755 && pOp->opcode != OP_LOOPE
756 && pOp->opcode != OP_LOOPNE
757 && pOp->opcode != OP_JECXZ;
758
759 if (pParam->flags & USE_IMMEDIATE8_REL)
760 {
761 if (fPrefix)
762 PUT_SZ("short ");
763 offDisplacement = (int8_t)pParam->parval;
764 Assert(*pszFmt == 'b'); pszFmt++;
765 }
766 else if (pParam->flags & USE_IMMEDIATE16_REL)
767 {
768 if (fPrefix)
769 PUT_SZ("near ");
770 offDisplacement = (int16_t)pParam->parval;
771 Assert(*pszFmt == 'v'); pszFmt++;
772 }
773 else
774 {
775 if (fPrefix)
776 PUT_SZ("near ");
777 offDisplacement = (int32_t)pParam->parval;
778 Assert(pParam->flags & USE_IMMEDIATE32_REL);
779 Assert(*pszFmt == 'v'); pszFmt++;
780 }
781
782 RTUINTPTR uTrgAddr = pCpu->opaddr + pCpu->opsize + offDisplacement;
783 if (pCpu->mode == CPUMODE_16BIT)
784 PUT_NUM_16(uTrgAddr);
785 else if (pCpu->mode == CPUMODE_32BIT)
786 PUT_NUM_32(uTrgAddr);
787 else
788 PUT_NUM_64(uTrgAddr);
789 break;
790 }
791
792 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
793 Assert(*pszFmt == 'p'); pszFmt++;
794 PUT_FAR();
795 PUT_SIZE_OVERRIDE();
796 PUT_SEGMENT_OVERRIDE();
797 switch (pParam->flags & (USE_IMMEDIATE_ADDR_16_16 | USE_IMMEDIATE_ADDR_16_32 | USE_DISPLACEMENT64 | USE_DISPLACEMENT32 | USE_DISPLACEMENT16))
798 {
799 case USE_IMMEDIATE_ADDR_16_16:
800 PUT_NUM_16(pParam->parval >> 16);
801 PUT_C(':');
802 PUT_NUM_16(pParam->parval);
803 break;
804 case USE_IMMEDIATE_ADDR_16_32:
805 PUT_NUM_16(pParam->parval >> 32);
806 PUT_C(':');
807 PUT_NUM_32(pParam->parval);
808 break;
809 case USE_DISPLACEMENT16:
810 PUT_NUM_16(pParam->parval);
811 break;
812 case USE_DISPLACEMENT32:
813 PUT_NUM_32(pParam->parval);
814 break;
815 case USE_DISPLACEMENT64:
816 PUT_NUM_64(pParam->parval);
817 break;
818 default:
819 AssertFailed();
820 break;
821 }
822 break;
823
824 case 'O': /* No ModRM byte (ParseImmAddr). */
825 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
826 PUT_FAR();
827 PUT_SIZE_OVERRIDE();
828 PUT_C('[');
829 PUT_SEGMENT_OVERRIDE();
830 switch (pParam->flags & (USE_IMMEDIATE_ADDR_16_16 | USE_IMMEDIATE_ADDR_16_32 | USE_DISPLACEMENT64 | USE_DISPLACEMENT32 | USE_DISPLACEMENT16))
831 {
832 case USE_IMMEDIATE_ADDR_16_16:
833 PUT_NUM_16(pParam->parval >> 16);
834 PUT_C(':');
835 PUT_NUM_16(pParam->parval);
836 break;
837 case USE_IMMEDIATE_ADDR_16_32:
838 PUT_NUM_16(pParam->parval >> 32);
839 PUT_C(':');
840 PUT_NUM_32(pParam->parval);
841 break;
842 case USE_DISPLACEMENT16:
843 PUT_NUM_16(pParam->disp16);
844 break;
845 case USE_DISPLACEMENT32:
846 PUT_NUM_32(pParam->disp32);
847 break;
848 case USE_DISPLACEMENT64:
849 PUT_NUM_64(pParam->disp64);
850 break;
851 default:
852 AssertFailed();
853 break;
854 }
855 PUT_C(']');
856 break;
857
858 case 'X': /* DS:SI (ParseXb, ParseXv). */
859 case 'Y': /* ES:DI (ParseYb, ParseYv). */
860 {
861 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
862 PUT_FAR();
863 PUT_SIZE_OVERRIDE();
864 PUT_C('[');
865 if (pParam->flags & USE_POINTER_DS_BASED)
866 PUT_SZ("ds:");
867 else
868 PUT_SZ("es:");
869
870 size_t cchReg;
871 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0);
872 PUT_STR(pszReg, cchReg);
873 PUT_C(']');
874 break;
875 }
876
877 case 'e': /* Register based on operand size (e.g. %eAX) (ParseFixedReg). */
878 {
879 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2])); pszFmt += 2;
880 size_t cchReg;
881 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0);
882 PUT_STR(pszReg, cchReg);
883 break;
884 }
885
886 default:
887 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
888 break;
889 }
890 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
891 }
892 else
893 {
894 PUT_C(ch);
895 if (ch == ',')
896 {
897 Assert(*pszFmt != ' ');
898 PUT_C(' ');
899 switch (++iParam)
900 {
901 case 2: pParam = &pCpu->param2; break;
902 case 3: pParam = &pCpu->param3; break;
903 default: pParam = NULL; break;
904 }
905 }
906 }
907 } /* while more to format */
908 }
909
910
911 /* Terminate it - on overflow we'll have reserved one byte for this. */
912 if (cchDst > 0)
913 *pszDst = '\0';
914
915 /* clean up macros */
916#undef PUT_PSZ
917#undef PUT_SZ
918#undef PUT_STR
919#undef PUT_C
920 return cchOutput;
921}
922#endif
923
924
925/**
926 * Default style.
927 *
928 * @param pState The disassembler state.
929 */
930static void MyDisasDefaultFormatter(PMYDISSTATE pState)
931{
932 RTPrintf("%s", pState->szLine);
933}
934
935
936/**
937 * Yasm style.
938 *
939 * @param pState The disassembler state.
940 */
941static void MyDisasYasmFormatter(PMYDISSTATE pState)
942{
943 char szTmp[256];
944#ifndef USE_MY_FORMATTER
945 /* a very quick hack. */
946 strcpy(szTmp, RTStrStripL(strchr(pState->szLine, ':') + 1));
947
948 char *psz = strrchr(szTmp, '[');
949 *psz = '\0';
950 RTStrStripR(szTmp);
951
952 psz = strstr(szTmp, " ptr ");
953 if (psz)
954 memset(psz, ' ', 5);
955
956 char *pszEnd = strchr(szTmp, '\0');
957 while (pszEnd - &szTmp[0] < 71)
958 *pszEnd++ = ' ';
959 *pszEnd = '\0';
960
961#else /* USE_MY_FORMATTER */
962 size_t cch = MyDisasYasmFormat(&pState->Cpu, szTmp, sizeof(szTmp));
963 Assert(cch < sizeof(szTmp));
964 while (cch < 71)
965 szTmp[cch++] = ' ';
966 szTmp[cch] = '\0';
967#endif /* USE_MY_FORMATTER */
968
969 RTPrintf(" %s ; %08llu %s", szTmp, pState->uAddress, pState->szLine);
970}
971
972
/**
 * Checks if the encoding of the current instruction is something
 * we can never get the assembler to produce.
 *
 * @returns true if it's odd, false if it isn't.
 * @param   pState      The disassembler state.
 */
980static bool MyDisasYasmFormatterIsOddEncoding(PMYDISSTATE pState)
981{
982 /*
983 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
984 */
985 if ( pState->Cpu.addrmode != CPUMODE_16BIT ///@todo correct?
986 && pState->Cpu.ModRM.Bits.Rm == 4
987 && pState->Cpu.ModRM.Bits.Mod != 3)
988 {
989 /* No scaled index SIB (index=4), except for ESP. */
990 if ( pState->Cpu.SIB.Bits.Index == 4
991 && pState->Cpu.SIB.Bits.Base != 4)
992 return true;
993
994 /* EBP + displacement */
995 if ( pState->Cpu.ModRM.Bits.Mod != 0
996 && pState->Cpu.SIB.Bits.Base == 5
997 && pState->Cpu.SIB.Bits.Scale == 0)
998 return true;
999 }
1000
1001 /*
1002 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
1003 */
1004 if ( pState->Cpu.pCurInstr->opcode == OP_SHL
1005 && pState->Cpu.ModRM.Bits.Reg == 6)
1006 return true;
1007
1008 /*
1009 * Check for multiple prefixes of the same kind.
1010 */
1011 uint32_t fPrefixes = 0;
1012 for (uint8_t const *pu8 = pState->pbInstr;; pu8++)
1013 {
1014 uint32_t f;
1015 switch (*pu8)
1016 {
1017 case 0xf0:
1018 f = PREFIX_LOCK;
1019 break;
1020
1021 case 0xf2:
1022 case 0xf3:
1023 f = PREFIX_REP; /* yes, both */
1024 break;
1025
1026 case 0x2e:
1027 case 0x3e:
1028 case 0x26:
1029 case 0x36:
1030 case 0x64:
1031 case 0x65:
1032 f = PREFIX_SEG;
1033 break;
1034
1035 case 0x66:
1036 f = PREFIX_OPSIZE;
1037 break;
1038
1039 case 0x67:
1040 f = PREFIX_ADDRSIZE;
1041 break;
1042
1043 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1044 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1045 f = pState->Cpu.mode == CPUMODE_64BIT ? PREFIX_REX : 0;
1046 break;
1047
1048 default:
1049 f = 0;
1050 break;
1051 }
1052 if (!f)
1053 break; /* done */
1054 if (fPrefixes & f)
1055 return true;
1056 fPrefixes |= f;
1057 }
1058
1059 /* segment overrides are fun */
1060 if (fPrefixes & PREFIX_SEG)
1061 {
1062 /* no efficient address which it may apply to. */
1063 Assert((pState->Cpu.prefix & PREFIX_SEG) || pState->Cpu.mode == CPUMODE_64BIT);
1064 if ( !(pState->Cpu.param1.flags & USE_EFFICIENT_ADDRESS)
1065 && !(pState->Cpu.param2.flags & USE_EFFICIENT_ADDRESS)
1066 && !(pState->Cpu.param3.flags & USE_EFFICIENT_ADDRESS))
1067 return true;
1068 }
1069
1070 /* fixed register + addr override doesn't go down all that well. */
1071 if (fPrefixes & PREFIX_ADDRSIZE)
1072 {
1073 Assert(pState->Cpu.prefix & PREFIX_ADDRSIZE);
1074 if ( pState->Cpu.pCurInstr->param3 == OP_PARM_NONE
1075 && pState->Cpu.pCurInstr->param2 == OP_PARM_NONE
1076 && ( pState->Cpu.pCurInstr->param1 >= OP_PARM_REG_GEN32_START
1077 && pState->Cpu.pCurInstr->param1 <= OP_PARM_REG_GEN32_END))
1078 return true;
1079 }
1080
1081 /* nop w/ prefix(es). */
1082 if ( fPrefixes
1083 && pState->Cpu.pCurInstr->opcode == OP_NOP)
1084 return true;
1085
1086 /* There are probably a whole bunch of these... */
1087 if (fPrefixes & ~PREFIX_SEG)
1088 {
1089 switch (pState->Cpu.pCurInstr->opcode)
1090 {
1091 case OP_POP:
1092 case OP_PUSH:
1093 if ( pState->Cpu.pCurInstr->param1 >= OP_PARM_REG_SEG_START
1094 && pState->Cpu.pCurInstr->param1 <= OP_PARM_REG_SEG_END)
1095 return true;
1096 if ( (fPrefixes & ~PREFIX_OPSIZE)
1097 && pState->Cpu.pCurInstr->param1 >= OP_PARM_REG_GEN32_START
1098 && pState->Cpu.pCurInstr->param1 <= OP_PARM_REG_GEN32_END)
1099 return true;
1100 break;
1101
1102 case OP_POPA:
1103 case OP_POPF:
1104 case OP_PUSHA:
1105 case OP_PUSHF:
1106 if (fPrefixes & ~PREFIX_OPSIZE)
1107 return true;
1108 break;
1109 }
1110 }
1111
1112
1113 /* check for the version of xyz reg,reg instruction that the assembler doesn't use.
1114 expected: 1aee sbb ch, dh ; SBB r8, r/m8
1115 yasm: 18F5 sbb ch, dh ; SBB r/m8, r8 */
1116 if (pState->Cpu.ModRM.Bits.Mod == 3 /* reg,reg */)
1117 {
1118 switch (pState->Cpu.pCurInstr->opcode)
1119 {
1120 case OP_ADD:
1121 case OP_OR:
1122 case OP_ADC:
1123 case OP_SBB:
1124 case OP_AND:
1125 case OP_SUB:
1126 case OP_XOR:
1127 case OP_CMP:
1128 if ( ( pState->Cpu.pCurInstr->param1 == OP_PARM_Gb /* r8 */
1129 && pState->Cpu.pCurInstr->param2 == OP_PARM_Eb /* r8/mem8 */)
1130 || ( pState->Cpu.pCurInstr->param1 == OP_PARM_Gv /* rX */
1131 && pState->Cpu.pCurInstr->param2 == OP_PARM_Ev /* rX/memX */))
1132 return true;
1133
1134 /* 82 (see table A-6). */
1135 if (pState->Cpu.opcode == 0x82)
1136 return true;
1137 break;
1138
1139 /* ff /0, fe /0, ff /1, fe /0 */
1140 case OP_DEC:
1141 case OP_INC:
1142 return true;
1143
1144 case OP_POP:
1145 case OP_PUSH:
1146 Assert(pState->Cpu.opcode == 0x8f);
1147 return true;
1148
1149 default:
1150 break;
1151 }
1152 }
1153
1154 /* And some more - see table A-6. */
1155 if (pState->Cpu.opcode == 0x82)
1156 {
1157 switch (pState->Cpu.pCurInstr->opcode)
1158 {
1159 case OP_ADD:
1160 case OP_OR:
1161 case OP_ADC:
1162 case OP_SBB:
1163 case OP_AND:
1164 case OP_SUB:
1165 case OP_XOR:
1166 case OP_CMP:
1167 return true;
1168 break;
1169 }
1170 }
1171
1172
1173 /* check for REX.X = 1 without SIB. */
1174
1175 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1176 says (intel doesn't appear to care). */
1177 switch (pState->Cpu.pCurInstr->opcode)
1178 {
1179 case OP_SETO:
1180 case OP_SETNO:
1181 case OP_SETC:
1182 case OP_SETNC:
1183 case OP_SETE:
1184 case OP_SETNE:
1185 case OP_SETBE:
1186 case OP_SETNBE:
1187 case OP_SETS:
1188 case OP_SETNS:
1189 case OP_SETP:
1190 case OP_SETNP:
1191 case OP_SETL:
1192 case OP_SETNL:
1193 case OP_SETLE:
1194 case OP_SETNLE:
1195 AssertMsg(pState->Cpu.opcode >= 0x90 && pState->Cpu.opcode <= 0x9f, ("%#x\n", pState->Cpu.opcode));
1196 if (pState->Cpu.ModRM.Bits.Reg != 2)
1197 return true;
1198 break;
1199 }
1200
1201 /*
1202 * The MOVZX reg32,mem16 instruction without an operand size prefix
1203 * doesn't quite make sense...
1204 */
1205 if ( pState->Cpu.pCurInstr->opcode == OP_MOVZX
1206 && pState->Cpu.opcode == 0xB7
1207 && (pState->Cpu.mode == CPUMODE_16BIT) != !!(fPrefixes & PREFIX_OPSIZE))
1208 return true;
1209
1210 return false;
1211}
1212
1213
1214/**
1215 * Masm style.
1216 *
1217 * @param pState The disassembler state.
1218 */
1219static void MyDisasMasmFormatter(PMYDISSTATE pState)
1220{
1221 RTPrintf("masm not implemented: %s", pState->szLine);
1222}
1223
1224
1225/**
1226 * This is a temporary workaround for catching a few illegal opcodes
1227 * that the disassembler is currently letting thru, just enough to make
1228 * the assemblers happy.
1229 *
1230 * We're too close to a release to dare mess with these things now as
1231 * they may consequences for performance and let alone introduce bugs.
1232 *
1233 * @returns true if it's valid. false if it isn't.
1234 *
1235 * @param pCpu The disassembler output.
1236 */
1237static bool MyDisasIsValidInstruction(DISCPUSTATE const *pCpu)
1238{
1239 switch (pCpu->pCurInstr->opcode)
1240 {
1241 /* These doesn't take memory operands. */
1242 case OP_MOV_CR:
1243 case OP_MOV_DR:
1244 case OP_MOV_TR:
1245 if (pCpu->ModRM.Bits.Mod != 3)
1246 return false;
1247 break;
1248
1249 /* The 0x8f /0 variant of this instruction doesn't get its /r value verified. */
1250 case OP_POP:
1251 if ( pCpu->opcode == 0x8f
1252 && pCpu->ModRM.Bits.Reg != 0)
1253 return false;
1254 break;
1255
1256 /* The 0xc6 /0 and 0xc7 /0 variants of this instruction don't get their /r values verified. */
1257 case OP_MOV:
1258 if ( ( pCpu->opcode == 0xc6
1259 || pCpu->opcode == 0xc7)
1260 && pCpu->ModRM.Bits.Reg != 0)
1261 return false;
1262 break;
1263
1264 default:
1265 break;
1266 }
1267
1268 return true;
1269}
1270
1271
1272/**
1273 * Callback for reading bytes.
1274 *
1275 * @todo This should check that the disassembler doesn't do unnecessary reads,
1276 * however the current doesn't do this and is just complicated...
1277 */
1278static DECLCALLBACK(int) MyDisasInstrRead(RTUINTPTR uSrcAddr, uint8_t *pbDst, uint32_t cbRead, void *pvDisCpu)
1279{
1280 PMYDISSTATE pState = (PMYDISSTATE)pvDisCpu;
1281 if (RT_LIKELY( pState->uNextAddr == uSrcAddr
1282 && pState->cbLeft >= cbRead))
1283 {
1284 /*
1285 * Straight forward reading.
1286 */
1287 if (cbRead == 1)
1288 {
1289 pState->cbLeft--;
1290 *pbDst = *pState->pbNext++;
1291 pState->uNextAddr++;
1292 }
1293 else
1294 {
1295 memcpy(pbDst, pState->pbNext, cbRead);
1296 pState->pbNext += cbRead;
1297 pState->cbLeft -= cbRead;
1298 pState->uNextAddr += cbRead;
1299 }
1300 }
1301 else
1302 {
1303 /*
1304 * Jumping up the stream.
1305 * This occures when the byte sequence is added to the output string.
1306 */
1307 uint64_t offReq64 = uSrcAddr - pState->uAddress;
1308 if (offReq64 < 32)
1309 {
1310 uint32_t offReq = offReq64;
1311 uintptr_t off = pState->pbNext - pState->pbInstr;
1312 if (off + pState->cbLeft <= offReq)
1313 {
1314 pState->pbNext += pState->cbLeft;
1315 pState->uNextAddr += pState->cbLeft;
1316 pState->cbLeft = 0;
1317
1318 memset(pbDst, 0xcc, cbRead);
1319 pState->rc = VERR_EOF;
1320 return VERR_EOF;
1321 }
1322
1323 /* reset the stream. */
1324 pState->cbLeft += off;
1325 pState->pbNext = pState->pbInstr;
1326 pState->uNextAddr = pState->uAddress;
1327
1328 /* skip ahead. */
1329 pState->cbLeft -= offReq;
1330 pState->pbNext += offReq;
1331 pState->uNextAddr += offReq;
1332
1333 /* do the reading. */
1334 if (pState->cbLeft >= cbRead)
1335 {
1336 memcpy(pbDst, pState->pbNext, cbRead);
1337 pState->cbLeft -= cbRead;
1338 pState->pbNext += cbRead;
1339 pState->uNextAddr += cbRead;
1340 }
1341 else
1342 {
1343 if (pState->cbLeft > 0)
1344 {
1345 memcpy(pbDst, pState->pbNext, pState->cbLeft);
1346 pbDst += pState->cbLeft;
1347 cbRead -= pState->cbLeft;
1348 pState->pbNext += pState->cbLeft;
1349 pState->uNextAddr += pState->cbLeft;
1350 pState->cbLeft = 0;
1351 }
1352 memset(pbDst, 0xcc, cbRead);
1353 pState->rc = VERR_EOF;
1354 return VERR_EOF;
1355 }
1356 }
1357 else
1358 {
1359 RTStrmPrintf(g_pStdErr, "Reading before current instruction!\n");
1360 memset(pbDst, 0x90, cbRead);
1361 pState->rc = VERR_INTERNAL_ERROR;
1362 return VERR_INTERNAL_ERROR;
1363 }
1364 }
1365
1366 return VINF_SUCCESS;
1367}
1368
1369
1370/**
1371 * Disassembles a block of memory.
1372 *
1373 * @returns VBox status code.
1374 * @param argv0 Program name (for errors and warnings).
1375 * @param enmCpuMode The cpu mode to disassemble in.
1376 * @param uAddress The address we're starting to disassemble at.
1377 * @param pbFile Where to start disassemble.
1378 * @param cbFile How much to disassemble.
1379 * @param enmStyle The assembly output style.
1380 * @param fListing Whether to print in a listing like mode.
1381 * @param enmUndefOp How to deal with undefined opcodes.
1382 */
1383static int MyDisasmBlock(const char *argv0, DISCPUMODE enmCpuMode, uint64_t uAddress, uint8_t *pbFile, size_t cbFile,
1384 ASMSTYLE enmStyle, bool fListing, UNDEFOPHANDLING enmUndefOp)
1385{
1386 /*
1387 * Initialize the CPU context.
1388 */
1389 MYDISSTATE State;
1390 State.Cpu.mode = enmCpuMode;
1391 State.Cpu.pfnReadBytes = MyDisasInstrRead;
1392 State.uAddress = uAddress;
1393 State.pbInstr = pbFile;
1394 State.cbInstr = 0;
1395 State.enmUndefOp = enmUndefOp;
1396 State.rc = VINF_SUCCESS;
1397 State.cbLeft = cbFile;
1398 State.pbNext = pbFile;
1399 State.uNextAddr = uAddress;
1400
1401 void (*pfnFormatter)(PMYDISSTATE pState);
1402 switch (enmStyle)
1403 {
1404 case kAsmStyle_Default:
1405 pfnFormatter = MyDisasDefaultFormatter;
1406 break;
1407
1408 case kAsmStyle_yasm:
1409 RTPrintf(" BITS %d\n", enmCpuMode == CPUMODE_16BIT ? 16 : enmCpuMode == CPUMODE_32BIT ? 32 : 64);
1410 pfnFormatter = MyDisasYasmFormatter;
1411 break;
1412
1413 case kAsmStyle_masm:
1414 pfnFormatter = MyDisasMasmFormatter;
1415 break;
1416
1417 default:
1418 AssertFailedReturn(VERR_INTERNAL_ERROR);
1419 }
1420
1421 /*
1422 * The loop.
1423 */
1424 int rcRet = VINF_SUCCESS;
1425 while (State.cbLeft > 0)
1426 {
1427 /*
1428 * Disassemble it.
1429 */
1430 State.cbInstr = 0;
1431 State.cbLeft += State.pbNext - State.pbInstr;
1432 State.uNextAddr = State.uAddress;
1433 State.pbNext = State.pbInstr;
1434
1435 int rc = DISInstr(&State.Cpu, State.uAddress, 0, &State.cbInstr, State.szLine);
1436 if ( RT_SUCCESS(rc)
1437 || ( ( rc == VERR_DIS_INVALID_OPCODE
1438 || rc == VERR_DIS_GEN_FAILURE)
1439 && State.enmUndefOp == kUndefOp_DefineByte))
1440 {
1441 State.fUndefOp = rc == VERR_DIS_INVALID_OPCODE
1442 || rc == VERR_DIS_GEN_FAILURE
1443 || State.Cpu.pCurInstr->opcode == OP_INVALID
1444 || State.Cpu.pCurInstr->opcode == OP_ILLUD2
1445 || ( State.enmUndefOp == kUndefOp_DefineByte
1446 && !MyDisasIsValidInstruction(&State.Cpu));
1447 if (State.fUndefOp && State.enmUndefOp == kUndefOp_DefineByte)
1448 {
1449 RTPrintf(" db");
1450 if (!State.cbInstr)
1451 State.cbInstr = 1;
1452 for (unsigned off = 0; off < State.cbInstr; off++)
1453 {
1454 uint8_t b;
1455 State.Cpu.pfnReadBytes(State.uAddress + off, &b, 1, &State.Cpu);
1456 RTPrintf(off ? ", %03xh" : " %03xh", b);
1457 }
1458 RTPrintf(" ; %s\n", State.szLine);
1459 }
1460 else if (!State.fUndefOp && State.enmUndefOp == kUndefOp_All)
1461 {
1462 RTPrintf("%s: error at %#RX64: unexpected valid instruction (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->opcode);
1463 pfnFormatter(&State);
1464 rcRet = VERR_GENERAL_FAILURE;
1465 }
1466 else if (State.fUndefOp && State.enmUndefOp == kUndefOp_Fail)
1467 {
1468 RTPrintf("%s: error at %#RX64: undefined opcode (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->opcode);
1469 pfnFormatter(&State);
1470 rcRet = VERR_GENERAL_FAILURE;
1471 }
1472 else
1473 {
1474 /* Use db for odd encodings that we can't make the assembler use. */
1475 if ( State.enmUndefOp == kUndefOp_DefineByte
1476 && MyDisasYasmFormatterIsOddEncoding(&State))
1477 {
1478 RTPrintf(" db");
1479 for (unsigned off = 0; off < State.cbInstr; off++)
1480 {
1481 uint8_t b;
1482 State.Cpu.pfnReadBytes(State.uAddress + off, &b, 1, &State.Cpu);
1483 RTPrintf(off ? ", %03xh" : " %03xh", b);
1484 }
1485 RTPrintf(" ; ");
1486 }
1487
1488 pfnFormatter(&State);
1489 }
1490 }
1491 else
1492 {
1493 State.cbInstr = State.pbNext - State.pbInstr;
1494 if (!State.cbLeft)
1495 RTPrintf("%s: error at %#RX64: read beyond the end (%Rrc)\n", argv0, State.uAddress, rc);
1496 else if (State.cbInstr)
1497 RTPrintf("%s: error at %#RX64: %Rrc cbInstr=%d\n", argv0, State.uAddress, rc, State.cbInstr);
1498 else
1499 {
1500 RTPrintf("%s: error at %#RX64: %Rrc cbInstr=%d!\n", argv0, State.uAddress, rc, State.cbInstr);
1501 if (rcRet == VINF_SUCCESS)
1502 rcRet = rc;
1503 break;
1504 }
1505 }
1506
1507
1508 /* next */
1509 State.uAddress += State.cbInstr;
1510 State.pbInstr += State.cbInstr;
1511 }
1512
1513 return rcRet;
1514}
1515
1516
1517/**
1518 * Prints usage info.
1519 *
1520 * @returns 1.
1521 * @param argv0 The program name.
1522 */
1523static int Usage(const char *argv0)
1524{
1525 RTStrmPrintf(g_pStdErr,
1526"usage: %s [options] <file1> [file2..fileN]\n"
1527" or: %s <--help|-h>\n"
1528"\n"
1529"Options:\n"
1530" --address|-a <address>\n"
1531" The base address. Default: 0\n"
1532" --max-bytes|-b <bytes>\n"
1533" The maximum number of bytes to disassemble. Default: 1GB\n"
1534" --cpumode|-c <16|32|64>\n"
1535" The cpu mode. Default: 32\n"
1536" --listing|-l, --no-listing|-L\n"
1537" Enables or disables listing mode. Default: --no-listing\n"
1538" --offset|-o <offset>\n"
1539" The file offset at which to start disassembling. Default: 0\n"
1540" --style|-s <default|yasm|masm>\n"
1541" The assembly output style. Default: default\n"
1542" --undef-op|-u <fail|all|db>\n"
1543" How to treat undefined opcodes. Default: fail\n"
1544 , argv0, argv0);
1545 return 1;
1546}
1547
1548
1549int main(int argc, char **argv)
1550{
1551 RTR3Init();
1552 const char * const argv0 = RTPathFilename(argv[0]);
1553
1554 /* options */
1555 uint64_t uAddress = 0;
1556 ASMSTYLE enmStyle = kAsmStyle_Default;
1557 UNDEFOPHANDLING enmUndefOp = kUndefOp_Fail;
1558 bool fListing = true;
1559 DISCPUMODE enmCpuMode = CPUMODE_32BIT;
1560 RTFOFF off = 0;
1561 RTFOFF cbMax = _1G;
1562
1563 /*
1564 * Parse arguments.
1565 */
1566 static const RTOPTIONDEF g_aOptions[] =
1567 {
1568 { "--address", 'a', RTGETOPT_REQ_UINT64 },
1569 { "--cpumode", 'c', RTGETOPT_REQ_UINT32 },
1570 { "--help", 'h', 0 },
1571 { "--bytes", 'b', RTGETOPT_REQ_INT64 },
1572 { "--listing", 'l', 0 },
1573 { "--no-listing", 'L', 0 },
1574 { "--offset", 'o', RTGETOPT_REQ_INT64 },
1575 { "--style", 's', RTGETOPT_REQ_STRING },
1576 { "--undef-op", 'u', RTGETOPT_REQ_STRING },
1577 };
1578
1579 int ch;
1580 int iArg = 1;
1581 RTOPTIONUNION ValueUnion;
1582 while ((ch = RTGetOpt(argc, argv, g_aOptions, RT_ELEMENTS(g_aOptions), &iArg, &ValueUnion)))
1583 {
1584 switch (ch)
1585 {
1586 case 'a':
1587 uAddress = ValueUnion.u64;
1588 break;
1589
1590 case 'b':
1591 cbMax = ValueUnion.i;
1592 break;
1593
1594 case 'c':
1595 if (ValueUnion.u32 == 16)
1596 enmCpuMode = CPUMODE_16BIT;
1597 else if (ValueUnion.u32 == 32)
1598 enmCpuMode = CPUMODE_32BIT;
1599 else if (ValueUnion.u32 == 64)
1600 enmCpuMode = CPUMODE_64BIT;
1601 else
1602 {
1603 RTStrmPrintf(g_pStdErr, "%s: Invalid CPU mode value %RU32\n", argv0, ValueUnion.u32);
1604 return 1;
1605 }
1606 break;
1607
1608 case 'h':
1609 return Usage(argv0);
1610
1611 case 'l':
1612 fListing = true;
1613 break;
1614
1615 case 'L':
1616 fListing = false;
1617 break;
1618
1619 case 'o':
1620 off = ValueUnion.i;
1621 break;
1622
1623 case 's':
1624 if (!strcmp(ValueUnion.psz, "default"))
1625 enmStyle = kAsmStyle_Default;
1626 else if (!strcmp(ValueUnion.psz, "yasm"))
1627 enmStyle = kAsmStyle_yasm;
1628 else if (!strcmp(ValueUnion.psz, "masm"))
1629 {
1630 enmStyle = kAsmStyle_masm;
1631 RTStrmPrintf(g_pStdErr, "%s: masm style isn't implemented yet\n", argv0);
1632 return 1;
1633 }
1634 else
1635 {
1636 RTStrmPrintf(g_pStdErr, "%s: unknown assembly style: %s\n", argv0, ValueUnion.psz);
1637 return 1;
1638 }
1639 break;
1640
1641 case 'u':
1642 if (!strcmp(ValueUnion.psz, "fail"))
1643 enmUndefOp = kUndefOp_Fail;
1644 else if (!strcmp(ValueUnion.psz, "all"))
1645 enmUndefOp = kUndefOp_All;
1646 else if (!strcmp(ValueUnion.psz, "db"))
1647 enmUndefOp = kUndefOp_DefineByte;
1648 else
1649 {
1650 RTStrmPrintf(g_pStdErr, "%s: unknown undefined opcode handling method: %s\n", argv0, ValueUnion.psz);
1651 return 1;
1652 }
1653 break;
1654
1655 default:
1656 RTStrmPrintf(g_pStdErr, "%s: syntax error: %Rrc\n", argv0, ch);
1657 return 1;
1658 }
1659 }
1660 if (iArg >= argc)
1661 return Usage(argv0);
1662
1663 /*
1664 * Process the files.
1665 */
1666 int rc = VINF_SUCCESS;
1667 for ( ; iArg < argc; iArg++)
1668 {
1669 /*
1670 * Read the file into memory.
1671 */
1672 void *pvFile;
1673 size_t cbFile;
1674 rc = RTFileReadAllEx(argv[iArg], off, cbMax, 0, &pvFile, &cbFile);
1675 if (RT_FAILURE(rc))
1676 {
1677 RTStrmPrintf(g_pStdErr, "%s: %s: %Rrc\n", argv0, argv[iArg], rc);
1678 break;
1679 }
1680
1681 /*
1682 * Disassemble it.
1683 */
1684 rc = MyDisasmBlock(argv0, enmCpuMode, uAddress, (uint8_t *)pvFile, cbFile, enmStyle, fListing, enmUndefOp);
1685 if (RT_FAILURE(rc))
1686 break;
1687 }
1688
1689 return RT_SUCCESS(rc) ? 0 : 1;
1690}
1691
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette