VirtualBox

source: vbox/trunk/src/VBox/Disassembler/testcase/tstDisasm-2.cpp@ 9101

最後變更 在這個檔案從9101是 9101,由 vboxsync 提交於 17 年 前

More yasm fixing.

  • 屬性 svn:eol-style 設為 native
  • 屬性 svn:keywords 設為 Id
檔案大小: 63.9 KB
 
1/* $Id: tstDisasm-2.cpp 9101 2008-05-26 02:27:35Z vboxsync $ */
2/** @file
3 * Testcase - Generic Disassembler Tool.
4 */
5
6/*
7 * Copyright (C) 2008 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22/*******************************************************************************
23* Header Files *
24*******************************************************************************/
25#include <VBox/dis.h>
26#include <iprt/stream.h>
27#include <iprt/getopt.h>
28#include <iprt/file.h>
29#include <iprt/string.h>
30#include <iprt/runtime.h>
31#include <VBox/err.h>
32#include <iprt/ctype.h>
33
34
35/*******************************************************************************
36* Structures and Typedefs *
37*******************************************************************************/
/** Assembler output style selector. */
typedef enum
{
    kAsmStyle_Default,
    kAsmStyle_yasm,
    kAsmStyle_masm,
    kAsmStyle_gas,
    kAsmStyle_invalid
} ASMSTYLE;

/** How to treat undefined opcodes. */
typedef enum
{
    kUndefOp_Fail,
    kUndefOp_All,
    kUndefOp_DefineByte,
    kUndefOp_End
} UNDEFOPHANDLING;
40
41typedef struct MYDISSTATE
42{
43 DISCPUSTATE Cpu;
44 uint64_t uAddress; /**< The current instruction address. */
45 uint8_t *pbInstr; /**< The current instruction (pointer). */
46 uint32_t cbInstr; /**< The size of the current instruction. */
47 bool fUndefOp; /**< Whether the current instruction is really an undefined opcode.*/
48 UNDEFOPHANDLING enmUndefOp; /**< How to treat undefined opcodes. */
49 int rc; /**< Set if we hit EOF. */
50 size_t cbLeft; /**< The number of bytes left. (read) */
51 uint8_t *pbNext; /**< The next byte. (read) */
52 uint64_t uNextAddr; /**< The address of the next byte. (read) */
53 char szLine[256]; /**< The disassembler text output. */
54} MYDISSTATE;
55typedef MYDISSTATE *PMYDISSTATE;
56
57
/*
 * Non-logging builds don't do full formatting, so we must do it on our own.
 * This should probably be moved into the disassembler later, as it's needed
 * for the vbox debugger as well.
 *
 * Define USE_MY_FORMATTER to enable it.
 */
65#define USE_MY_FORMATTER
66
67#ifdef USE_MY_FORMATTER
/*
 * Register name tables, indexed by register number.
 *
 * Rows are fixed width; names shorter than the row are zero padded by the
 * array initialization, which the formatting helpers rely on when they probe
 * psz[2], psz[3] and psz[4] to work out the name length.
 */

/** 8-bit general registers, legacy encoding. */
static const char g_aszYasmRegGen8x86[8][4] =
{
    "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"
};

/** 8-bit general registers, used when the operand mode is 64-bit. */
static const char g_aszYasmRegGen8Amd64[16][5] =
{
    "al",  "cl",  "dl",   "bl",   "spb",  "bpb",  "sib",  "dib",
    "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"
};

/** 16-bit general registers. */
static const char g_aszYasmRegGen16[16][5] =
{
    "ax",  "cx",  "dx",   "bx",   "sp",   "bp",   "si",   "di",
    "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"
};

/** 16-bit base register combinations (selected via fReg1616). */
static const char g_aszYasmRegGen1616[8][6] =
{
    "bx+si", "bx+di", "bp+si", "bp+di", "si", "di", "bp", "bx"
};

/** 32-bit general registers. */
static const char g_aszYasmRegGen32[16][5] =
{
    "eax", "ecx", "edx",  "ebx",  "esp",  "ebp",  "esi",  "edi",
    "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"
};

/** 64-bit general registers. */
static const char g_aszYasmRegGen64[16][4] =
{
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
    "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15"
};

/** Segment registers. */
static const char g_aszYasmRegSeg[6][3] =
{
    "es", "cs", "ss", "ds", "fs", "gs"
};

/** Floating point stack registers. */
static const char g_aszYasmRegFP[8][4] =
{
    "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"
};

/** MMX registers. */
static const char g_aszYasmRegMMX[8][4] =
{
    "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
};

/** XMM registers. */
static const char g_aszYasmRegXMM[16][6] =
{
    "xmm0", "xmm1", "xmm2",  "xmm3",  "xmm4",  "xmm5",  "xmm6",  "xmm7",
    "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
};

/** Control registers. */
static const char g_aszYasmRegCRx[16][5] =
{
    "cr0", "cr1", "cr2",  "cr3",  "cr4",  "cr5",  "cr6",  "cr7",
    "cr8", "cr9", "cr10", "cr11", "cr12", "cr13", "cr14", "cr15"
};

/** Debug registers. */
static const char g_aszYasmRegDRx[16][5] =
{
    "dr0", "dr1", "dr2",  "dr3",  "dr4",  "dr5",  "dr6",  "dr7",
    "dr8", "dr9", "dr10", "dr11", "dr12", "dr13", "dr14", "dr15"
};

/** Test registers. */
static const char g_aszYasmRegTRx[16][5] =
{
    "tr0", "tr1", "tr2",  "tr3",  "tr4",  "tr5",  "tr6",  "tr7",
    "tr8", "tr9", "tr10", "tr11", "tr12", "tr13", "tr14", "tr15"
};
120
121
122DECLINLINE(const char *) MyDisasYasmFormatBaseReg(DISCPUSTATE const *pCpu, PCOP_PARAMETER pParam, size_t *pcchReg, bool fReg1616)
123{
124 switch (pParam->flags & ( USE_REG_GEN8 | USE_REG_GEN16 | USE_REG_GEN32 | USE_REG_GEN64
125 | USE_REG_FP | USE_REG_MMX | USE_REG_XMM | USE_REG_CR
126 | USE_REG_DBG | USE_REG_SEG | USE_REG_TEST))
127
128 {
129 case USE_REG_GEN8:
130 if (pCpu->opmode == CPUMODE_64BIT)
131 {
132 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen8Amd64));
133 const char *psz = g_aszYasmRegGen8Amd64[pParam->base.reg_gen];
134 *pcchReg = 2 + !!psz[2] + !!psz[3];
135 return psz;
136 }
137 *pcchReg = 2;
138 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen8x86));
139 return g_aszYasmRegGen8x86[pParam->base.reg_gen];
140
141 case USE_REG_GEN16:
142 {
143 if (fReg1616)
144 {
145 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen1616));
146 const char *psz = g_aszYasmRegGen1616[pParam->base.reg_gen];
147 *pcchReg = psz[2] ? 5 : 2;
148 return psz;
149 }
150
151 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen16));
152 const char *psz = g_aszYasmRegGen16[pParam->base.reg_gen];
153 *pcchReg = 2 + !!psz[2] + !!psz[3];
154 return psz;
155 }
156
157 case USE_REG_GEN32:
158 {
159 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen32));
160 const char *psz = g_aszYasmRegGen32[pParam->base.reg_gen];
161 *pcchReg = 2 + !!psz[2] + !!psz[3];
162 return psz;
163 }
164
165 case USE_REG_GEN64:
166 {
167 Assert(pParam->base.reg_gen < RT_ELEMENTS(g_aszYasmRegGen64));
168 const char *psz = g_aszYasmRegGen64[pParam->base.reg_gen];
169 *pcchReg = 2 + !!psz[2] + !!psz[3];
170 return psz;
171 }
172
173 case USE_REG_FP:
174 {
175 Assert(pParam->base.reg_fp < RT_ELEMENTS(g_aszYasmRegFP));
176 const char *psz = g_aszYasmRegFP[pParam->base.reg_fp];
177 *pcchReg = 3;
178 return psz;
179 }
180
181 case USE_REG_MMX:
182 {
183 Assert(pParam->base.reg_mmx < RT_ELEMENTS(g_aszYasmRegMMX));
184 const char *psz = g_aszYasmRegMMX[pParam->base.reg_mmx];
185 *pcchReg = 3;
186 return psz;
187 }
188
189 case USE_REG_XMM:
190 {
191 Assert(pParam->base.reg_xmm < RT_ELEMENTS(g_aszYasmRegXMM));
192 const char *psz = g_aszYasmRegXMM[pParam->base.reg_mmx];
193 *pcchReg = 4 + !!psz[4];
194 return psz;
195 }
196
197 case USE_REG_CR:
198 {
199 Assert(pParam->base.reg_ctrl < RT_ELEMENTS(g_aszYasmRegCRx));
200 const char *psz = g_aszYasmRegCRx[pParam->base.reg_ctrl];
201 *pcchReg = 3;
202 return psz;
203 }
204
205 case USE_REG_DBG:
206 {
207 Assert(pParam->base.reg_dbg < RT_ELEMENTS(g_aszYasmRegDRx));
208 const char *psz = g_aszYasmRegDRx[pParam->base.reg_dbg];
209 *pcchReg = 3;
210 return psz;
211 }
212
213 case USE_REG_SEG:
214 {
215 Assert(pParam->base.reg_seg < RT_ELEMENTS(g_aszYasmRegCRx));
216 const char *psz = g_aszYasmRegSeg[pParam->base.reg_seg];
217 *pcchReg = 2;
218 return psz;
219 }
220
221 case USE_REG_TEST:
222 {
223 Assert(pParam->base.reg_test < RT_ELEMENTS(g_aszYasmRegTRx));
224 const char *psz = g_aszYasmRegTRx[pParam->base.reg_test];
225 *pcchReg = 3;
226 return psz;
227 }
228
229 default:
230 AssertMsgFailed(("%#x\n", pParam->flags));
231 *pcchReg = 3;
232 return "r??";
233 }
234}
235
236DECLINLINE(const char *) MyDisasYasmFormatIndexReg(DISCPUSTATE const *pCpu, PCOP_PARAMETER pParam, size_t *pcchReg)
237{
238 switch (pCpu->addrmode)
239 {
240 case CPUMODE_16BIT:
241 {
242 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen16));
243 const char *psz = g_aszYasmRegGen16[pParam->index.reg_gen];
244 *pcchReg = 2 + !!psz[2] + !!psz[3];
245 return psz;
246 }
247
248 case CPUMODE_32BIT:
249 {
250 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen32));
251 const char *psz = g_aszYasmRegGen32[pParam->index.reg_gen];
252 *pcchReg = 2 + !!psz[2] + !!psz[3];
253 return psz;
254 }
255
256 case CPUMODE_64BIT:
257 {
258 Assert(pParam->index.reg_gen < RT_ELEMENTS(g_aszYasmRegGen64));
259 const char *psz = g_aszYasmRegGen64[pParam->index.reg_gen];
260 *pcchReg = 2 + !!psz[2] + !!psz[3];
261 return psz;
262 }
263
264 default:
265 AssertMsgFailed(("%#x %#x\n", pParam->flags, pCpu->addrmode));
266 *pcchReg = 3;
267 return "r??";
268 }
269}
270
271static size_t MyDisasYasmFormat(DISCPUSTATE const *pCpu, char *pszBuf, size_t cchBuf)
272{
273 PCOPCODE const pOp = pCpu->pCurInstr;
274 size_t cchOutput = 0;
275 char *pszDst = pszBuf;
276 size_t cchDst = cchBuf;
277
278 /* output macros */
279#define PUT_C(ch) \
280 do { \
281 cchOutput++; \
282 if (cchDst > 1) \
283 { \
284 cchDst--; \
285 *pszDst++ = (ch); \
286 } \
287 } while (0)
288#define PUT_STR(pszSrc, cchSrc) \
289 do { \
290 cchOutput += (cchSrc); \
291 if (cchDst > (cchSrc)) \
292 { \
293 memcpy(pszDst, (pszSrc), (cchSrc)); \
294 pszDst += (cchSrc); \
295 cchDst -= (cchSrc); \
296 } \
297 else if (cchDst > 1) \
298 { \
299 memcpy(pszDst, (pszSrc), cchDst - 1); \
300 pszDst += cchDst - 1; \
301 cchDst = 1; \
302 } \
303 } while (0)
304#define PUT_SZ(sz) \
305 PUT_STR((sz), sizeof(sz) - 1)
306#define PUT_PSZ(psz) \
307 do { const size_t cchTmp = strlen(psz); PUT_STR((psz), cchTmp); } while (0)
308#define PUT_NUM(cch, fmt, num) \
309 do { \
310 cchOutput += (cch); \
311 if (cchBuf > 1) \
312 { \
313 const size_t cchTmp = RTStrPrintf(pszDst, cchBuf, fmt, (num)); \
314 pszDst += cchTmp; \
315 cchBuf -= cchTmp; \
316 Assert(cchTmp == (cch) || cchBuf == 1); \
317 } \
318 } while (0)
319#define PUT_NUM_8(num) PUT_NUM(4, "0%02xh", (uint8_t)(num))
320#define PUT_NUM_16(num) PUT_NUM(6, "0%04xh", (uint16_t)(num))
321#define PUT_NUM_32(num) PUT_NUM(10, "0%08xh", (uint32_t)(num))
322#define PUT_NUM_64(num) PUT_NUM(18, "0%08xh", (uint64_t)(num))
323
324 /*
325 * Filter out invalid opcodes first as they need special
326 * treatment. UD2 is an exception and should be handled normally.
327 */
328 if ( pOp->opcode == OP_INVALID
329 || ( pOp->opcode == OP_ILLUD2
330 && (pCpu->prefix & PREFIX_LOCK)))
331 {
332
333 }
334 else
335 {
336 /*
337 * Prefixes
338 */
339 if (pCpu->prefix & PREFIX_LOCK)
340 PUT_SZ("lock ");
341 if(pCpu->prefix & PREFIX_REP)
342 PUT_SZ("rep ");
343 else if(pCpu->prefix & PREFIX_REPNE)
344 PUT_SZ("repne ");
345
346 /*
347 * Adjust the format string to avoid stuff the assembler cannot handle.
348 */
349 char szTmpFmt[48];
350 const char *pszFmt = pOp->pszOpcode;
351 switch (pOp->opcode)
352 {
353 case OP_JECXZ:
354 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "jcxz %Jb" : pCpu->opmode == CPUMODE_32BIT ? "jecxz %Jb" : "jrcxz %Jb";
355 break;
356 case OP_PUSHF:
357 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "pushfw" : pCpu->opmode == CPUMODE_32BIT ? "pushfd" : "pushfq";
358 break;
359 case OP_POPF:
360 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "popfw" : pCpu->opmode == CPUMODE_32BIT ? "popfd" : "popfq";
361 break;
362 case OP_PUSHA:
363 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "pushaw" : "pushad";
364 break;
365 case OP_POPA:
366 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "popaw" : "popad";
367 break;
368 case OP_INSB:
369 pszFmt = "insb";
370 break;
371 case OP_INSWD:
372 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "insw" : pCpu->opmode == CPUMODE_32BIT ? "insd" : "insq";
373 break;
374 case OP_OUTSB:
375 pszFmt = "outsb";
376 break;
377 case OP_OUTSWD:
378 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "outsw" : pCpu->opmode == CPUMODE_32BIT ? "outsd" : "outsq";
379 break;
380 case OP_MOVSB:
381 pszFmt = "movsb";
382 break;
383 case OP_MOVSWD:
384 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "movsw" : pCpu->opmode == CPUMODE_32BIT ? "movsd" : "movsq";
385 break;
386 case OP_CMPSB:
387 pszFmt = "cmpsb";
388 break;
389 case OP_CMPWD:
390 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cmpsw" : pCpu->opmode == CPUMODE_32BIT ? "cmpsd" : "cmpsq";
391 break;
392 case OP_SCASB:
393 pszFmt = "scasb";
394 break;
395 case OP_SCASWD:
396 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "scasw" : pCpu->opmode == CPUMODE_32BIT ? "scasd" : "scasq";
397 break;
398 case OP_LODSB:
399 pszFmt = "lodsb";
400 break;
401 case OP_LODSWD:
402 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "lodsw" : pCpu->opmode == CPUMODE_32BIT ? "lodsd" : "lodsq";
403 break;
404 case OP_STOSB:
405 pszFmt = "stosb";
406 break;
407 case OP_STOSWD:
408 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "stosw" : pCpu->opmode == CPUMODE_32BIT ? "stosd" : "stosq";
409 break;
410 case OP_CBW:
411 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cbw" : pCpu->opmode == CPUMODE_32BIT ? "cwde" : "cdqe";
412 break;
413 case OP_CWD:
414 pszFmt = pCpu->opmode == CPUMODE_16BIT ? "cwd" : pCpu->opmode == CPUMODE_32BIT ? "cdq" : "cqo";
415 break;
416 case OP_SHL:
417 Assert(pszFmt[3] == '/');
418 pszFmt += 4;
419 break;
420 case OP_XLAT:
421 pszFmt = "xlatb";
422 break;
423 case OP_INT3:
424 pszFmt = "int3";
425 break;
426
427 /*
428 * Don't know how to tell yasm to generate complicated nop stuff, so 'db' it.
429 */
430 case OP_NOP:
431 if (pCpu->opcode == 0x90)
432 /* fine, fine */;
433 else if (pszFmt[sizeof("nop %Ev")] == '/' && pszFmt[sizeof("nop %Ev") + 1] == 'p')
434 pszFmt = "prefetch %Eb";
435 else if (pCpu->opcode == 0x1f)
436 {
437 Assert(pCpu->opsize >= 3);
438 PUT_SZ("db 00fh, 01fh,");
439 PUT_NUM_8(pCpu->ModRM.u);
440 for (unsigned i = 3; i < pCpu->opsize; i++)
441 {
442 PUT_C(',');
443 PUT_NUM_8(0x90); ///@todo fixme.
444 }
445 pszFmt = "";
446 }
447 break;
448
449 default:
450 /* ST(X) -> stX (floating point) */
451 if (*pszFmt == 'f' && strchr(pszFmt, '('))
452 {
453 char *pszFmtDst = szTmpFmt;
454 char ch;
455 do
456 {
457 ch = *pszFmt++;
458 if (ch == 'S' && pszFmt[0] == 'T' && pszFmt[1] == '(')
459 {
460 *pszFmtDst++ = 's';
461 *pszFmtDst++ = 't';
462 pszFmt += 2;
463 ch = *pszFmt;
464 Assert(pszFmt[1] == ')');
465 pszFmt += 2;
466 *pszFmtDst++ = ch;
467 }
468 else
469 *pszFmtDst++ = ch;
470 } while (ch != '\0');
471 pszFmt = szTmpFmt;
472 }
473 break;
474
475 /*
476 * Horrible hacks.
477 */
478 case OP_FLD:
479 if (pCpu->opcode == 0xdb) /* m80fp workaround. */
480 *(int *)&pCpu->param1.param &= ~0x1f; /* make it pure OP_PARM_M */
481 break;
482 case OP_LAR: /* hack w -> v, probably not correct. */
483 *(int *)&pCpu->param2.param &= ~0x1f;
484 *(int *)&pCpu->param2.param |= OP_PARM_v;
485 break;
486 }
487
488 /*
489 * Formatting context and associated macros.
490 */
491 PCOP_PARAMETER pParam = &pCpu->param1;
492 int iParam = 1;
493
494#define PUT_FAR() \
495 do { \
496 if ( OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_p \
497 && pOp->opcode != OP_LDS /* table bugs? */ \
498 && pOp->opcode != OP_LES \
499 && pOp->opcode != OP_LFS \
500 && pOp->opcode != OP_LGS \
501 && pOp->opcode != OP_LSS ) \
502 PUT_SZ("far "); \
503 } while (0)
504 /** @todo mov ah,ch ends up with a byte 'override'... */
505#define PUT_SIZE_OVERRIDE() \
506 do { \
507 switch (OP_PARM_VSUBTYPE(pParam->param)) \
508 { \
509 case OP_PARM_v: \
510 switch (pCpu->opmode) \
511 { \
512 case CPUMODE_16BIT: PUT_SZ("word "); break; \
513 case CPUMODE_32BIT: PUT_SZ("dword "); break; \
514 case CPUMODE_64BIT: PUT_SZ("qword "); break; \
515 default: break; \
516 } \
517 break; \
518 case OP_PARM_b: PUT_SZ("byte "); break; \
519 case OP_PARM_w: PUT_SZ("word "); break; \
520 case OP_PARM_d: PUT_SZ("dword "); break; \
521 case OP_PARM_q: PUT_SZ("qword "); break; \
522 case OP_PARM_dq: \
523 if (OP_PARM_VTYPE(pParam->param) != OP_PARM_W) /* these are 128 bit, pray they are all unambiguous.. */ \
524 PUT_SZ("qword "); \
525 break; \
526 case OP_PARM_p: break; /* see PUT_FAR */ \
527 case OP_PARM_s: if (pParam->flags & USE_REG_FP) PUT_SZ("tword "); break; /* ?? */ \
528 case OP_PARM_z: break; \
529 case OP_PARM_NONE: \
530 if ( OP_PARM_VTYPE(pParam->param) == OP_PARM_M \
531 && ((pParam->flags & USE_REG_FP) || pOp->opcode == OP_FLD)) \
532 PUT_SZ("tword "); \
533 break; \
534 default: break; /*no pointer type specified/necessary*/ \
535 } \
536 } while (0)
537 static const char s_szSegPrefix[6][4] = { "es:", "cs:", "ss:", "ds:", "fs:", "gs:" };
538#define PUT_SEGMENT_OVERRIDE() \
539 do { \
540 if (pCpu->prefix & PREFIX_SEG) \
541 PUT_STR(s_szSegPrefix[pCpu->prefix_seg], 3); \
542 } while (0)
543
544
545 /*
546 * Segment prefixing for instructions that doesn't do memory access.
547 */
548 if ( (pCpu->prefix & PREFIX_SEG)
549 && !(pCpu->param1.flags & USE_EFFICIENT_ADDRESS)
550 && !(pCpu->param2.flags & USE_EFFICIENT_ADDRESS)
551 && !(pCpu->param3.flags & USE_EFFICIENT_ADDRESS))
552 {
553 PUT_STR(s_szSegPrefix[pCpu->prefix_seg], 2);
554 PUT_C(' ');
555 }
556
557
558 /*
559 * The formatting loop.
560 */
561 char ch;
562 while ((ch = *pszFmt++) != '\0')
563 {
564 if (ch == '%')
565 {
566 ch = *pszFmt++;
567 switch (ch)
568 {
569 /*
570 * ModRM - Register only.
571 */
572 case 'C': /* Control register (ParseModRM / UseModRM). */
573 case 'D': /* Debug register (ParseModRM / UseModRM). */
574 case 'G': /* ModRM selects general register (ParseModRM / UseModRM). */
575 case 'S': /* ModRM byte selects a segment register (ParseModRM / UseModRM). */
576 case 'T': /* ModRM byte selects a test register (ParseModRM / UseModRM). */
577 case 'V': /* ModRM byte selects an XMM/SSE register (ParseModRM / UseModRM). */
578 case 'P': /* ModRM byte selects MMX register (ParseModRM / UseModRM). */
579 {
580 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
581 Assert(!(pParam->flags & (USE_INDEX | USE_SCALE) /* No SIB here... */));
582 Assert(!(pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32)));
583
584 size_t cchReg;
585 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0 /* pCpu->addrmode == CPUMODE_16BIT */);
586 PUT_STR(pszReg, cchReg);
587 break;
588 }
589
590 /*
591 * ModRM - Register or memory.
592 */
593 case 'E': /* ModRM specifies parameter (ParseModRM / UseModRM / UseSIB). */
594 case 'Q': /* ModRM byte selects MMX register or memory address (ParseModRM / UseModRM). */
595 case 'R': /* ModRM byte may only refer to a general register (ParseModRM / UseModRM). */
596 case 'W': /* ModRM byte selects an XMM/SSE register or a memory address (ParseModRM / UseModRM). */
597 case 'M': /* ModRM may only refer to memory (ParseModRM / UseModRM). */
598 {
599 pszFmt += RT_C_IS_ALPHA(pszFmt[0]) ? RT_C_IS_ALPHA(pszFmt[1]) ? 2 : 1 : 0;
600
601 PUT_FAR();
602 if (pParam->flags & USE_EFFICIENT_ADDRESS)
603 {
604 /* Work around mov seg,[mem16] and mov [mem16],seg as these always make a 16-bit mem
605 while the register variants deals with 16, 32 & 64 in the normal fashion. */
606 if ( pParam->param != OP_PARM_Ev
607 || pOp->opcode != OP_MOV
608 || ( pOp->param1 != OP_PARM_Sw
609 && pOp->param2 != OP_PARM_Sw))
610 PUT_SIZE_OVERRIDE();
611 PUT_C('[');
612 }
613 if (pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32))
614 {
615 if ( (pParam->flags & USE_DISPLACEMENT8)
616 && !pParam->disp8)
617 PUT_SZ("byte ");
618 else if ( (pParam->flags & USE_DISPLACEMENT16)
619 && (int8_t)pParam->disp16 == (int16_t)pParam->disp16)
620 PUT_SZ("word ");
621 else if ( (pParam->flags & USE_DISPLACEMENT32)
622 && (int8_t)pParam->disp32 == (int32_t)pParam->disp32)
623 PUT_SZ("dword ");
624 }
625 if (pParam->flags & USE_EFFICIENT_ADDRESS)
626 PUT_SEGMENT_OVERRIDE();
627
628 bool fBase = (pParam->flags & USE_BASE) /* When exactly is USE_BASE supposed to be set? disasmModRMReg doesn't set it. */
629 || ( (pParam->flags & (USE_REG_GEN8 | USE_REG_GEN16 | USE_REG_GEN32 | USE_REG_GEN64))
630 && !(pParam->flags & USE_EFFICIENT_ADDRESS));
631 if (fBase)
632 {
633 size_t cchReg;
634 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0 /*pCpu->addrmode == CPUMODE_16BIT*/);
635 PUT_STR(pszReg, cchReg);
636 }
637
638 if (pParam->flags & USE_INDEX)
639 {
640 if (fBase)
641 PUT_C('+');
642
643 size_t cchReg;
644 const char *pszReg = MyDisasYasmFormatIndexReg(pCpu, pParam, &cchReg);
645 PUT_STR(pszReg, cchReg);
646
647 if (pParam->flags & USE_SCALE)
648 {
649 PUT_C('*');
650 PUT_C('0' + pParam->scale);
651 }
652 }
653 else
654 Assert(!(pParam->flags & USE_SCALE));
655
656 if (pParam->flags & (USE_DISPLACEMENT8 | USE_DISPLACEMENT16 | USE_DISPLACEMENT32 | USE_DISPLACEMENT64 | USE_RIPDISPLACEMENT32))
657 {
658 Assert(!(pParam->flags & USE_DISPLACEMENT64));
659 int32_t off;
660 if (pParam->flags & USE_DISPLACEMENT8)
661 off = pParam->disp8;
662 else if (pParam->flags & USE_DISPLACEMENT16)
663 off = pParam->disp16;
664 else if (pParam->flags & (USE_DISPLACEMENT32 | USE_RIPDISPLACEMENT32))
665 off = pParam->disp32;
666
667 if (fBase || (pParam->flags & USE_INDEX))
668 PUT_C(off >= 0 ? '+' : '-');
669
670 if (off < 0)
671 off = -off;
672 if (pParam->flags & USE_DISPLACEMENT8)
673 PUT_NUM_8( off);
674 else if (pParam->flags & USE_DISPLACEMENT16)
675 PUT_NUM_16(off);
676 else if (pParam->flags & USE_DISPLACEMENT32)
677 PUT_NUM_32(off);
678 else
679 {
680 PUT_NUM_32(off);
681 PUT_SZ(" wrt rip"); //??
682 }
683 }
684
685 if (pParam->flags & USE_EFFICIENT_ADDRESS)
686 PUT_C(']');
687 break;
688 }
689
690 case 'F': /* Eflags register (0 - popf/pushf only, avoided in adjustments above). */
691 AssertFailed();
692 break;
693
694 case 'I': /* Immediate data (ParseImmByte, ParseImmByteSX, ParseImmV, ParseImmUshort, ParseImmZ). */
695 Assert(*pszFmt == 'b' || *pszFmt == 'v' || *pszFmt == 'w' || *pszFmt == 'z'); pszFmt++;
696 switch (pParam->flags & ( USE_IMMEDIATE8 | USE_IMMEDIATE16 | USE_IMMEDIATE32 | USE_IMMEDIATE64
697 | USE_IMMEDIATE16_SX8 | USE_IMMEDIATE32_SX8))
698 {
699 case USE_IMMEDIATE8:
700 if ( (pOp->param1 >= OP_PARM_REG_GEN8_START && pOp->param1 <= OP_PARM_REG_GEN8_END)
701 || (pOp->param2 >= OP_PARM_REG_GEN8_START && pOp->param2 <= OP_PARM_REG_GEN8_END)
702 )
703 PUT_SZ("strict byte ");
704 PUT_NUM_8(pParam->parval);
705 break;
706
707 case USE_IMMEDIATE16:
708 if ( (int8_t)pParam->parval == (int16_t)pParam->parval
709 || (pOp->param1 >= OP_PARM_REG_GEN16_START && pOp->param1 <= OP_PARM_REG_GEN16_END)
710 || (pOp->param2 >= OP_PARM_REG_GEN16_START && pOp->param2 <= OP_PARM_REG_GEN16_END)
711 || pCpu->mode != pCpu->opmode
712 )
713 {
714 if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_b)
715 PUT_SZ("strict byte ");
716 else if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_v)
717 PUT_SZ("strict word ");
718 }
719 PUT_NUM_16(pParam->parval);
720 break;
721
722 case USE_IMMEDIATE16_SX8:
723 PUT_SZ("strict byte ");
724 PUT_NUM_16(pParam->parval);
725 break;
726
727 case USE_IMMEDIATE32:
728 if ( (int8_t)pParam->parval == (int32_t)pParam->parval
729 || (pOp->param1 >= OP_PARM_REG_GEN32_START && pOp->param1 <= OP_PARM_REG_GEN32_END)
730 || (pOp->param2 >= OP_PARM_REG_GEN32_START && pOp->param2 <= OP_PARM_REG_GEN32_END)
731 || pCpu->opmode != (pCpu->mode == CPUMODE_16BIT ? CPUMODE_16BIT : CPUMODE_32BIT) /* not perfect */
732 )
733 {
734 if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_b)
735 PUT_SZ("strict byte ");
736 else if (OP_PARM_VSUBTYPE(pParam->param) == OP_PARM_v)
737 PUT_SZ("strict dword ");
738 }
739 PUT_NUM_32(pParam->parval);
740 break;
741
742 case USE_IMMEDIATE32_SX8:
743 PUT_SZ("strict byte ");
744 PUT_NUM_32(pParam->parval);
745 break;
746
747 case USE_IMMEDIATE64:
748 PUT_NUM_64(pParam->parval);
749 break;
750
751 default:
752 AssertFailed();
753 break;
754 }
755 break;
756
757 case 'J': /* Relative jump offset (ParseImmBRel + ParseImmVRel). */
758 {
759 int32_t offDisplacement;
760 Assert(iParam == 1);
761 bool fPrefix = pOp->opcode != OP_CALL
762 && pOp->opcode != OP_LOOP
763 && pOp->opcode != OP_LOOPE
764 && pOp->opcode != OP_LOOPNE
765 && pOp->opcode != OP_JECXZ;
766
767 if (pParam->flags & USE_IMMEDIATE8_REL)
768 {
769 if (fPrefix)
770 PUT_SZ("short ");
771 offDisplacement = (int8_t)pParam->parval;
772 Assert(*pszFmt == 'b'); pszFmt++;
773 }
774 else if (pParam->flags & USE_IMMEDIATE16_REL)
775 {
776 if (fPrefix)
777 PUT_SZ("near ");
778 offDisplacement = (int16_t)pParam->parval;
779 Assert(*pszFmt == 'v'); pszFmt++;
780 }
781 else
782 {
783 if (fPrefix)
784 PUT_SZ("near ");
785 offDisplacement = (int32_t)pParam->parval;
786 Assert(pParam->flags & USE_IMMEDIATE32_REL);
787 Assert(*pszFmt == 'v'); pszFmt++;
788 }
789
790 RTUINTPTR uTrgAddr = pCpu->opaddr + pCpu->opsize + offDisplacement;
791 if (pCpu->mode == CPUMODE_16BIT)
792 PUT_NUM_16(uTrgAddr);
793 else if (pCpu->mode == CPUMODE_32BIT)
794 PUT_NUM_32(uTrgAddr);
795 else
796 PUT_NUM_64(uTrgAddr);
797 break;
798 }
799
800 case 'A': /* Direct (jump/call) address (ParseImmAddr). */
801 Assert(*pszFmt == 'p'); pszFmt++;
802 PUT_FAR();
803 PUT_SIZE_OVERRIDE();
804 PUT_SEGMENT_OVERRIDE();
805 switch (pParam->flags & (USE_IMMEDIATE_ADDR_16_16 | USE_IMMEDIATE_ADDR_16_32 | USE_DISPLACEMENT64 | USE_DISPLACEMENT32 | USE_DISPLACEMENT16))
806 {
807 case USE_IMMEDIATE_ADDR_16_16:
808 PUT_NUM_16(pParam->parval >> 16);
809 PUT_C(':');
810 PUT_NUM_16(pParam->parval);
811 break;
812 case USE_IMMEDIATE_ADDR_16_32:
813 PUT_NUM_16(pParam->parval >> 32);
814 PUT_C(':');
815 PUT_NUM_32(pParam->parval);
816 break;
817 case USE_DISPLACEMENT16:
818 PUT_NUM_16(pParam->parval);
819 break;
820 case USE_DISPLACEMENT32:
821 PUT_NUM_32(pParam->parval);
822 break;
823 case USE_DISPLACEMENT64:
824 PUT_NUM_64(pParam->parval);
825 break;
826 default:
827 AssertFailed();
828 break;
829 }
830 break;
831
832 case 'O': /* No ModRM byte (ParseImmAddr). */
833 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
834 PUT_FAR();
835 PUT_SIZE_OVERRIDE();
836 PUT_C('[');
837 PUT_SEGMENT_OVERRIDE();
838 switch (pParam->flags & (USE_IMMEDIATE_ADDR_16_16 | USE_IMMEDIATE_ADDR_16_32 | USE_DISPLACEMENT64 | USE_DISPLACEMENT32 | USE_DISPLACEMENT16))
839 {
840 case USE_IMMEDIATE_ADDR_16_16:
841 PUT_NUM_16(pParam->parval >> 16);
842 PUT_C(':');
843 PUT_NUM_16(pParam->parval);
844 break;
845 case USE_IMMEDIATE_ADDR_16_32:
846 PUT_NUM_16(pParam->parval >> 32);
847 PUT_C(':');
848 PUT_NUM_32(pParam->parval);
849 break;
850 case USE_DISPLACEMENT16:
851 PUT_NUM_16(pParam->disp16);
852 break;
853 case USE_DISPLACEMENT32:
854 PUT_NUM_32(pParam->disp32);
855 break;
856 case USE_DISPLACEMENT64:
857 PUT_NUM_64(pParam->disp64);
858 break;
859 default:
860 AssertFailed();
861 break;
862 }
863 PUT_C(']');
864 break;
865
866 case 'X': /* DS:SI (ParseXb, ParseXv). */
867 case 'Y': /* ES:DI (ParseYb, ParseYv). */
868 {
869 Assert(*pszFmt == 'b' || *pszFmt == 'v'); pszFmt++;
870 PUT_FAR();
871 PUT_SIZE_OVERRIDE();
872 PUT_C('[');
873 if (pParam->flags & USE_POINTER_DS_BASED)
874 PUT_SZ("ds:");
875 else
876 PUT_SZ("es:");
877
878 size_t cchReg;
879 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0);
880 PUT_STR(pszReg, cchReg);
881 PUT_C(']');
882 break;
883 }
884
885 case 'e': /* Register based on operand size (e.g. %eAX) (ParseFixedReg). */
886 {
887 Assert(RT_C_IS_ALPHA(pszFmt[0]) && RT_C_IS_ALPHA(pszFmt[1]) && !RT_C_IS_ALPHA(pszFmt[2])); pszFmt += 2;
888 size_t cchReg;
889 const char *pszReg = MyDisasYasmFormatBaseReg(pCpu, pParam, &cchReg, 0);
890 PUT_STR(pszReg, cchReg);
891 break;
892 }
893
894 default:
895 AssertMsgFailed(("%c%s!\n", ch, pszFmt));
896 break;
897 }
898 AssertMsg(*pszFmt == ',' || *pszFmt == '\0', ("%c%s\n", ch, pszFmt));
899 }
900 else
901 {
902 PUT_C(ch);
903 if (ch == ',')
904 {
905 Assert(*pszFmt != ' ');
906 PUT_C(' ');
907 switch (++iParam)
908 {
909 case 2: pParam = &pCpu->param2; break;
910 case 3: pParam = &pCpu->param3; break;
911 default: pParam = NULL; break;
912 }
913 }
914 }
915 } /* while more to format */
916 }
917
918
919 /* Terminate it - on overflow we'll have reserved one byte for this. */
920 if (cchDst > 0)
921 *pszDst = '\0';
922
923 /* clean up macros */
924#undef PUT_PSZ
925#undef PUT_SZ
926#undef PUT_STR
927#undef PUT_C
928 return cchOutput;
929}
930#endif
931
932
933/**
934 * Default style.
935 *
936 * @param pState The disassembler state.
937 */
938static void MyDisasDefaultFormatter(PMYDISSTATE pState)
939{
940 RTPrintf("%s", pState->szLine);
941}
942
943
944/**
945 * Yasm style.
946 *
947 * @param pState The disassembler state.
948 */
949static void MyDisasYasmFormatter(PMYDISSTATE pState)
950{
951 char szTmp[256];
952#ifndef USE_MY_FORMATTER
953 /* a very quick hack. */
954 strcpy(szTmp, RTStrStripL(strchr(pState->szLine, ':') + 1));
955
956 char *psz = strrchr(szTmp, '[');
957 *psz = '\0';
958 RTStrStripR(szTmp);
959
960 psz = strstr(szTmp, " ptr ");
961 if (psz)
962 memset(psz, ' ', 5);
963
964 char *pszEnd = strchr(szTmp, '\0');
965 while (pszEnd - &szTmp[0] < 71)
966 *pszEnd++ = ' ';
967 *pszEnd = '\0';
968
969#else /* USE_MY_FORMATTER */
970 size_t cch = MyDisasYasmFormat(&pState->Cpu, szTmp, sizeof(szTmp));
971 Assert(cch < sizeof(szTmp));
972 while (cch < 71)
973 szTmp[cch++] = ' ';
974 szTmp[cch] = '\0';
975#endif /* USE_MY_FORMATTER */
976
977 RTPrintf(" %s ; %08llu %s", szTmp, pState->uAddress, pState->szLine);
978}
979
980
981/**
982 * Checks if the encoding of the current instruction is something
983 * we can never get the assembler to produce.
984 *
985 * @returns true if it's odd, false if it isn't.
986 * @param pCpu The disassembler output.
987 */
988static bool MyDisasYasmFormatterIsOddEncoding(PMYDISSTATE pState)
989{
990 /*
991 * Mod rm + SIB: Check for duplicate EBP encodings that yasm won't use for very good reasons.
992 */
993 if ( pState->Cpu.addrmode != CPUMODE_16BIT ///@todo correct?
994 && pState->Cpu.ModRM.Bits.Rm == 4
995 && pState->Cpu.ModRM.Bits.Mod != 3)
996 {
997 /* No scaled index SIB (index=4), except for ESP. */
998 if ( pState->Cpu.SIB.Bits.Index == 4
999 && pState->Cpu.SIB.Bits.Base != 4)
1000 return true;
1001
1002 /* EBP + displacement */
1003 if ( pState->Cpu.ModRM.Bits.Mod != 0
1004 && pState->Cpu.SIB.Bits.Base == 5
1005 && pState->Cpu.SIB.Bits.Scale == 0)
1006 return true;
1007 }
1008
1009 /*
1010 * Seems to be an instruction alias here, but I cannot find any docs on it... hrmpf!
1011 */
1012 if ( pState->Cpu.pCurInstr->opcode == OP_SHL
1013 && pState->Cpu.ModRM.Bits.Reg == 6)
1014 return true;
1015
1016 /*
1017 * Check for multiple prefixes of the same kind.
1018 */
1019 uint32_t fPrefixes = 0;
1020 for (uint8_t const *pu8 = pState->pbInstr;; pu8++)
1021 {
1022 uint32_t f;
1023 switch (*pu8)
1024 {
1025 case 0xf0:
1026 f = PREFIX_LOCK;
1027 break;
1028
1029 case 0xf2:
1030 case 0xf3:
1031 f = PREFIX_REP; /* yes, both */
1032 break;
1033
1034 case 0x2e:
1035 case 0x3e:
1036 case 0x26:
1037 case 0x36:
1038 case 0x64:
1039 case 0x65:
1040 f = PREFIX_SEG;
1041 break;
1042
1043 case 0x66:
1044 f = PREFIX_OPSIZE;
1045 break;
1046
1047 case 0x67:
1048 f = PREFIX_ADDRSIZE;
1049 break;
1050
1051 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1052 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1053 f = pState->Cpu.mode == CPUMODE_64BIT ? PREFIX_REX : 0;
1054 break;
1055
1056 default:
1057 f = 0;
1058 break;
1059 }
1060 if (!f)
1061 break; /* done */
1062 if (fPrefixes & f)
1063 return true;
1064 fPrefixes |= f;
1065 }
1066
1067 /* segment overrides are fun */
1068 if (fPrefixes & PREFIX_SEG)
1069 {
1070 /* no efficient address which it may apply to. */
1071 Assert((pState->Cpu.prefix & PREFIX_SEG) || pState->Cpu.mode == CPUMODE_64BIT);
1072 if ( !(pState->Cpu.param1.flags & USE_EFFICIENT_ADDRESS)
1073 && !(pState->Cpu.param2.flags & USE_EFFICIENT_ADDRESS)
1074 && !(pState->Cpu.param3.flags & USE_EFFICIENT_ADDRESS))
1075 return true;
1076 }
1077
1078 /* fixed register + addr override doesn't go down all that well. */
1079 if (fPrefixes & PREFIX_ADDRSIZE)
1080 {
1081 Assert(pState->Cpu.prefix & PREFIX_ADDRSIZE);
1082 if ( pState->Cpu.pCurInstr->param3 == OP_PARM_NONE
1083 && pState->Cpu.pCurInstr->param2 == OP_PARM_NONE
1084 && ( pState->Cpu.pCurInstr->param1 >= OP_PARM_REG_GEN32_START
1085 && pState->Cpu.pCurInstr->param1 <= OP_PARM_REG_GEN32_END))
1086 return true;
1087 }
1088
1089 /* Almost all prefixes are bad. */
1090 if (fPrefixes)
1091 {
1092 switch (pState->Cpu.pCurInstr->opcode)
1093 {
1094 /* nop w/ prefix(es). */
1095 case OP_NOP:
1096 return true;
1097
1098 case OP_JMP:
1099 if ( pState->Cpu.pCurInstr->param1 != OP_PARM_Jb
1100 && pState->Cpu.pCurInstr->param1 != OP_PARM_Jv)
1101 break;
1102 /* fall thru */
1103 case OP_JO:
1104 case OP_JNO:
1105 case OP_JC:
1106 case OP_JNC:
1107 case OP_JE:
1108 case OP_JNE:
1109 case OP_JBE:
1110 case OP_JNBE:
1111 case OP_JS:
1112 case OP_JNS:
1113 case OP_JP:
1114 case OP_JNP:
1115 case OP_JL:
1116 case OP_JNL:
1117 case OP_JLE:
1118 case OP_JNLE:
1119 /** @todo branch hinting 0x2e/0x3e... */
1120 return true;
1121 }
1122
1123 }
1124
1125 /* All but the segment prefix is bad news. */
1126 if (fPrefixes & ~PREFIX_SEG)
1127 {
1128 switch (pState->Cpu.pCurInstr->opcode)
1129 {
1130 case OP_POP:
1131 case OP_PUSH:
1132 if ( pState->Cpu.pCurInstr->param1 >= OP_PARM_REG_SEG_START
1133 && pState->Cpu.pCurInstr->param1 <= OP_PARM_REG_SEG_END)
1134 return true;
1135 if ( (fPrefixes & ~PREFIX_OPSIZE)
1136 && pState->Cpu.pCurInstr->param1 >= OP_PARM_REG_GEN32_START
1137 && pState->Cpu.pCurInstr->param1 <= OP_PARM_REG_GEN32_END)
1138 return true;
1139 break;
1140
1141 case OP_POPA:
1142 case OP_POPF:
1143 case OP_PUSHA:
1144 case OP_PUSHF:
1145 if (fPrefixes & ~PREFIX_OPSIZE)
1146 return true;
1147 break;
1148 }
1149 }
1150
1151 /* Implicit 8-bit register instructions doesn't mix with operand size. */
1152 if ( (fPrefixes & PREFIX_OPSIZE)
1153 && ( ( pState->Cpu.pCurInstr->param1 == OP_PARM_Gb /* r8 */
1154 && pState->Cpu.pCurInstr->param2 == OP_PARM_Eb /* r8/mem8 */)
1155 || ( pState->Cpu.pCurInstr->param2 == OP_PARM_Gb /* r8 */
1156 && pState->Cpu.pCurInstr->param1 == OP_PARM_Eb /* r8/mem8 */))
1157 )
1158 {
1159 switch (pState->Cpu.pCurInstr->opcode)
1160 {
1161 case OP_ADD:
1162 case OP_OR:
1163 case OP_ADC:
1164 case OP_SBB:
1165 case OP_AND:
1166 case OP_SUB:
1167 case OP_XOR:
1168 case OP_CMP:
1169 return true;
1170 default:
1171 break;
1172 }
1173 }
1174
1175
1176 /*
1177 * Check for the version of xyz reg,reg instruction that the assembler doesn't use.
1178 *
1179 * For example:
1180 * expected: 1aee sbb ch, dh ; SBB r8, r/m8
1181 * yasm: 18F5 sbb ch, dh ; SBB r/m8, r8
1182 */
1183 if (pState->Cpu.ModRM.Bits.Mod == 3 /* reg,reg */)
1184 {
1185 switch (pState->Cpu.pCurInstr->opcode)
1186 {
1187 case OP_ADD:
1188 case OP_OR:
1189 case OP_ADC:
1190 case OP_SBB:
1191 case OP_AND:
1192 case OP_SUB:
1193 case OP_XOR:
1194 case OP_CMP:
1195 if ( ( pState->Cpu.pCurInstr->param1 == OP_PARM_Gb /* r8 */
1196 && pState->Cpu.pCurInstr->param2 == OP_PARM_Eb /* r8/mem8 */)
1197 || ( pState->Cpu.pCurInstr->param1 == OP_PARM_Gv /* rX */
1198 && pState->Cpu.pCurInstr->param2 == OP_PARM_Ev /* rX/memX */))
1199 return true;
1200
1201 /* 82 (see table A-6). */
1202 if (pState->Cpu.opcode == 0x82)
1203 return true;
1204 break;
1205
1206 /* ff /0, fe /0, ff /1, fe /0 */
1207 case OP_DEC:
1208 case OP_INC:
1209 return true;
1210
1211 case OP_POP:
1212 case OP_PUSH:
1213 Assert(pState->Cpu.opcode == 0x8f);
1214 return true;
1215
1216 default:
1217 break;
1218 }
1219 }
1220
1221 /* shl eax,1 will be assembled to the form without the immediate byte. */
1222 if ( pState->Cpu.pCurInstr->param2 == OP_PARM_Ib
1223 && (uint8_t)pState->Cpu.param2.parval == 1)
1224 {
1225 switch (pState->Cpu.pCurInstr->opcode)
1226 {
1227 case OP_SHL:
1228 case OP_SHR:
1229 case OP_SAR:
1230 case OP_RCL:
1231 case OP_RCR:
1232 case OP_ROL:
1233 case OP_ROR:
1234 return true;
1235 }
1236 }
1237
1238 /* And some more - see table A-6. */
1239 if (pState->Cpu.opcode == 0x82)
1240 {
1241 switch (pState->Cpu.pCurInstr->opcode)
1242 {
1243 case OP_ADD:
1244 case OP_OR:
1245 case OP_ADC:
1246 case OP_SBB:
1247 case OP_AND:
1248 case OP_SUB:
1249 case OP_XOR:
1250 case OP_CMP:
1251 return true;
1252 break;
1253 }
1254 }
1255
1256
1257 /* check for REX.X = 1 without SIB. */
1258
1259 /* Yasm encodes setnbe al with /2 instead of /0 like the AMD manual
1260 says (intel doesn't appear to care). */
1261 switch (pState->Cpu.pCurInstr->opcode)
1262 {
1263 case OP_SETO:
1264 case OP_SETNO:
1265 case OP_SETC:
1266 case OP_SETNC:
1267 case OP_SETE:
1268 case OP_SETNE:
1269 case OP_SETBE:
1270 case OP_SETNBE:
1271 case OP_SETS:
1272 case OP_SETNS:
1273 case OP_SETP:
1274 case OP_SETNP:
1275 case OP_SETL:
1276 case OP_SETNL:
1277 case OP_SETLE:
1278 case OP_SETNLE:
1279 AssertMsg(pState->Cpu.opcode >= 0x90 && pState->Cpu.opcode <= 0x9f, ("%#x\n", pState->Cpu.opcode));
1280 if (pState->Cpu.ModRM.Bits.Reg != 2)
1281 return true;
1282 break;
1283 }
1284
1285 /*
1286 * The MOVZX reg32,mem16 instruction without an operand size prefix
1287 * doesn't quite make sense...
1288 */
1289 if ( pState->Cpu.pCurInstr->opcode == OP_MOVZX
1290 && pState->Cpu.opcode == 0xB7
1291 && (pState->Cpu.mode == CPUMODE_16BIT) != !!(fPrefixes & PREFIX_OPSIZE))
1292 return true;
1293
1294 return false;
1295}
1296
1297
1298/**
1299 * Masm style.
1300 *
1301 * @param pState The disassembler state.
1302 */
1303static void MyDisasMasmFormatter(PMYDISSTATE pState)
1304{
1305 RTPrintf("masm not implemented: %s", pState->szLine);
1306}
1307
1308
1309/**
1310 * This is a temporary workaround for catching a few illegal opcodes
1311 * that the disassembler is currently letting thru, just enough to make
1312 * the assemblers happy.
1313 *
1314 * We're too close to a release to dare mess with these things now as
1315 * they may consequences for performance and let alone introduce bugs.
1316 *
1317 * @returns true if it's valid. false if it isn't.
1318 *
1319 * @param pCpu The disassembler output.
1320 */
1321static bool MyDisasIsValidInstruction(DISCPUSTATE const *pCpu)
1322{
1323 switch (pCpu->pCurInstr->opcode)
1324 {
1325 /* These doesn't take memory operands. */
1326 case OP_MOV_CR:
1327 case OP_MOV_DR:
1328 case OP_MOV_TR:
1329 if (pCpu->ModRM.Bits.Mod != 3)
1330 return false;
1331 break;
1332
1333 /* The 0x8f /0 variant of this instruction doesn't get its /r value verified. */
1334 case OP_POP:
1335 if ( pCpu->opcode == 0x8f
1336 && pCpu->ModRM.Bits.Reg != 0)
1337 return false;
1338 break;
1339
1340 /* The 0xc6 /0 and 0xc7 /0 variants of this instruction don't get their /r values verified. */
1341 case OP_MOV:
1342 if ( ( pCpu->opcode == 0xc6
1343 || pCpu->opcode == 0xc7)
1344 && pCpu->ModRM.Bits.Reg != 0)
1345 return false;
1346 break;
1347
1348 default:
1349 break;
1350 }
1351
1352 return true;
1353}
1354
1355
1356/**
1357 * Callback for reading bytes.
1358 *
1359 * @todo This should check that the disassembler doesn't do unnecessary reads,
1360 * however the current doesn't do this and is just complicated...
1361 */
1362static DECLCALLBACK(int) MyDisasInstrRead(RTUINTPTR uSrcAddr, uint8_t *pbDst, uint32_t cbRead, void *pvDisCpu)
1363{
1364 PMYDISSTATE pState = (PMYDISSTATE)pvDisCpu;
1365 if (RT_LIKELY( pState->uNextAddr == uSrcAddr
1366 && pState->cbLeft >= cbRead))
1367 {
1368 /*
1369 * Straight forward reading.
1370 */
1371 if (cbRead == 1)
1372 {
1373 pState->cbLeft--;
1374 *pbDst = *pState->pbNext++;
1375 pState->uNextAddr++;
1376 }
1377 else
1378 {
1379 memcpy(pbDst, pState->pbNext, cbRead);
1380 pState->pbNext += cbRead;
1381 pState->cbLeft -= cbRead;
1382 pState->uNextAddr += cbRead;
1383 }
1384 }
1385 else
1386 {
1387 /*
1388 * Jumping up the stream.
1389 * This occures when the byte sequence is added to the output string.
1390 */
1391 uint64_t offReq64 = uSrcAddr - pState->uAddress;
1392 if (offReq64 < 32)
1393 {
1394 uint32_t offReq = offReq64;
1395 uintptr_t off = pState->pbNext - pState->pbInstr;
1396 if (off + pState->cbLeft <= offReq)
1397 {
1398 pState->pbNext += pState->cbLeft;
1399 pState->uNextAddr += pState->cbLeft;
1400 pState->cbLeft = 0;
1401
1402 memset(pbDst, 0xcc, cbRead);
1403 pState->rc = VERR_EOF;
1404 return VERR_EOF;
1405 }
1406
1407 /* reset the stream. */
1408 pState->cbLeft += off;
1409 pState->pbNext = pState->pbInstr;
1410 pState->uNextAddr = pState->uAddress;
1411
1412 /* skip ahead. */
1413 pState->cbLeft -= offReq;
1414 pState->pbNext += offReq;
1415 pState->uNextAddr += offReq;
1416
1417 /* do the reading. */
1418 if (pState->cbLeft >= cbRead)
1419 {
1420 memcpy(pbDst, pState->pbNext, cbRead);
1421 pState->cbLeft -= cbRead;
1422 pState->pbNext += cbRead;
1423 pState->uNextAddr += cbRead;
1424 }
1425 else
1426 {
1427 if (pState->cbLeft > 0)
1428 {
1429 memcpy(pbDst, pState->pbNext, pState->cbLeft);
1430 pbDst += pState->cbLeft;
1431 cbRead -= pState->cbLeft;
1432 pState->pbNext += pState->cbLeft;
1433 pState->uNextAddr += pState->cbLeft;
1434 pState->cbLeft = 0;
1435 }
1436 memset(pbDst, 0xcc, cbRead);
1437 pState->rc = VERR_EOF;
1438 return VERR_EOF;
1439 }
1440 }
1441 else
1442 {
1443 RTStrmPrintf(g_pStdErr, "Reading before current instruction!\n");
1444 memset(pbDst, 0x90, cbRead);
1445 pState->rc = VERR_INTERNAL_ERROR;
1446 return VERR_INTERNAL_ERROR;
1447 }
1448 }
1449
1450 return VINF_SUCCESS;
1451}
1452
1453
1454/**
1455 * Disassembles a block of memory.
1456 *
1457 * @returns VBox status code.
1458 * @param argv0 Program name (for errors and warnings).
1459 * @param enmCpuMode The cpu mode to disassemble in.
1460 * @param uAddress The address we're starting to disassemble at.
1461 * @param pbFile Where to start disassemble.
1462 * @param cbFile How much to disassemble.
1463 * @param enmStyle The assembly output style.
1464 * @param fListing Whether to print in a listing like mode.
1465 * @param enmUndefOp How to deal with undefined opcodes.
1466 */
1467static int MyDisasmBlock(const char *argv0, DISCPUMODE enmCpuMode, uint64_t uAddress, uint8_t *pbFile, size_t cbFile,
1468 ASMSTYLE enmStyle, bool fListing, UNDEFOPHANDLING enmUndefOp)
1469{
1470 /*
1471 * Initialize the CPU context.
1472 */
1473 MYDISSTATE State;
1474 State.Cpu.mode = enmCpuMode;
1475 State.Cpu.pfnReadBytes = MyDisasInstrRead;
1476 State.uAddress = uAddress;
1477 State.pbInstr = pbFile;
1478 State.cbInstr = 0;
1479 State.enmUndefOp = enmUndefOp;
1480 State.rc = VINF_SUCCESS;
1481 State.cbLeft = cbFile;
1482 State.pbNext = pbFile;
1483 State.uNextAddr = uAddress;
1484
1485 void (*pfnFormatter)(PMYDISSTATE pState);
1486 switch (enmStyle)
1487 {
1488 case kAsmStyle_Default:
1489 pfnFormatter = MyDisasDefaultFormatter;
1490 break;
1491
1492 case kAsmStyle_yasm:
1493 RTPrintf(" BITS %d\n", enmCpuMode == CPUMODE_16BIT ? 16 : enmCpuMode == CPUMODE_32BIT ? 32 : 64);
1494 pfnFormatter = MyDisasYasmFormatter;
1495 break;
1496
1497 case kAsmStyle_masm:
1498 pfnFormatter = MyDisasMasmFormatter;
1499 break;
1500
1501 default:
1502 AssertFailedReturn(VERR_INTERNAL_ERROR);
1503 }
1504
1505 /*
1506 * The loop.
1507 */
1508 int rcRet = VINF_SUCCESS;
1509 while (State.cbLeft > 0)
1510 {
1511 /*
1512 * Disassemble it.
1513 */
1514 State.cbInstr = 0;
1515 State.cbLeft += State.pbNext - State.pbInstr;
1516 State.uNextAddr = State.uAddress;
1517 State.pbNext = State.pbInstr;
1518
1519 int rc = DISInstr(&State.Cpu, State.uAddress, 0, &State.cbInstr, State.szLine);
1520 if ( RT_SUCCESS(rc)
1521 || ( ( rc == VERR_DIS_INVALID_OPCODE
1522 || rc == VERR_DIS_GEN_FAILURE)
1523 && State.enmUndefOp == kUndefOp_DefineByte))
1524 {
1525 State.fUndefOp = rc == VERR_DIS_INVALID_OPCODE
1526 || rc == VERR_DIS_GEN_FAILURE
1527 || State.Cpu.pCurInstr->opcode == OP_INVALID
1528 || State.Cpu.pCurInstr->opcode == OP_ILLUD2
1529 || ( State.enmUndefOp == kUndefOp_DefineByte
1530 && !MyDisasIsValidInstruction(&State.Cpu));
1531 if (State.fUndefOp && State.enmUndefOp == kUndefOp_DefineByte)
1532 {
1533 RTPrintf(" db");
1534 if (!State.cbInstr)
1535 State.cbInstr = 1;
1536 for (unsigned off = 0; off < State.cbInstr; off++)
1537 {
1538 uint8_t b;
1539 State.Cpu.pfnReadBytes(State.uAddress + off, &b, 1, &State.Cpu);
1540 RTPrintf(off ? ", %03xh" : " %03xh", b);
1541 }
1542 RTPrintf(" ; %s\n", State.szLine);
1543 }
1544 else if (!State.fUndefOp && State.enmUndefOp == kUndefOp_All)
1545 {
1546 RTPrintf("%s: error at %#RX64: unexpected valid instruction (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->opcode);
1547 pfnFormatter(&State);
1548 rcRet = VERR_GENERAL_FAILURE;
1549 }
1550 else if (State.fUndefOp && State.enmUndefOp == kUndefOp_Fail)
1551 {
1552 RTPrintf("%s: error at %#RX64: undefined opcode (op=%d)\n", argv0, State.uAddress, State.Cpu.pCurInstr->opcode);
1553 pfnFormatter(&State);
1554 rcRet = VERR_GENERAL_FAILURE;
1555 }
1556 else
1557 {
1558 /* Use db for odd encodings that we can't make the assembler use. */
1559 if ( State.enmUndefOp == kUndefOp_DefineByte
1560 && MyDisasYasmFormatterIsOddEncoding(&State))
1561 {
1562 RTPrintf(" db");
1563 for (unsigned off = 0; off < State.cbInstr; off++)
1564 {
1565 uint8_t b;
1566 State.Cpu.pfnReadBytes(State.uAddress + off, &b, 1, &State.Cpu);
1567 RTPrintf(off ? ", %03xh" : " %03xh", b);
1568 }
1569 RTPrintf(" ; ");
1570 }
1571
1572 pfnFormatter(&State);
1573 }
1574 }
1575 else
1576 {
1577 State.cbInstr = State.pbNext - State.pbInstr;
1578 if (!State.cbLeft)
1579 RTPrintf("%s: error at %#RX64: read beyond the end (%Rrc)\n", argv0, State.uAddress, rc);
1580 else if (State.cbInstr)
1581 RTPrintf("%s: error at %#RX64: %Rrc cbInstr=%d\n", argv0, State.uAddress, rc, State.cbInstr);
1582 else
1583 {
1584 RTPrintf("%s: error at %#RX64: %Rrc cbInstr=%d!\n", argv0, State.uAddress, rc, State.cbInstr);
1585 if (rcRet == VINF_SUCCESS)
1586 rcRet = rc;
1587 break;
1588 }
1589 }
1590
1591
1592 /* next */
1593 State.uAddress += State.cbInstr;
1594 State.pbInstr += State.cbInstr;
1595 }
1596
1597 return rcRet;
1598}
1599
1600
1601/**
1602 * Prints usage info.
1603 *
1604 * @returns 1.
1605 * @param argv0 The program name.
1606 */
1607static int Usage(const char *argv0)
1608{
1609 RTStrmPrintf(g_pStdErr,
1610"usage: %s [options] <file1> [file2..fileN]\n"
1611" or: %s <--help|-h>\n"
1612"\n"
1613"Options:\n"
1614" --address|-a <address>\n"
1615" The base address. Default: 0\n"
1616" --max-bytes|-b <bytes>\n"
1617" The maximum number of bytes to disassemble. Default: 1GB\n"
1618" --cpumode|-c <16|32|64>\n"
1619" The cpu mode. Default: 32\n"
1620" --listing|-l, --no-listing|-L\n"
1621" Enables or disables listing mode. Default: --no-listing\n"
1622" --offset|-o <offset>\n"
1623" The file offset at which to start disassembling. Default: 0\n"
1624" --style|-s <default|yasm|masm>\n"
1625" The assembly output style. Default: default\n"
1626" --undef-op|-u <fail|all|db>\n"
1627" How to treat undefined opcodes. Default: fail\n"
1628 , argv0, argv0);
1629 return 1;
1630}
1631
1632
1633int main(int argc, char **argv)
1634{
1635 RTR3Init();
1636 const char * const argv0 = RTPathFilename(argv[0]);
1637
1638 /* options */
1639 uint64_t uAddress = 0;
1640 ASMSTYLE enmStyle = kAsmStyle_Default;
1641 UNDEFOPHANDLING enmUndefOp = kUndefOp_Fail;
1642 bool fListing = true;
1643 DISCPUMODE enmCpuMode = CPUMODE_32BIT;
1644 RTFOFF off = 0;
1645 RTFOFF cbMax = _1G;
1646
1647 /*
1648 * Parse arguments.
1649 */
1650 static const RTOPTIONDEF g_aOptions[] =
1651 {
1652 { "--address", 'a', RTGETOPT_REQ_UINT64 },
1653 { "--cpumode", 'c', RTGETOPT_REQ_UINT32 },
1654 { "--help", 'h', 0 },
1655 { "--bytes", 'b', RTGETOPT_REQ_INT64 },
1656 { "--listing", 'l', 0 },
1657 { "--no-listing", 'L', 0 },
1658 { "--offset", 'o', RTGETOPT_REQ_INT64 },
1659 { "--style", 's', RTGETOPT_REQ_STRING },
1660 { "--undef-op", 'u', RTGETOPT_REQ_STRING },
1661 };
1662
1663 int ch;
1664 int iArg = 1;
1665 RTOPTIONUNION ValueUnion;
1666 while ((ch = RTGetOpt(argc, argv, g_aOptions, RT_ELEMENTS(g_aOptions), &iArg, &ValueUnion)))
1667 {
1668 switch (ch)
1669 {
1670 case 'a':
1671 uAddress = ValueUnion.u64;
1672 break;
1673
1674 case 'b':
1675 cbMax = ValueUnion.i;
1676 break;
1677
1678 case 'c':
1679 if (ValueUnion.u32 == 16)
1680 enmCpuMode = CPUMODE_16BIT;
1681 else if (ValueUnion.u32 == 32)
1682 enmCpuMode = CPUMODE_32BIT;
1683 else if (ValueUnion.u32 == 64)
1684 enmCpuMode = CPUMODE_64BIT;
1685 else
1686 {
1687 RTStrmPrintf(g_pStdErr, "%s: Invalid CPU mode value %RU32\n", argv0, ValueUnion.u32);
1688 return 1;
1689 }
1690 break;
1691
1692 case 'h':
1693 return Usage(argv0);
1694
1695 case 'l':
1696 fListing = true;
1697 break;
1698
1699 case 'L':
1700 fListing = false;
1701 break;
1702
1703 case 'o':
1704 off = ValueUnion.i;
1705 break;
1706
1707 case 's':
1708 if (!strcmp(ValueUnion.psz, "default"))
1709 enmStyle = kAsmStyle_Default;
1710 else if (!strcmp(ValueUnion.psz, "yasm"))
1711 enmStyle = kAsmStyle_yasm;
1712 else if (!strcmp(ValueUnion.psz, "masm"))
1713 {
1714 enmStyle = kAsmStyle_masm;
1715 RTStrmPrintf(g_pStdErr, "%s: masm style isn't implemented yet\n", argv0);
1716 return 1;
1717 }
1718 else
1719 {
1720 RTStrmPrintf(g_pStdErr, "%s: unknown assembly style: %s\n", argv0, ValueUnion.psz);
1721 return 1;
1722 }
1723 break;
1724
1725 case 'u':
1726 if (!strcmp(ValueUnion.psz, "fail"))
1727 enmUndefOp = kUndefOp_Fail;
1728 else if (!strcmp(ValueUnion.psz, "all"))
1729 enmUndefOp = kUndefOp_All;
1730 else if (!strcmp(ValueUnion.psz, "db"))
1731 enmUndefOp = kUndefOp_DefineByte;
1732 else
1733 {
1734 RTStrmPrintf(g_pStdErr, "%s: unknown undefined opcode handling method: %s\n", argv0, ValueUnion.psz);
1735 return 1;
1736 }
1737 break;
1738
1739 default:
1740 RTStrmPrintf(g_pStdErr, "%s: syntax error: %Rrc\n", argv0, ch);
1741 return 1;
1742 }
1743 }
1744 if (iArg >= argc)
1745 return Usage(argv0);
1746
1747 /*
1748 * Process the files.
1749 */
1750 int rc = VINF_SUCCESS;
1751 for ( ; iArg < argc; iArg++)
1752 {
1753 /*
1754 * Read the file into memory.
1755 */
1756 void *pvFile;
1757 size_t cbFile;
1758 rc = RTFileReadAllEx(argv[iArg], off, cbMax, 0, &pvFile, &cbFile);
1759 if (RT_FAILURE(rc))
1760 {
1761 RTStrmPrintf(g_pStdErr, "%s: %s: %Rrc\n", argv0, argv[iArg], rc);
1762 break;
1763 }
1764
1765 /*
1766 * Disassemble it.
1767 */
1768 rc = MyDisasmBlock(argv0, enmCpuMode, uAddress, (uint8_t *)pvFile, cbFile, enmStyle, fListing, enmUndefOp);
1769 if (RT_FAILURE(rc))
1770 break;
1771 }
1772
1773 return RT_SUCCESS(rc) ? 0 : 1;
1774}
1775
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette