VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 100651

最後變更 在這個檔案從100651是 100633,由 vboxsync 提交於 20 月 前

VMM/IEM: Check for too early IEMCPU::iEffSeg use during recompiler code generation. bugref:10369

  • 屬性 svn:eol-style 設為 LF
  • 屬性 svn:executable 設為 *
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 260.7 KB
 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 100633 2023-07-18 14:05:10Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 100633 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 compatibility: alias the 'long' name to 'int' so that code using
# long() keeps working on both major versions.
if sys.version_info >= (3,):
    long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## X86_EFL_XXX constant names mapped to their numerical values.
## The bits are expressed as shifts; the resulting mask is given in the comment.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':           1 << 0,                 # 0x00000001
    'X86_EFL_1':            1 << 1,                 # 0x00000002
    'X86_EFL_PF':           1 << 2,                 # 0x00000004
    'X86_EFL_AF':           1 << 4,                 # 0x00000010
    'X86_EFL_ZF':           1 << 6,                 # 0x00000040
    'X86_EFL_SF':           1 << 7,                 # 0x00000080
    'X86_EFL_TF':           1 << 8,                 # 0x00000100
    'X86_EFL_IF':           1 << 9,                 # 0x00000200
    'X86_EFL_DF':           1 << 10,                # 0x00000400
    'X86_EFL_OF':           1 << 11,                # 0x00000800
    'X86_EFL_IOPL':         (1 << 12) | (1 << 13),  # 0x00003000
    'X86_EFL_NT':           1 << 14,                # 0x00004000
    'X86_EFL_RF':           1 << 16,                # 0x00010000
    'X86_EFL_VM':           1 << 17,                # 0x00020000
    'X86_EFL_AC':           1 << 18,                # 0x00040000
    'X86_EFL_VIF':          1 << 19,                # 0x00080000
    'X86_EFL_VIP':          1 << 20,                # 0x00100000
    'X86_EFL_ID':           1 << 21,                # 0x00200000
    'X86_EFL_LIVE_MASK':    0x003f7fd5,             # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':     1 << 1,                 # 0x00000002
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to an X86_EFL_XXX constant name.  A leading '!' on the
## constant name marks the mnemonic for the flag being clear.
##
## NOTE(review): 'po' (parity odd) maps to the flag being set and 'pe' (parity
##               even) to it being clear, which looks inverted compared to the
##               usual meaning of PF.  Left untouched here; confirm against the
##               users of these mnemonics before changing it.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',       ##< Carry Flag.
    'nc':   '!X86_EFL_CF',      ##< No Carry.

    'po':   'X86_EFL_PF',       ##< Parity Odd.
    'pe':   '!X86_EFL_PF',      ##< Parity Even.

    'af':   'X86_EFL_AF',       ##< Aux Flag.
    'na':   '!X86_EFL_AF',      ##< No Aux.

    'zr':   'X86_EFL_ZF',       ##< ZeRo.
    'nz':   '!X86_EFL_ZF',      ##< No Zero.

    'ng':   'X86_EFL_SF',       ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',      ##< PLus (sign).

    'tf':   'X86_EFL_TF',       ##< Trap flag.

    'ei':   'X86_EFL_IF',       ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',      ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',       ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',      ##< UP (string op direction).

    'ov':   'X86_EFL_OF',       ##< OVerflow.
    'nv':   '!X86_EFL_OF',      ##< No Overflow.

    'nt':   'X86_EFL_NT',       ##< Nested Task.
    'rf':   'X86_EFL_RF',       ##< Resume Flag.
    'vm':   'X86_EFL_VM',       ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',       ##< Alignment Check.
    'vif':  'X86_EFL_VIF',      ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',      ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Maps the X86_CR0_XXX names to their bit masks (mask given in the comment).
g_kdX86Cr0Constants = {
    'X86_CR0_PE':   1 << 0,     # 0x00000001
    'X86_CR0_MP':   1 << 1,     # 0x00000002
    'X86_CR0_EM':   1 << 2,     # 0x00000004
    'X86_CR0_TS':   1 << 3,     # 0x00000008
    'X86_CR0_ET':   1 << 4,     # 0x00000010
    'X86_CR0_NE':   1 << 5,     # 0x00000020
    'X86_CR0_WP':   1 << 16,    # 0x00010000
    'X86_CR0_AM':   1 << 18,    # 0x00040000
    'X86_CR0_NW':   1 << 29,    # 0x20000000
    'X86_CR0_CD':   1 << 30,    # 0x40000000
    'X86_CR0_PG':   1 << 31,    # 0x80000000
};
152
## Constants and values for CR4.
## Maps the X86_CR4_XXX names to their bit masks (mask given in the comment).
g_kdX86Cr4Constants = {
    'X86_CR4_VME':          1 << 0,     # 0x00000001
    'X86_CR4_PVI':          1 << 1,     # 0x00000002
    'X86_CR4_TSD':          1 << 2,     # 0x00000004
    'X86_CR4_DE':           1 << 3,     # 0x00000008
    'X86_CR4_PSE':          1 << 4,     # 0x00000010
    'X86_CR4_PAE':          1 << 5,     # 0x00000020
    'X86_CR4_MCE':          1 << 6,     # 0x00000040
    'X86_CR4_PGE':          1 << 7,     # 0x00000080
    'X86_CR4_PCE':          1 << 8,     # 0x00000100
    'X86_CR4_OSFXSR':       1 << 9,     # 0x00000200
    'X86_CR4_OSXMMEEXCPT':  1 << 10,    # 0x00000400
    'X86_CR4_VMXE':         1 << 13,    # 0x00002000
    'X86_CR4_SMXE':         1 << 14,    # 0x00004000
    'X86_CR4_PCIDE':        1 << 17,    # 0x00020000
    'X86_CR4_OSXSAVE':      1 << 18,    # 0x00040000
    'X86_CR4_SMEP':         1 << 20,    # 0x00100000
    'X86_CR4_SMAP':         1 << 21,    # 0x00200000
    'X86_CR4_PKE':          1 << 22,    # 0x00400000
};
174
## XSAVE components (XCR0).
## Maps the XSAVE_C_XXX names to their bit masks (mask given in the comment).
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':          1 << 0,     # 0x00000001
    'XSAVE_C_SSE':          1 << 1,     # 0x00000002
    'XSAVE_C_YMM':          1 << 2,     # 0x00000004
    'XSAVE_C_BNDREGS':      1 << 3,     # 0x00000008
    'XSAVE_C_BNDCSR':       1 << 4,     # 0x00000010
    'XSAVE_C_OPMASK':       1 << 5,     # 0x00000020
    'XSAVE_C_ZMM_HI256':    1 << 6,     # 0x00000040
    'XSAVE_C_ZMM_16HI':     1 << 7,     # 0x00000080
    'XSAVE_C_PKRU':         1 << 9,     # 0x00000200
    'XSAVE_C_LWP':          1 << 62,    # 0x4000000000000000
    'XSAVE_C_X':            1 << 63,    # 0x8000000000000000
    'XSAVE_C_ALL_AVX':      (1 << 2) | (1 << 6) | (1 << 7),             # 0x000000c4 - For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE':  (1 << 1) | (1 << 2) | (1 << 6) | (1 << 7),  # 0x000000c6 - For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations.
## Every valid location name is mapped to its own (empty) list.
g_kdOpLocations = {
    sLocation: [] for sLocation in (
        'reg',      # modrm.reg
        'rm',       # modrm.rm
        'imm',      # immediate instruction data
        'vvvv',     # VEX.vvvv

        # fixed registers.
        'AL',
        'rAX',
        'rDX',
        'rSI',
        'rDI',
        'rFLAGS',
        'CS',
        'DS',
        'ES',
        'FS',
        'GS',
        'SS',
    )
};
214
## \@op[1-4] types
##
## Each operand type maps to a tuple with these fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap':           ( 'IDX_ParseImmAddrF',  'imm',    '%Ap',  'Ap',      'FIXED', ),

    # ModR/M.rm
    'Eb':           ( 'IDX_UseModRM',       'rm',     '%Eb',  'Eb',      'RM',    ),
    'Ed':           ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Ed_WO':        ( 'IDX_UseModRM',       'rm',     '%Ed',  'Ed',      'RM',    ),
    'Eq':           ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Eq_WO':        ( 'IDX_UseModRM',       'rm',     '%Eq',  'Eq',      'RM',    ),
    'Ew':           ( 'IDX_UseModRM',       'rm',     '%Ew',  'Ew',      'RM',    ),
    'Ev':           ( 'IDX_UseModRM',       'rm',     '%Ev',  'Ev',      'RM',    ),
    'Ey':           ( 'IDX_UseModRM',       'rm',     '%Ey',  'Ey',      'RM',    ),
    'Qd':           ( 'IDX_UseModRM',       'rm',     '%Qd',  'Qd',      'RM',    ),
    'Qq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Qq_WO':        ( 'IDX_UseModRM',       'rm',     '%Qq',  'Qq',      'RM',    ),
    'Wss':          ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wss_WO':       ( 'IDX_UseModRM',       'rm',     '%Wss', 'Wss',     'RM',    ),
    'Wsd':          ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wsd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wsd', 'Wsd',     'RM',    ),
    'Wps':          ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wps_WO':       ( 'IDX_UseModRM',       'rm',     '%Wps', 'Wps',     'RM',    ),
    'Wpd':          ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Wpd', 'Wpd',     'RM',    ),
    'Wdq':          ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Wdq', 'Wdq',     'RM',    ),
    'Wq':           ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wq_WO':        ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'WqZxReg_WO':   ( 'IDX_UseModRM',       'rm',     '%Wq',  'Wq',      'RM',    ),
    'Wx':           ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),
    'Wx_WO':        ( 'IDX_UseModRM',       'rm',     '%Wx',  'Wx',      'RM',    ),

    # ModR/M.rm - register only.
    'Uq':           ( 'IDX_UseModRM',       'rm',     '%Uq',  'Uq',      'REG'    ),
    'UqHi':         ( 'IDX_UseModRM',       'rm',     '%Uq',  'UqHi',    'REG'    ),
    'Uss':          ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Uss_WO':       ( 'IDX_UseModRM',       'rm',     '%Uss', 'Uss',     'REG'    ),
    'Usd':          ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Usd_WO':       ( 'IDX_UseModRM',       'rm',     '%Usd', 'Usd',     'REG'    ),
    'Ux':           ( 'IDX_UseModRM',       'rm',     '%Ux',  'Ux',      'REG'    ),
    'Nq':           ( 'IDX_UseModRM',       'rm',     '%Qq',  'Nq',      'REG'    ),

    # ModR/M.rm - memory only.
    'Ma':           ( 'IDX_UseModRM',       'rm',     '%Ma',  'Ma',      'MEM',   ), ##< Only used by BOUND.
    'Mb_RO':        ( 'IDX_UseModRM',       'rm',     '%Mb',  'Mb',      'MEM',   ),
    'Md':           ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_RO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Md_WO':        ( 'IDX_UseModRM',       'rm',     '%Md',  'Md',      'MEM',   ),
    'Mdq':          ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mdq_WO':       ( 'IDX_UseModRM',       'rm',     '%Mdq', 'Mdq',     'MEM',   ),
    'Mq':           ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mq_WO':        ( 'IDX_UseModRM',       'rm',     '%Mq',  'Mq',      'MEM',   ),
    'Mps_WO':       ( 'IDX_UseModRM',       'rm',     '%Mps', 'Mps',     'MEM',   ),
    'Mpd_WO':       ( 'IDX_UseModRM',       'rm',     '%Mpd', 'Mpd',     'MEM',   ),
    'Mx':           ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'Mx_WO':        ( 'IDX_UseModRM',       'rm',     '%Mx',  'Mx',      'MEM',   ),
    'M_RO':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),
    'M_RW':         ( 'IDX_UseModRM',       'rm',     '%M',   'M',       'MEM',   ),

    # ModR/M.reg
    'Gb':           ( 'IDX_UseModRM',       'reg',    '%Gb',  'Gb',      '',      ),
    'Gw':           ( 'IDX_UseModRM',       'reg',    '%Gw',  'Gw',      '',      ),
    'Gd':           ( 'IDX_UseModRM',       'reg',    '%Gd',  'Gd',      '',      ),
    'Gv':           ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gv_RO':        ( 'IDX_UseModRM',       'reg',    '%Gv',  'Gv',      '',      ),
    'Gy':           ( 'IDX_UseModRM',       'reg',    '%Gy',  'Gy',      '',      ),
    'Pd':           ( 'IDX_UseModRM',       'reg',    '%Pd',  'Pd',      '',      ),
    'PdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Pd',  'PdZx',    '',      ),
    'Pq':           ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Pq_WO':        ( 'IDX_UseModRM',       'reg',    '%Pq',  'Pq',      '',      ),
    'Vd':           ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vd_WO':        ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'VdZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vd',  'Vd',      '',      ),
    'Vdq':          ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'Vss':          ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vss_WO':       ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'VssZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vss', 'Vss',     '',      ),
    'Vsd':          ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vsd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'VsdZx_WO':     ( 'IDX_UseModRM',       'reg',    '%Vsd', 'Vsd',     '',      ),
    'Vps':          ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vps_WO':       ( 'IDX_UseModRM',       'reg',    '%Vps', 'Vps',     '',      ),
    'Vpd':          ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vpd_WO':       ( 'IDX_UseModRM',       'reg',    '%Vpd', 'Vpd',     '',      ),
    'Vq':           ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vq_WO':        ( 'IDX_UseModRM',       'reg',    '%Vq',  'Vq',      '',      ),
    'Vdq_WO':       ( 'IDX_UseModRM',       'reg',    '%Vdq', 'Vdq',     '',      ),
    'VqHi':         ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqHi_WO':      ( 'IDX_UseModRM',       'reg',    '%Vdq', 'VdqHi',   '',      ),
    'VqZx_WO':      ( 'IDX_UseModRM',       'reg',    '%Vq',  'VqZx',    '',      ),
    'Vx':           ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),
    'Vx_WO':        ( 'IDX_UseModRM',       'reg',    '%Vx',  'Vx',      '',      ),

    # VEX.vvvv
    'By':           ( 'IDX_UseModRM',       'vvvv',   '%By',  'By',      'V',     ),
    'Hps':          ( 'IDX_UseModRM',       'vvvv',   '%Hps', 'Hps',     'V',     ),
    'Hpd':          ( 'IDX_UseModRM',       'vvvv',   '%Hpd', 'Hpd',     'V',     ),
    'HssHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HssHi',   'V',     ),
    'HsdHi':        ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'HsdHi',   'V',     ),
    'Hq':           ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'Hq',      'V',     ),
    'HqHi':         ( 'IDX_UseModRM',       'vvvv',   '%Hq',  'HqHi',    'V',     ),
    'Hx':           ( 'IDX_UseModRM',       'vvvv',   '%Hx',  'Hx',      'V',     ),

    # Immediate values.
    'Ib':           ( 'IDX_ParseImmByte',   'imm',    '%Ib',  'Ib',      '',      ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw':           ( 'IDX_ParseImmUshort', 'imm',    '%Iw',  'Iw',      '',      ),
    'Id':           ( 'IDX_ParseImmUlong',  'imm',    '%Id',  'Id',      '',      ),
    'Iq':           ( 'IDX_ParseImmQword',  'imm',    '%Iq',  'Iq',      '',      ),
    'Iv':           ( 'IDX_ParseImmV',      'imm',    '%Iv',  'Iv',      '',      ), ##< o16: word, o32: dword, o64: qword
    'Iz':           ( 'IDX_ParseImmZ',      'imm',    '%Iz',  'Iz',      '',      ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob':           ( 'IDX_ParseImmAddr',   'imm',    '%Ob',  'Ob',      '',      ),
    'Ov':           ( 'IDX_ParseImmAddr',   'imm',    '%Ov',  'Ov',      '',      ),

    # Relative jump targets
    'Jb':           ( 'IDX_ParseImmBRel',   'imm',    '%Jb',  'Jb',      '',      ),
    'Jv':           ( 'IDX_ParseImmVRel',   'imm',    '%Jv',  'Jv',      '',      ),

    # DS:rSI
    'Xb':           ( 'IDX_ParseXb',        'rSI',    '%eSI', 'Xb',      '',      ),
    'Xv':           ( 'IDX_ParseXv',        'rSI',    '%eSI', 'Xv',      '',      ),
    # ES:rDI
    'Yb':           ( 'IDX_ParseYb',        'rDI',    '%eDI', 'Yb',      '',      ),
    'Yv':           ( 'IDX_ParseYv',        'rDI',    '%eDI', 'Yv',      '',      ),

    'Fv':           ( 'IDX_ParseFixedReg',  'rFLAGS', '%Fv',  'Fv',      '',      ),

    # Fixed registers.
    'AL':           ( 'IDX_ParseFixedReg',  'AL',     'al',   'REG_AL',  '',      ),
    'rAX':          ( 'IDX_ParseFixedReg',  'rAX',    '%eAX', 'REG_EAX', '',      ),
    'rDX':          ( 'IDX_ParseFixedReg',  'rDX',    '%eDX', 'REG_EDX', '',      ),
    'CS':           ( 'IDX_ParseFixedReg',  'CS',     'cs',   'REG_CS',  '',      ), # 8086: push CS
    'DS':           ( 'IDX_ParseFixedReg',  'DS',     'ds',   'REG_DS',  '',      ),
    'ES':           ( 'IDX_ParseFixedReg',  'ES',     'es',   'REG_ES',  '',      ),
    'FS':           ( 'IDX_ParseFixedReg',  'FS',     'fs',   'REG_FS',  '',      ),
    'GS':           ( 'IDX_ParseFixedReg',  'GS',     'gs',   'REG_GS',  '',      ),
    'SS':           ( 'IDX_ParseFixedReg',  'SS',     'ss',   'REG_SS',  '',      ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Each form maps to a tuple with these fields:
##    - 0: the encoding (sEncoding).
##    - 1: list of operand locations (sWhere1, ...), or None.
##    - 2: the opcodesub value.
g_kdIemForms = {
    # Plain ModR/M forms.
    'RM':           ( 'ModR/M',     [ 'reg', 'rm' ],            '',           ),
    'RM_REG':       ( 'ModR/M',     [ 'reg', 'rm' ],            '11 mr/reg',  ),
    'RM_MEM':       ( 'ModR/M',     [ 'reg', 'rm' ],            '!11 mr/reg', ),
    'RMI':          ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '',           ),
    'RMI_REG':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '11 mr/reg',  ),
    'RMI_MEM':      ( 'ModR/M',     [ 'reg', 'rm', 'imm' ],     '!11 mr/reg', ),
    'MR':           ( 'ModR/M',     [ 'rm', 'reg' ],            '',           ),
    'MR_REG':       ( 'ModR/M',     [ 'rm', 'reg' ],            '11 mr/reg',  ),
    'MR_MEM':       ( 'ModR/M',     [ 'rm', 'reg' ],            '!11 mr/reg', ),
    'MRI':          ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '',           ),
    'MRI_REG':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '11 mr/reg',  ),
    'MRI_MEM':      ( 'ModR/M',     [ 'rm', 'reg', 'imm' ],     '!11 mr/reg', ),
    'M':            ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'M_REG':        ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'M_MEM':        ( 'ModR/M',     [ 'rm', ],                  '',           ),
    'R':            ( 'ModR/M',     [ 'reg', ],                 '',           ),

    # VEX prefixed ModR/M forms.
    'VEX_RM':       ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '',           ),
    'VEX_RM_REG':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '11 mr/reg',  ),
    'VEX_RM_MEM':   ( 'VEX.ModR/M', [ 'reg', 'rm' ],            '!11 mr/reg', ),
    'VEX_MR':       ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '',           ),
    'VEX_MR_REG':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '11 mr/reg',  ),
    'VEX_MR_MEM':   ( 'VEX.ModR/M', [ 'rm', 'reg' ],            '!11 mr/reg', ),
    'VEX_M':        ( 'VEX.ModR/M', [ 'rm', ],                  ''            ),
    'VEX_M_REG':    ( 'VEX.ModR/M', [ 'rm', ],                  ''            ),
    'VEX_M_MEM':    ( 'VEX.ModR/M', [ 'rm', ],                  ''            ),
    'VEX_R':        ( 'VEX.ModR/M', [ 'reg', ],                 ''            ),
    'VEX_RVM':      ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '',           ),
    'VEX_RVM_REG':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '11 mr/reg',  ),
    'VEX_RVM_MEM':  ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ],    '!11 mr/reg', ),
    'VEX_RMV':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '',           ),
    'VEX_RMV_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '11 mr/reg',  ),
    'VEX_RMV_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ],    '!11 mr/reg', ),
    'VEX_RMI':      ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '',           ),
    'VEX_RMI_REG':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '11 mr/reg',  ),
    'VEX_RMI_MEM':  ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ],     '!11 mr/reg', ),
    'VEX_MVR':      ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '',           ),
    'VEX_MVR_REG':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '11 mr/reg',  ),
    'VEX_MVR_MEM':  ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ],    '!11 mr/reg', ),

    'VEX_VM':       ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '',           ),
    'VEX_VM_REG':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '11 mr/reg',  ),
    'VEX_VM_MEM':   ( 'VEX.ModR/M', [ 'vvvv', 'rm' ],           '!11 mr/reg', ),

    # No ModR/M at all.
    'FIXED':        ( 'fixed',      None,                       '',           ),
};
417
## \@oppfx values.
## Every valid prefix value is mapped to its own (empty) list.
g_kdPrefixes = { sPrefix: [] for sPrefix in ('none', '0x66', '0xf3', '0xf2',) };
425
## Special \@opcode tag values.
## Every special value is mapped to its own (empty) list.
g_kdSpecialOpcodes = {
    sSpecial: [] for sSpecial in (
        '/reg',
        'mr/reg',
        '11 /reg',
        '!11 /reg',
        '11 mr/reg',
        '!11 mr/reg',
    )
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Every non-alias entry must name itself as its real value, otherwise looking
## the tag up silently turns it into a different sub-opcode.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    'vex.l=1':              [ 'vex.l=1',            'L1',   ], # Was 'vex.l=0' (copy & paste), turning L=1 into L=0.
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
};
456
## Valid values for \@openc.
## Each encoding maps to a one-entry list holding a BS3CG1ENC_XXX name, or
## None where the table gives no such name.
g_kdEncodings = {
    # ModR/M
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],
    # VEX...ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ],
    # Fixed encoding (address, registers, unused, etc).
    'fixed':        [ 'BS3CG1ENC_FIXED', ],
    # VEX + fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ],
    # Prefix
    'prefix':       [ None, ],
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## Every valid style name is mapped to its own (empty) list.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate (imm8).
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
};
475
## Known CPU names, each mapped to an empty tuple.
g_kdCpuNames = dict.fromkeys(('8086', '80186', '80286', '80386', '80486',), ());
483
## \@opcpuid
## Maps the CPUID feature names accepted by the tag to X86_CPUID_XXX constant names.
##
## Note! 'pclmul', 'monitor' and 'smx' used to name the constant of the
##       neighbouring feature bit (DTES64, CPLDS and TM2 respectively).
##       NOTE(review): the corrected constant names are assumed to exist in
##       the x86 header defining the other X86_CPUID_FEATURE_ECX_XXX values -
##       confirm.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul':       'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor':      'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps each hint name to a DISOPTYPE_XXX constant name, or to an empty string
## where the table gives no such constant.
# pylint: disable=line-too-long
g_kdHints = {
    'invalid':                      'DISOPTYPE_INVALID',                    ##<
    'harmless':                     'DISOPTYPE_HARMLESS',                   ##<
    'controlflow':                  'DISOPTYPE_CONTROLFLOW',                ##<
    'potentially_dangerous':        'DISOPTYPE_POTENTIALLY_DANGEROUS',      ##<
    'dangerous':                    'DISOPTYPE_DANGEROUS',                  ##<
    'portio':                       'DISOPTYPE_PORTIO',                     ##<
    'privileged':                   'DISOPTYPE_PRIVILEGED',                 ##<
    'privileged_notrap':            'DISOPTYPE_PRIVILEGED_NOTRAP',          ##<
    'uncond_controlflow':           'DISOPTYPE_UNCOND_CONTROLFLOW',         ##<
    'relative_controlflow':         'DISOPTYPE_RELATIVE_CONTROLFLOW',       ##<
    'cond_controlflow':             'DISOPTYPE_COND_CONTROLFLOW',           ##<
    'interrupt':                    'DISOPTYPE_INTERRUPT',                  ##<
    'illegal':                      'DISOPTYPE_ILLEGAL',                    ##<
    'rrm_dangerous':                'DISOPTYPE_RRM_DANGEROUS',              ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16':             'DISOPTYPE_RRM_DANGEROUS_16',           ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs':                 'DISOPTYPE_INHIBIT_IRQS',               ##< Will or can inhibit irqs (sti, pop ss, mov ss).
    'x86_portio_read':              'DISOPTYPE_X86_PORTIO_READ',            ##<
    'x86_portio_write':             'DISOPTYPE_X86_PORTIO_WRITE',           ##<
    'x86_invalid_64':               'DISOPTYPE_X86_INVALID_64',             ##< Invalid in 64 bits mode
    'x86_only_64':                  'DISOPTYPE_X86_ONLY_64',                ##< Only valid in 64 bits mode
    'x86_default_64_op_size':       'DISOPTYPE_X86_DEFAULT_64_OP_SIZE',     ##< Default 64 bits operand size
    'x86_forced_64_op_size':        'DISOPTYPE_X86_FORCED_64_OP_SIZE',      ##< Forced 64 bits operand size; regardless of prefix bytes
    'x86_rexb_extends_opreg':       'DISOPTYPE_X86_REXB_EXTENDS_OPREG',     ##< REX.B extends the register field in the opcode byte
    'x86_mod_fixed_11':             'DISOPTYPE_X86_MOD_FIXED_11',           ##< modrm.mod is always 11b
    'x86_forced_32_op_size_x86':    'DISOPTYPE_X86_FORCED_32_OP_SIZE_X86',  ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                            ##  (only in 16 & 32 bits mode!)
    'x86_avx':                      'DISOPTYPE_X86_AVX',                    ##< AVX,AVX2,++ instruction. Not implemented yet!
    'x86_sse':                      'DISOPTYPE_X86_SSE',                    ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'x86_mmx':                      'DISOPTYPE_X86_MMX',                    ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'x86_fpu':                      'DISOPTYPE_X86_FPU',                    ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx':               '',                                     ##< Ignores operand size prefix 66h.
    'ignores_rexw':                 '',                                     ##< Ignores REX.W.
    'ignores_op_sizes':             '',                                     ##< Shorthand for "ignores_oz_pfx | ignores_rexw".
    'vex_l_zero':                   '',                                     ##< VEX.L must be 0.
    'vex_l_ignored':                '',                                     ##< VEX.L is ignored.
    'vex_v_zero':                   '',                                     ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed':                 '',                                     ##< Lock prefix allowed.
};
# pylint: enable=line-too-long
572
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Every valid exception type is mapped to its own (empty) list.
g_kdXcptTypes = {
    sXcptType: [] for sXcptType in (
        'none',
        '1',
        '2',
        '3',
        '4',
        '4UA',
        '5',
        '5LZ',      # LZ = VEX.L must be zero.
        '6',
        '7',
        '7LZ',
        '8',
        '11',
        '12',
        'E1',
        'E1NF',
        'E2',
        'E3',
        'E3NF',
        'E4',
        'E4NF',
        'E5',
        'E5NF',
        'E6',
        'E6NF',
        'E7NF',
        'E9',
        'E9NF',
        'E10',
        'E11',
        'E12',
        'E12NF',
    )
};
608
609
610def _isValidOpcodeByte(sOpcode):
611 """
612 Checks if sOpcode is a valid lower case opcode byte.
613 Returns true/false.
614 """
615 if len(sOpcode) == 4:
616 if sOpcode[:2] == '0x':
617 if sOpcode[2] in '0123456789abcdef':
618 if sOpcode[3] in '0123456789abcdef':
619 return True;
620 return False;
621
622
623class InstructionMap(object):
624 """
625 Instruction map.
626
627 The opcode map provides the lead opcode bytes (empty for the one byte
628 opcode map). An instruction can be member of multiple opcode maps as long
629 as it uses the same opcode value within the map (because of VEX).
630 """
631
632 kdEncodings = {
633 'legacy': [],
634 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
635 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
636 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
637 'xop8': [], ##< XOP prefix with vvvvv = 8
638 'xop9': [], ##< XOP prefix with vvvvv = 9
639 'xop10': [], ##< XOP prefix with vvvvv = 10
640 };
641 ## Selectors.
642 ## 1. The first value is the number of table entries required by a
643 ## decoder or disassembler for this type of selector.
644 ## 2. The second value is how many entries per opcode byte if applicable.
645 kdSelectors = {
646 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
647 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
648 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
649 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
650 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
651 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
652 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
653 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
654 };
655
656 ## Define the subentry number according to the Instruction::sPrefix
657 ## value for 'byte+pfx' selected tables.
658 kiPrefixOrder = {
659 'none': 0,
660 '0x66': 1,
661 '0xf3': 2,
662 '0xf2': 3,
663 };
664
665 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
666 sEncoding = 'legacy', sDisParse = None):
667 assert sSelector in self.kdSelectors;
668 assert sEncoding in self.kdEncodings;
669 if asLeadOpcodes is None:
670 asLeadOpcodes = [];
671 else:
672 for sOpcode in asLeadOpcodes:
673 assert _isValidOpcodeByte(sOpcode);
674 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
675
676 self.sName = sName;
677 self.sIemName = sIemName;
678 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
679 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
680 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
681 self.aoInstructions = [] # type: Instruction
682 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
683
684 def copy(self, sNewName, sPrefixFilter = None):
685 """
686 Copies the table with filtering instruction by sPrefix if not None.
687 """
688 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
689 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
690 else self.sSelector,
691 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
692 if sPrefixFilter is None:
693 oCopy.aoInstructions = list(self.aoInstructions);
694 else:
695 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
696 return oCopy;
697
698 def getTableSize(self):
699 """
700 Number of table entries. This corresponds directly to the selector.
701 """
702 return self.kdSelectors[self.sSelector][0];
703
704 def getEntriesPerByte(self):
705 """
706 Number of table entries per opcode bytes.
707
708 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
709 the others it will just return 1.
710 """
711 return self.kdSelectors[self.sSelector][1];
712
713 def getInstructionIndex(self, oInstr):
714 """
715 Returns the table index for the instruction.
716 """
717 bOpcode = oInstr.getOpcodeByte();
718
719 # The byte selectors are simple. We need a full opcode byte and need just return it.
720 if self.sSelector == 'byte':
721 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
722 return bOpcode;
723
724 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
725 if self.sSelector == 'byte+pfx':
726 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
727 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
728 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
729
730 # The other selectors needs masking and shifting.
731 if self.sSelector == '/r':
732 return (bOpcode >> 3) & 0x7;
733
734 if self.sSelector == 'mod /r':
735 return (bOpcode >> 3) & 0x1f;
736
737 if self.sSelector == 'memreg /r':
738 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
739
740 if self.sSelector == '!11 /r':
741 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
742 return (bOpcode >> 3) & 0x7;
743
744 if self.sSelector == '11 /r':
745 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
746 return (bOpcode >> 3) & 0x7;
747
748 if self.sSelector == '11':
749 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
750 return bOpcode & 0x3f;
751
752 assert False, self.sSelector;
753 return -1;
754
755 def getInstructionsInTableOrder(self):
756 """
757 Get instructions in table order.
758
759 Returns array of instructions. Normally there is exactly one
760 instruction per entry. However the entry could also be None if
761 not instruction was specified for that opcode value. Or there
762 could be a list of instructions to deal with special encodings
763 where for instance prefix (e.g. REX.W) encodes a different
764 instruction or different CPUs have different instructions or
765 prefixes in the same place.
766 """
767 # Start with empty table.
768 cTable = self.getTableSize();
769 aoTable = [None] * cTable;
770
771 # Insert the instructions.
772 for oInstr in self.aoInstructions:
773 if oInstr.sOpcode:
774 idxOpcode = self.getInstructionIndex(oInstr);
775 assert idxOpcode < cTable, str(idxOpcode);
776
777 oExisting = aoTable[idxOpcode];
778 if oExisting is None:
779 aoTable[idxOpcode] = oInstr;
780 elif not isinstance(oExisting, list):
781 aoTable[idxOpcode] = list([oExisting, oInstr]);
782 else:
783 oExisting.append(oInstr);
784
785 return aoTable;
786
787
def getDisasTableName(self):
    """
    Returns the disassembler table name for this map.

    The name is 'g_aDisas' followed by one piece for each '_' separated
    word in self.sName.
    """
    asPieces = ['g_aDisas',];
    for sPart in self.sName.split('_'):
        if sPart in ('m', 'r'):
            # Suffix indicating modrm.mod==mem ('m') or modrm.mod==reg ('r').
            asPieces.append('_' + sPart);
        elif len(sPart) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPart):
            # Two lowercase hex digits are kept as a separate '_xx' suffix.
            asPieces.append('_' + sPart);
        else:
            # Anything else gets 'grp'/'map' capitalized and an uppercased first character.
            sPart = sPart.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sPart[0].upper() + sPart[1:]);
    return ''.join(asPieces);
805
def getDisasRangeName(self):
    """
    Returns the disassembler table range name for this map.

    This is the disassembler table name with 'g_aDisas' replaced by
    'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    return '%sRange' % (sTableName.replace('g_aDisas', 'g_Disas'),);
811
def isVexMap(self):
    """ Returns True if this is a VEX map, i.e. the encoding name starts with 'vex'. """
    fVex = self.sEncoding.startswith('vex');
    return fVex;
815
816
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned
    constructor parameter gives the default stance on zero vs sign extending.
    Prefixing a value with '+' or '-' always selects sign extending.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32]; # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lowercase hex digit to its inverted digit.
    kdHexInv = dict(zip('0123456789abcdef', 'fedcba9876543210'));

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form; the bytearray
        holds the value with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # A leading sign forces sign extension and moves the '0x' prefix one character to the right.
        fHasSign    = sValue[0] in ('-', '+');
        fSignExtend = fHasSign or not self.fUnsigned;
        offPrefix   = 1 if fHasSign else 0;
        fHex        = len(sValue) > offPrefix + 2 and sValue[offPrefix : offPrefix + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  Negative values are converted via the
        # non-negative value -n - 1 whose digits are then inverted.
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Figure the natural number of digits: the first normal size that
        # fits, or else a multiple of the largest normal size.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = int((cDigits + cMaxDigits - 1) / cMaxDigits) * cMaxDigits;
            assert isinstance(cNaturalDigits, int)

        # Pad on the left, using 'f' for negative values and '0' otherwise.
        cPadding = cNaturalDigits - cDigits;
        if cPadding != 0:
            sHex = ('f' if fNegative else '0') * cPadding + sHex;

        # Walk the digit pairs from the end so the least significant byte comes first.
        abValue = bytearray([int(sHex[offEnd - 2 : offEnd], 16) for offEnd in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for the base type.
        """
        _ = sValue;
        return False;
941
942
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics looked up in
    g_kdEFlagsMnemonics.
    """

    ## Mnemonics whose presence makes isAndOrPair() report an AND+OR pair.
    kdZeroValueFlags = dict.fromkeys(('nv', 'pl', 'nz', 'na', 'pe', 'nc', 'di', 'up'), 0);

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Converts a comma separated list of flag mnemonics.

        Returns the (clear, set) pair when any mnemonic maps to a '!'
        prefixed constant, otherwise the single entry tuple for the set mask.

        Raises BadValue on unknown mnemonics.
        """
        fToClear = 0;
        fToSet   = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            # A leading '!' marks a mnemonic for the flag being clear.
            if sConstant[0] != '!':
                fToSet   |= g_kdX86EFlagsConstants[sConstant];
            else:
                fToClear |= g_kdX86EFlagsConstants[sConstant[1:]];

        aoSet = TestType.get(self, '0x%x' % (fToSet,));
        if fToClear == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (fToClear,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, i.e. contains any of the kdZeroValueFlags
        mnemonics (plain substring check).
        """
        return any(sZeroFlag in sValue for sZeroFlag in self.kdZeroValueFlags);
978
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose flag names are taken from a
    dictionary of constants (see the constructor arguments).
    """

    kdZeroValueFlags = dict.fromkeys(('nv', 'pl', 'nz', 'na', 'pe', 'nc', 'di', 'up'), 0);

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        Converts a comma separated list of flag names.

        Each name is uppercased and prefixed with sConstantPrefix before
        being looked up in kdConstantsAndValues; the values are ORed together.

        Raises BadValue on unknown names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant  = self.sConstantPrefix + sFlag.upper();
            fFlagValue = self.kdConstantsAndValues.get(sConstant, None);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlagValue;
        return TestType.get(self, '0x%x' % (fCombined,));
999
1000
def _makeTestInOutFields():
    """
    Builds the TestInOut.kdFields dictionary.

    Returns a dictionary mapping the field name to a tuple of
    ( default type, [both|input|output], ).

    The register families are generated from patterns instead of being
    spelled out one by one, so every member of a family is guaranteed to
    follow the same naming scheme.
    """
    dFields = {};

    def addFields(asNames, sType = 'uint', sDirection = 'both'):
        """ Adds the given field names with the same type and direction. """
        for sName in asNames:
            dFields[sName] = ( sType, sDirection, );

    asHighRegs = [ 'r%u' % (iReg,) for iReg in range(8, 16) ];
    asGprs64   = [ 'rax', 'rdx', 'rcx', 'rbx', 'rsp', 'rbp', 'rsi', 'rdi', ] + asHighRegs;

    # Operands (\@op1 thru \@op4).
    addFields([ 'op1', 'op2', 'op3', 'op4', ]);
    # Flags.
    addFields([ 'efl', ], sType = 'efl');
    addFields([ 'efl_undef', ], sDirection = 'output');
    # 8-bit GPRs.
    addFields([ 'al', 'cl', 'dl', 'bl', 'ah', 'ch', 'dh', 'bh', ] + [ sReg + 'l' for sReg in asHighRegs ]);
    # 16-bit GPRs.
    addFields([ 'ax', 'dx', 'cx', 'bx', 'sp', 'bp', 'si', 'di', ] + [ sReg + 'w' for sReg in asHighRegs ]);
    # 32-bit GPRs.
    addFields([ 'eax', 'edx', 'ecx', 'ebx', 'esp', 'ebp', 'esi', 'edi', ] + [ sReg + 'd' for sReg in asHighRegs ]);
    # 64-bit GPRs.
    addFields(asGprs64);
    # 16-bit, 32-bit or 64-bit registers according to operand size.
    addFields([ 'oz.' + sReg for sReg in asGprs64 ]);
    # Control registers.
    addFields([ 'cr0', ],  sType = 'cr0');
    addFields([ 'cr4', ],  sType = 'cr4');
    addFields([ 'xcr0', ], sType = 'xcr0');
    # FPU Registers
    addFields([ 'fcw', 'fsw', 'ftw', 'fop', 'fpuip', 'fpucs', 'fpudp', 'fpuds', 'mxcsr', ]);
    addFields([ 'st%u' % (iReg,) for iReg in range(8) ]);
    # MMX registers.
    addFields([ 'mm%u' % (iReg,) for iReg in range(8) ]);
    # SSE registers: whole register followed by the sub-field variants.
    for sSuffix in ( '', '.lo', '.hi', '.lo.zx', '.dw0', ):
        addFields([ 'xmm%u%s' % (iReg, sSuffix,) for iReg in range(16) ]);
    # Legacy misspelling of 'xmm15.dw0' (underscore instead of dot).  Kept
    # so anything already using this spelling continues to be accepted.
    addFields([ 'xmm15_dw0', ]);
    # AVX registers.
    addFields([ 'ymm%u' % (iReg,) for iReg in range(16) ]);
    # Special ones.
    addFields([ 'value.xcpt', ], sDirection = 'output');

    return dFields;


class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        # NOTE(review): 'int' relies on the TestType default (fUnsigned = True)
        # and thus parses exactly like 'uint' - confirm that this is intended.
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    ## name: ( default type, [both|input|output], )
    kdFields = _makeTestInOutFields();

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a modifier for sField using operator sOp, the value string
        sValue and the type name sType.

        sField must be a kdFields key and sOp one of kasOperators.  All four
        arguments must be strings.  Violations trigger assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1266
1267
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable (kdVariables key) against one of the
    values valid for that variable using one of the kasCompareOps operators.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];

    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Selector shorthand predicates.
    ## These translates into variable expressions.
    kdPredicates = {
        # Operand size.
        'o16':        'size==o16',
        'o32':        'size==o32',
        'o64':        'size==o64',
        # Execution ring.
        'ring0':      'ring==0',
        '!ring0':     'ring==1..3',
        'ring1':      'ring==1',
        'ring2':      'ring==2',
        'ring3':      'ring==3',
        'user':       'ring==3',
        'supervisor': 'ring==0..2',
        # Basic code mode.
        '16-bit':     'codebits==16',
        '32-bit':     'codebits==32',
        '64-bit':     'codebits==64',
        # cpu modes.
        'real':       'mode==real',
        'prot':       'mode==prot',
        'long':       'mode==long',
        'v86':        'mode==v86',
        'smm':        'mode==smm',
        'vmx':        'mode==vmx',
        'svm':        'mode==svm',
        # paging on/off
        'paging':     'paging==on',
        '!paging':    'paging==off',
        # CPU vendor
        'amd':        'vendor==amd',
        '!amd':       'vendor!=amd',
        'intel':      'vendor==intel',
        '!intel':     'vendor!=intel',
        'via':        'vendor==via',
        '!via':       'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector.  The variable, operator and value are checked
        against kdVariables and kasCompareOps using assertions.
        """
        dValidValues = self.kdVariables.get(sVariable);
        assert dValidValues is not None;
        assert sOp in self.kasCompareOps;
        assert sValue in dValidValues;
        self.sVariable = sVariable;
        self.sOp       = sOp;
        self.sValue    = sValue;
1364
1365
1366class InstructionTest(object):
1367 """
1368 Instruction test.
1369 """
1370
1371 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1372 self.oInstr = oInstr # type: InstructionTest
1373 self.aoInputs = [] # type: list(TestInOut)
1374 self.aoOutputs = [] # type: list(TestInOut)
1375 self.aoSelectors = [] # type: list(TestSelector)
1376
1377 def toString(self, fRepr = False):
1378 """
1379 Converts it to string representation.
1380 """
1381 asWords = [];
1382 if self.aoSelectors:
1383 for oSelector in self.aoSelectors:
1384 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1385 asWords.append('/');
1386
1387 for oModifier in self.aoInputs:
1388 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1389
1390 asWords.append('->');
1391
1392 for oModifier in self.aoOutputs:
1393 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1394
1395 if fRepr:
1396 return '<' + ' '.join(asWords) + '>';
1397 return ' '.join(asWords);
1398
1399 def __str__(self):
1400 """ Provide string represenation. """
1401 return self.toString(False);
1402
1403 def __repr__(self):
1404 """ Provide unambigious string representation. """
1405 return self.toString(True);
1406
class Operand(object):
    """
    Instruction operand.

    Pairs an operand location (g_kdOpLocations key) with an operand type
    (g_kdOpTypes key).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< g_kdOpLocations
        self.sType  = sType;    ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding. """
        # Decided by the first character of the operand type.
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1421
1422
1423
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    Instruction.

    Collects what is known about a single instruction: encoding details,
    operands, EFLAGS behaviour, tests, implementation names and the source
    location it was created from.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile           = sSrcFile;
        self.iLineCreated       = iLine;
        self.iLineCompleted     = None;
        self.cOpTags            = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Turn object into a string.

        The result is a '; ' separated list of 'field=value' items, enclosed
        in angle brackets when fRepr is True.  Fields with a None value are
        left out.
        """
        # Collect (name, value) pairs in output order.  The None values are
        # filtered out when formatting at the end.
        atFields = [ ('opcode', self.sOpcode), ];
        if self.sPrefix:
            atFields.append(('prefix', self.sPrefix));
        atFields.append(('mnemonic', self.sMnemonic));
        atFields.extend([ ('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                          for iOperand, oOperand in enumerate(self.aoOperands) ]);
        if self.aoMaps:
            atFields.append(('maps', ','.join([oMap.sName for oMap in self.aoMaps])));
        atFields.append(('encoding', self.sEncoding));
        if self.dHints:
            atFields.append(('hints', ','.join(self.dHints.keys())));
        atFields.append(('disenum', self.sDisEnum));
        if self.asCpuIds:
            atFields.append(('cpuid', ','.join(self.asCpuIds)));
        atFields.append(('group', self.sGroup));
        if self.fUnused:
            atFields.append(('unused', 'True'));
        if self.fInvalid:
            atFields.append(('invalid', 'True'));
        atFields.extend([
            ('invlstyle', self.sInvalidStyle),
            ('fltest',    self.asFlTest),
            ('flmodify',  self.asFlModify),
            ('flundef',   self.asFlUndefined),
            ('flset',     self.asFlSet),
            ('flclear',   self.asFlClear),
            ('mincpu',    self.sMinCpu),
            ('stats',     self.sStats),
            ('sFunction', self.sFunction),
        ]);
        if self.fStub:
            atFields.append(('fStub', 'True'));
        if self.fUdStub:
            atFields.append(('fUdStub', 'True'));
        if self.cOpTags:
            atFields.append(('optags', str(self.cOpTags)));
        if self.iLineFnIemOpMacro != -1:
            atFields.append(('FNIEMOP_XXX', str(self.iLineFnIemOpMacro)));
        if self.iLineMnemonicMacro != -1:
            atFields.append(('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)));

        sBody = '; '.join([ '%s=%s' % (sField, sValue,) for sField, sValue in atFields if sValue is not None ]);
        return '<' + sBody + '>' if fRepr else sBody;

    def __str__(self):
        """ Provide string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Provide unambiguous string representation. """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Makes a copy of the object for the purpose of putting in a different map
        or a different place in the current map.

        The copy gets this instruction as parent.  The oMap, sOpcode,
        sSubOpcode and sPrefix arguments override the corresponding
        attributes when given (non-empty).  Lists and the hints dictionary
        are copied shallowly, everything else is shared by reference.
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent = self;

        # Attributes the caller can override.
        oCopy.aoMaps     = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix    = sPrefix    or self.sPrefix;
        oCopy.sOpcode    = sOpcode    or self.sOpcode;
        oCopy.sSubOpcode = sSubOpcode or self.sSubOpcode;

        # Attributes carried over as-is (shared reference).
        for sAttr in ( 'sMnemonic', 'sBrief', 'sEncoding',
                       'asFlTest', 'asFlModify', 'asFlUndefined', 'asFlSet', 'asFlClear',
                       'sDisEnum', 'sMinCpu', 'oCpuExpr', 'sGroup', 'fUnused', 'fInvalid',
                       'sInvalidStyle', 'sXcptType',
                       'sStats', 'sFunction', 'fStub', 'fUdStub',
                       'iLineCompleted', 'cOpTags', 'iLineFnIemOpMacro', 'iLineMnemonicMacro',
                       'sRawDisOpNo', 'sRawIemOpFlags', 'sRawOldOpcodes', ):
            setattr(oCopy, sAttr, getattr(self, sAttr));

        # Lists get a shallow copy so appending to the copy leaves the original alone.
        ## @todo Deeper copy of aoOperands and aoTests?
        for sAttr in ( 'asDescSections', 'aoOperands', 'asCpuIds', 'asReqFeatures', 'aoTests',
                       'asRawDisParams', 'asCopyTests', ):
            setattr(oCopy, sAttr, list(getattr(self, sAttr)));
        oCopy.dHints = dict(self.dHints);

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.
        Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sOpcode = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sOpcode.startswith('0x'):
            return int(sOpcode, 16);

        # The remaining forms all consist of a lead-in followed by a single digit.
        sLead  = sOpcode[:-1];
        sDigit = sOpcode[-1:];
        if sDigit.isdigit():
            # The /r form:
            if sLead == '/':
                return int(sDigit) << 3;

            # The 11/r form:
            if sLead == '11/':
                return (int(sDigit) << 3) | 0xc0;

            # The !11/r form (returns mod=1):
            ## @todo this doesn't really work...
            if sLead == '!11/':
                return (int(sDigit) << 3) | 0x80;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        Returns the integer mask value for asFlags (0 if None or empty).
        """
        uMask = 0;
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            uMask |= g_kdX86EFlagsConstants[sConstant];
        return uMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps);
1662
1663
1664
1665## All the instructions.
1666g_aoAllInstructions = [] # type: list(Instruction)
1667
1668## All the instructions indexed by statistics name (opstat).
1669g_dAllInstructionsByStat = {} # type: dict(Instruction)
1670
1671## All the instructions indexed by function name (opfunction).
1672g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))
1673
1674## Instructions tagged by oponlytest
1675g_aoOnlyTestInstructions = [] # type: list(Instruction)
1676
1677## Instruction maps.
1678g_aoInstructionMaps = [
1679 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1680 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1681 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1682 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1683 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1684 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1685 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1686 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1687 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1688 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1689 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1690 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1691 ## @todo g_apfnEscF1_E0toFF
1692 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1693 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1694 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1695 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1696 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1698 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1699 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1700
1701 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1702 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1703 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1704 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1705 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1706 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1707 ## @todo What about g_apfnGroup9MemReg?
1708 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1709 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1710 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1711 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1712 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1713 ## @todo What about g_apfnGroup15RegReg?
1714 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1715 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1716 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1717
1718 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1719 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1720
1721 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1722 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1724 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1725 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1726 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1727
1728 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1729 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1730
1731 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1732 InstructionMap('xopmap8', sEncoding = 'xop8'),
1733 InstructionMap('xopmap9', sEncoding = 'xop9'),
1734 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1735 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1736 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737 InstructionMap('xopmap10', sEncoding = 'xop10'),
1738 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1739];
## Instruction maps indexed by InstructionMap.sName.
g_dInstructionMaps = dict((oMap.sName, oMap) for oMap in g_aoInstructionMaps);
## Instruction maps indexed by InstructionMap.sIemName.
g_dInstructionMapsByIemName = dict((oMap.sIemName, oMap) for oMap in g_aoInstructionMaps);
1742
1743
1744#
1745# Decoder functions.
1746#
1747
class DecoderFunction(object):
    """
    Decoder function.

    Records where a decoder function lives and what it is made up of.  It is
    mainly used for scoping searches for variables used in microcode blocks.
    """

    def __init__(self, sSrcFile, iBeginLine, sName, asDefArgs):
        ## The source file the function is defined in.
        self.sSrcFile = sSrcFile;
        ## The start line.
        self.iBeginLine = iBeginLine;
        ## The line the function (probably) ends on, -1 until complete() is called.
        self.iEndLine = -1;
        ## The function name.
        self.sName = sName;
        ## The FNIEMOP*DEF/STUB* macro argument list, 0th element is the macro name.
        self.asDefArgs = asDefArgs;
        ## The raw lines the function is made up of.
        self.asLines = [] # type: list(str)

    def complete(self, iEndLine, asLines):
        """
        Completes the function by recording its end line and raw lines.

        May only be called once per instance (asserted via iEndLine == -1).
        """
        assert self.iEndLine == -1;
        self.asLines = asLines;
        self.iEndLine = iEndLine;
1770
1771
1772#
1773# "Microcode" statements and blocks
1774#
1775
class McStmt(object):
    """
    Statement in a microcode block.
    """

    def __init__(self, sName, asParams):
        ## 'IEM_MC_XXX' or 'C++'.
        self.sName = sName;
        ## The statement parameters (list of strings).
        self.asParams = asParams;
        ## User data slot, initialized to None.
        self.oUser = None;

    def renderCode(self, cchIndent = 0):
        """
        Renders the code for the statement.

        Returns 'NAME(param, param, ...);' preceded by cchIndent spaces and
        followed by a newline.
        """
        sParams = ', '.join(self.asParams);
        return '%s%s(%s);\n' % (' ' * cchIndent, self.sName, sParams,);

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Renders a list of statements, concatenating what each statement's
        renderCode() method returns.
        """
        asRendered = [];
        for oCurStmt in aoStmts:
            asRendered.append(oCurStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    @staticmethod
    def findStmtByNames(aoStmts, dNames):
        """
        Returns the first statement in the list with any of the given names,
        None if there is no such statement.

        The branches of conditional statements (McStmtCond) are searched too,
        the if-branch before the else-branch.

        Note! The names are passed as a dictionary for quick lookup, the value
              does not matter.
        """
        for oCurStmt in aoStmts:
            if oCurStmt.sName in dNames:
                return oCurStmt;
            if not isinstance(oCurStmt, McStmtCond):
                continue;
            oFound = (   McStmt.findStmtByNames(oCurStmt.aoIfBranch, dNames)
                      or McStmt.findStmtByNames(oCurStmt.aoElseBranch, dNames));
            if oFound:
                return oFound;
        return None;

    def isCppStmt(self):
        """ Checks if this is a C++ statement (name starts with 'C++'). """
        return self.sName[:len('C++')] == 'C++';
1820
class McStmtCond(McStmt):
    """
    Base class for conditional statements (IEM_MC_IF_XXX).
    """

    def __init__(self, sName, asParams, aoIfBranch = None, aoElseBranch = None):
        super(McStmtCond, self).__init__(sName, asParams);
        # Each branch list is copied, so the instance never shares a list with the caller.
        ## Statements executed when the condition holds.
        self.aoIfBranch = list(aoIfBranch) if aoIfBranch is not None else [];
        ## Statements executed otherwise; empty if there is no else branch.
        self.aoElseBranch = list(aoElseBranch) if aoElseBranch is not None else [];

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional with both branches indented by four more
        spaces.  The IEM_MC_ELSE() part is left out when the else branch is
        empty.
        """
        sIndent = ' ' * cchIndent;
        asRet = [
            sIndent + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asRet.append(sIndent + '} IEM_MC_ELSE() {\n');
            asRet.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asRet.append(sIndent + '} IEM_MC_ENDIF();\n');
        return ''.join(asRet);
1838
class McStmtVar(McStmt):
    """ IEM_MC_LOCAL_VAR, IEM_MC_LOCAL_CONST """

    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        super(McStmtVar, self).__init__(sName, asParams);
        ## The variable name.
        self.sVarName = sVarName;
        ## The variable type.
        self.sType = sType;
        ## The constant value, None if not const.
        self.sConstValue = sConstValue;
1846
class McStmtArg(McStmtVar):
    """ IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF """

    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        super(McStmtArg, self).__init__(sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg = iArg;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        ## The reference string (local variable, register).
        self.sRef = sRef;
        assert sRefType in ('none', 'local');
1855
1856
class McStmtCall(McStmt):
    """ IEM_MC_CALL_* """

    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        super(McStmtCall, self).__init__(sName, asParams);
        ## Index into asParams of the function being called.
        self.idxFn = iFnParam;
        ## Index into asParams of the entry following the function.
        self.idxParams = iFnParam + 1;
        ## The function being called (asParams[iFnParam]).
        self.sFn = asParams[iFnParam];
        ## The asParams[iRcNameParam] string, None if iRcNameParam is negative.
        if iRcNameParam < 0:
            self.iRcName = None;
        else:
            self.iRcName = asParams[iRcNameParam];
1865
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is stored as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, sName = 'C++', cchIndent = 0):
        super(McCppGeneric, self).__init__(sName, [sCode,]);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.
        self.fDecode = fDecode;
        ## Extra indentation added to what renderCode() is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the statement, tagging the end of every line of it with a
        comment telling whether it is considered decoding related or not.
        """
        sAnnotation = ' // C++ decode\n' if self.fDecode else ' // C++ normal\n';
        sCode = ' ' * (cchIndent + self.cchIndent) + self.asParams[0] + '\n';
        return sCode.replace('\n', sAnnotation);
1883
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is stored as the one and only statement parameter.
    """

    def __init__(self, sCode, fDecode = True, aoIfBranch = None, aoElseBranch = None, cchIndent = 0):
        super(McCppCond, self).__init__('C++/if', [sCode,], aoIfBranch, aoElseBranch);
        ## Selects the '// C++ decode' (True) or '// C++ normal' (False) annotation.
        self.fDecode = fDecode;
        ## Extra indentation added to what renderCode() is given.
        self.cchIndent = cchIndent;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces around each branch.  The else
        part is only rendered when the else branch is not empty.
        """
        cchBody = cchIndent + self.cchIndent + 4;
        sIndent = ' ' * (cchIndent + self.cchIndent);
        if self.fDecode:
            sAnnotation = '// C++ decode';
        else:
            sAnnotation = '// C++ normal';

        asRet = [
            sIndent + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sIndent + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchBody),
            sIndent + '}\n',
        ];
        if self.aoElseBranch:
            asRet.extend([
                sIndent + 'else ' + sAnnotation + '\n',
                sIndent + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchBody),
                sIndent + '}\n',
            ]);
        return ''.join(asRet);
1906
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.
    """

    def __init__(self, sCode):
        super(McCppPreProc, self).__init__(sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """ Renders the directive as-is; the indentation is ignored and no annotation is added. """
        _ = cchIndent;
        return ''.join((self.asParams[0], '\n',));
1916
1917
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END, IEM_MC_DEFER_TO_CIMPL_x_RET).

    Holds the location and the raw source lines of a block and decodes them
    on demand into a list of McStmt objects (see decode() and decodeCode()).
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, oFunction, iInFunction, cchIndent = None):
        ## The source file containing the block.
        self.sSrcFile     = sSrcFile;
        ## The line with the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement.
        self.iBeginLine   = iBeginLine;
        ## The offset of the IEM_MC_BEGIN/IEM_MC_DEFER_TO_CIMPL_X_RET statement within the line.
        self.offBeginLine = offBeginLine;
        ## The line with the IEM_MC_END statement / last line of IEM_MC_DEFER_TO_CIMPL_X_RET.
        self.iEndLine     = -1;
        ## The offset of the IEM_MC_END statement within the line / semicolon offset for defer-to.
        self.offEndLine   = 0;
        ## The offset following the IEM_MC_END/IEM_MC_DEFER_TO_CIMPL_X_RET semicolon.
        self.offAfterEnd  = 0;
        ## The function the block resides in.
        self.oFunction    = oFunction;
        ## The name of the function the block resides in. DEPRECATED.
        self.sFunction    = oFunction.sName;
        ## The block number within the function.
        self.iInFunction  = iInFunction;
        ## The block indentation.  Note that both None and 0 select offBeginLine here.
        self.cchIndent    = cchIndent if cchIndent else offBeginLine;
        ## The raw lines the block is made up of.
        self.asLines      = []              # type: list(str)
        ## Decoded statements in the block.
        self.aoStmts      = []              # type: list(McStmt)

    def complete(self, iEndLine, offEndLine, offAfterEnd, asLines):
        """
        Completes the microcode block by recording where it ends and the raw
        lines it consists of.  May only be called once (asserted).
        """
        assert self.iEndLine == -1;
        self.iEndLine     = iEndLine;
        self.offEndLine   = offEndLine;
        self.offAfterEnd  = offAfterEnd;
        self.asLines      = asLines;

    def raiseDecodeError(self, sRawCode, off, sMessage):
        """
        Raises a decoding error (ParserException) for offset off in sRawCode.

        The offset is translated into a line number relative to the block
        start and a 1-based column within that line.
        """
        offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
        iLine          = sRawCode.count('\n', 0, off);
        raise ParserException('%s:%d:%d: parsing error: %s'
                              % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));

    def raiseStmtError(self, sName, sMessage):
        """ Raises a statement parser error (ParserException), citing the block start line. """
        raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));

    def checkStmtParamCount(self, sName, asParams, cParamsExpected):
        """
        Checks the parameter count, raising an error if it doesn't match.
        Returns True on success, raises ParserException on mismatch.
        """
        if len(asParams) != cParamsExpected:
            raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                                  % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
        return True;

    #
    # Statement parsers.
    #
    # These are static methods taking the block as an explicit first argument
    # (oSelf); decodeCode() calls them as fnParser(self, sName, asParams).
    # A parser returns either a single statement object or a list/tuple of them.
    #

    @staticmethod
    def parseMcGeneric(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmt object. """
        _ = oSelf;
        return McStmt(sName, asParams);

    @staticmethod
    def parseMcGenericCond(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmtCond object. """
        _ = oSelf;
        return McStmtCond(sName, asParams);

    @staticmethod
    def parseMcBegin(oSelf, sName, asParams):
        """ IEM_MC_BEGIN - expects two parameters. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McBlock.parseMcGeneric(oSelf, sName, asParams);

    @staticmethod
    def parseMcArg(oSelf, sName, asParams):
        """ IEM_MC_ARG - parameters: type, name, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));

    @staticmethod
    def parseMcArgConst(oSelf, sName, asParams):
        """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);

    @staticmethod
    def parseMcArgLocalRef(oSelf, sName, asParams):
        """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');

    @staticmethod
    def parseMcArgLocalEFlags(oSelf, sName, asParams):
        """
        IEM_MC_ARG_LOCAL_EFLAGS - parameters: argument name, local name, argument number.

        Returns a tuple with two statements rather than a single one.
        """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
        return (
            McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
            McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
                      'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
        );

    @staticmethod
    def parseMcLocal(oSelf, sName, asParams):
        """ IEM_MC_LOCAL - parameters: type, name. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McStmtVar(sName, asParams, asParams[0], asParams[1]);

    @staticmethod
    def parseMcLocalConst(oSelf, sName, asParams):
        """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);

    # Note! All the call parsers below take the argument count from the last
    #       character of the statement name.

    @staticmethod
    def parseMcCallAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AIMPL_3|4 - parameter 0 is recorded as the rc name, parameter 1 is the function. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1, 0);

    @staticmethod
    def parseMcCallVoidAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_VOID_AIMPL_2|3 - parameter 0 is the function. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallAvxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_AVX_AIMPL_2|3 - parameter 0 is the function. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallFpuAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_FPU_AIMPL_1|2|3 - parameter 0 is the function. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallMmxAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_MMX_AIMPL_2|3 - parameter 0 is the function. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallSseAImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_SSE_AIMPL_2|3 - parameter 0 is the function. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallCImpl(oSelf, sName, asParams):
        """ IEM_MC_CALL_CIMPL_0|1|2|3|4|5 - parameter 1 is the function. """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1);

    #
    # Text helpers.
    #

    @staticmethod
    def stripComments(sCode):
        """
        Returns sCode with comments removed.

        A C++ comment is removed up to, but not including, the newline.  A C
        comment is replaced by a single space unless there already is
        whitespace on either side of it.  An unterminated comment truncates
        the code at the comment start, stripping trailing whitespace.

        Note! Does not take string literals into account.
        """
        off = 0;
        while off < len(sCode):
            off = sCode.find('/', off);
            if off < 0 or off + 1 >= len(sCode):
                break;

            if sCode[off + 1] == '/':
                # C++ comment.
                offEnd = sCode.find('\n', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sCode = sCode[ : off] + sCode[offEnd : ];
                off += 1;   # Step over the newline that is now at 'off'.

            elif sCode[off + 1] == '*':
                # C comment
                offEnd = sCode.find('*/', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sSep = ' ';
                if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
                    sSep = '';
                sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
                off += len(sSep);

            else:
                # Not a comment.
                off += 1;
        return sCode;

    @staticmethod
    def extractParam(sCode, offParam):
        """
        Extracts the parameter value at offParam in sCode.
        Returns stripped value and the end offset of the terminating ',' or ')'.

        The end offset is len(sCode) if no terminator was found.  Commas and
        closing parentheses nested inside parentheses are not terminators.
        """
        # Extract it.
        cNesting = 0;
        offStart = offParam;
        while offParam < len(sCode):
            ch = sCode[offParam];
            if ch == '(':
                cNesting += 1;
            elif ch == ')':
                if cNesting == 0:
                    break;
                cNesting -= 1;
            elif ch == ',' and cNesting == 0:
                break;
            offParam += 1;
        return (sCode[offStart : offParam].strip(), offParam);

    @staticmethod
    def extractParams(sCode, offOpenParen):
        """
        Parses a parameter list.
        Returns the list of parameter values and the offset of the closing parentheses.
        Returns (None, len(sCode)) if no closing parentheses was found.
        """
        assert sCode[offOpenParen] == '(';
        asParams = [];
        off = offOpenParen + 1;
        while off < len(sCode):
            ch = sCode[off];
            if ch.isspace():
                off += 1;
            elif ch != ')':
                (sParam, off) = McBlock.extractParam(sCode, off);
                if off >= len(sCode):
                    # Ran off the end while inside a parameter, i.e. the list is
                    # unterminated.  Report it the documented way rather than
                    # tripping over an assertion or an IndexError.
                    return (None, len(sCode));
                asParams.append(sParam);
                if sCode[off] == ',':
                    off += 1;
            else:
                return (asParams, off);
        return (None, off);

    @staticmethod
    def findClosingBraces(sCode, off, offStop):
        """
        Finds the matching '}' for the '{' at off in sCode.
        Returns offset of the matching '}' on success, otherwise -1.

        Note! Does not take comments into account.
        """
        cDepth = 1;
        off   += 1;
        while off < offStop:
            offClose = sCode.find('}', off, offStop);
            if offClose < 0:
                break;
            # Any '{' between here and the '}' opens a nested block.
            cDepth += sCode.count('{', off, offClose);
            cDepth -= 1;
            if cDepth == 0:
                return offClose;
            off = offClose + 1;
        return -1;

    @staticmethod
    def countSpacesAt(sCode, off, offStop):
        """ Returns the number of whitespace characters at off in sCode, not going beyond offStop. """
        offStart = off;
        while off < offStop and sCode[off].isspace():
            off += 1;
        return off - offStart;

    @staticmethod
    def skipSpacesAt(sCode, off, offStop):
        """ Returns first offset at or after off for a non-space character (offStop at the most). """
        return off + McBlock.countSpacesAt(sCode, off, offStop);

    @staticmethod
    def isSubstrAt(sStr, off, sSubStr):
        """ Returns true if sSubStr is found at off in sStr. """
        return sStr[off : off + len(sSubStr)] == sSubStr;

    ## Matches the start of a C/C++ control statement (if, else, while, for, do).
    koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches references to the listed 'iem.s.' decoder state members.
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');

    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        A negative offStop means the end of sRawCode.  iLevel is the current
        nesting depth of conditionals; it is incremented for each recursive
        call decoding a branch.

        Returns list of McStmt instances.
        Raises ParserException on failure.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                # Slicing yields '' rather than an IndexError should the '/' be the very last character.
                ch = sRawCode[off + 1 : off + 2];
                if ch == '/': # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif ch == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                #
                # Note! These searches are deliberately not limited by offStop, as the
                #       terminating ';' of a one-line if/else branch is located at offStop.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Trim whitespace between the statement and the '{'.  Only whitespace is
                    # dropped, so the ')' survives even when the '{' follows it directly.
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                # Note! offEnd is the offset of the last character of the statement here.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    # Trim trailing whitespace.  As offEnd is inclusive, it is the character
                    # at offEnd that must be tested, not the one before it.
                    while offEnd > off and sRawCode[offEnd].isspace():
                        offEnd -= 1;
                else:
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                              or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                              );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    offAfterElse = off + len('else');
                    # The slice is empty (and thus not a space) if the 'else' is the very last thing in the buffer.
                    if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[offAfterElse : offAfterElse + 1].isspace():
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');

        return aoStmts;

    def decode(self):
        """
        Decodes the block, populating self.aoStmts if necessary.
        Returns the statement list.
        Raises ParserException on failure.
        """
        if not self.aoStmts:
            self.aoStmts = self.decodeCode(''.join(self.asLines));
        return self.aoStmts;


    def checkForTooEarlyEffSegUse(self, aoStmts):
        """
        Checks if iEffSeg is used before the effective address has been decoded.
        Returns None on success, error string on failure.

        See r158454 for an example of this issue.
        """

        # Locate the first IEM_MC_CALC_RM_EFF_ADDR statement, if found, scan backwards
        # for IEMCPU::iEffSeg references.  No need to check conditional branches,
        # as we're ASSUMING these will not occur before address calculation.
        for iCalcStmt, oStmt in enumerate(aoStmts):
            if oStmt.sName == 'IEM_MC_CALC_RM_EFF_ADDR':
                for iStmt in range(iCalcStmt - 1, -1, -1):
                    if any('pVCpu->iem.s.iEffSeg' in sArg for sArg in aoStmts[iStmt].asParams):
                        return "statement #%u: pVCpu->iem.s.iEffSeg is used prior to IEM_MC_CALC_RM_EFF_ADDR!" % (iStmt + 1,);
                break;
        return None;

    def check(self):
        """
        Performs some sanity checks on the block.
        Returns error string list, empty if all is fine.
        Raises ParserException if the block cannot be decoded.
        """
        aoStmts = self.decode();
        asRet   = [];

        sRet = self.checkForTooEarlyEffSegUse(aoStmts);
        if sRet:
            asRet.append(sRet);

        return asRet;
2489
2490
2491
2492## IEM_MC_XXX -> parser dictionary.
2493# The raw table was generated via the following command
2494# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2495# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
## Dispatch table for IEM_MC_XXX statements: maps the statement (macro) name
## to the McBlock parser method listed for it.  Most statements use the generic
## parser (McBlock.parseMcGeneric); the IEM_MC_ARG* / IEM_MC_LOCAL* declarations,
## the IEM_MC_CALL_* statements and the IEM_MC_IF_* conditionals have dedicated
## parser methods.
g_dMcStmtParsers = {
    'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
    'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
    'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
    'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
    'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
    'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
    'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
    'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
    'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
    'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
    'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
    'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
    'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
    'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
    'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
    'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
    'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
    'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
    'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
    'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
    'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
    'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
    'IEM_MC_ARG': McBlock.parseMcArg,
    'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
    'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
    'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
    'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
    'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
    'IEM_MC_ASSIGN_U8_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_ASSIGN_U32_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_BEGIN': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
    'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
    'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
    'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
    'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
    'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
    'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
    'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
    'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
    'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
    'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
    'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
    'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
    'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
    'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
    'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
    'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
    'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
    'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
    'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
    'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
    'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
    'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
    'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
    'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
    'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
    'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
    'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
    'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
    'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
    'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
    'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
    'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_DEFER_TO_CIMPL_0_RET': McBlock.parseMcGeneric,
    'IEM_MC_DEFER_TO_CIMPL_1_RET': McBlock.parseMcGeneric,
    'IEM_MC_DEFER_TO_CIMPL_2_RET': McBlock.parseMcGeneric,
    'IEM_MC_DEFER_TO_CIMPL_3_RET': McBlock.parseMcGeneric,
    'IEM_MC_END': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
    'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
    'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
    'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
    'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
    'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
    'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
    'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
    'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
    'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
    'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
    'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
    'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
    'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
    'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
    'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
    'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
    'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
    'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
    'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
    'IEM_MC_LOCAL': McBlock.parseMcLocal,
    'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
    'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
    'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
    'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
    'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
    'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
    'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
    'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
    'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
    'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
    'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
    'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
    'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
    'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
    'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
    'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
    'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
    'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
    'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
    'IEM_MC_POP_U16': McBlock.parseMcGeneric,
    'IEM_MC_POP_U32': McBlock.parseMcGeneric,
    'IEM_MC_POP_U64': McBlock.parseMcGeneric,
    'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
    'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
    'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
    'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
    'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
    'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
    'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
    'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
    'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
    'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
    'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
    'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
    'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
    'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
    'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
    'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
    'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
    'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
    'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
    'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
    'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
    'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
    'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
    'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
    'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
    'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
    'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
    'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
    'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
    'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
    'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
    'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
    'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
    'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
    'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
    'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
    'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
    'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
    'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
    'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
    'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
    'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
    'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
    'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
    'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
    'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
    'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
    'IEM_MC_STORE_YREG_U128': McBlock.parseMcGeneric,
    'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
    'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
    'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
    'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
    'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
    'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
    'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
    'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
    'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
    'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
    'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
    'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
    'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
};
2874
## List of microcode blocks.
## Module-level list that starts out empty; per the type comment its elements
## are McBlock instances.
g_aoMcBlocks = [] # type: list(McBlock)
2877
2878
2879
class ParserException(Exception):
    """
    Exception type used by the parser for fatal errors.

    The message is handed unchanged to the Exception base class.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2884
2885
2886class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2887 """
2888 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2889 """
2890
2891 ## @name Parser state.
2892 ## @{
2893 kiCode = 0;
2894 kiCommentMulti = 1;
2895 ## @}
2896
class Macro(object):
    """
    A preprocessor macro: name, optional parameter list, body and line number.
    """

    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The body text that expandMacro works on.
        self.iLine  = iLine;    ##< Line number as supplied by the caller.
        ## Matches any one parameter name, optionally with a '##' operator
        ## (and surrounding whitespace) on either side.  None when the macro
        ## has no parameters.
        self.oReArgMatch = None;
        if asArgs:
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)');

    @staticmethod
    def _needSpace(ch):
        """
        Cosmetic helper for expandMacro: True when ch is a whitespace
        character.  (The '(' comparison cannot change the result for a single
        character; it is kept as in the original expression.)
        """
        return ch != '(' and ch.isspace();

    def expandMacro(self, oParent, asArgs = None):
        """
        Returns the macro body with every parameter replaced by its argument.

        oParent is accepted but not used.  For a macro with parameters asArgs
        must hold one value per parameter; for a simple macro it must be
        empty or None.  Where a parameter is joined with '##', the operator
        and the whitespace around it are replaced together with the name.
        """
        _ = oParent;
        sExpanded = self.sBody;

        # Simple macro: nothing to substitute.
        if not self.oReArgMatch:
            assert not asArgs;
            return sExpanded;

        assert len(asArgs) == len(self.asArgs);
        dValues = { self.asArgs[idx]: sArg for idx, sArg in enumerate(asArgs) };

        oHit = self.oReArgMatch.search(sExpanded);
        while oHit:
            offStart = oHit.start();
            offEnd   = oHit.end();
            sValue   = dValues[oHit.group(2)];

            # Padding is only considered on a side that has no '##' operator.
            fPadBefore = not oHit.group(1) and offStart > 0         and self._needSpace(sExpanded[offStart]);
            fPadAfter  = not oHit.group(3) and offEnd < len(sExpanded) and self._needSpace(sExpanded[offEnd]);

            sExpanded = (sExpanded[:offStart]
                         + (' ' if fPadBefore else '')
                         + sValue
                         + (' ' if fPadAfter else '')
                         + sExpanded[offEnd:]);

            # Continue from the old match start plus the length of the value.
            oHit = self.oReArgMatch.search(sExpanded, offStart + len(sValue));

        return sExpanded;
2938
2939
def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
    """
    Initializes the parser state for one source file.

    sSrcFile            Source file name; used as prefix in error messages.
    asLines             The lines of the source file.
    sDefaultMap         Key into g_dInstructionMaps selecting the default
                        instruction map (asserted to be present).
    oInheritMacrosFrom  Optional object whose dMacros (shallow copy) and
                        oReMacros become the initial macro state.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: list(Instruction)
    self.oCurFunction   = None  # type: DecoderFunction
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.
    if oInheritMacrosFrom:
        self.dMacros    = dict(oInheritMacrosFrom.dMacros);
        self.oReMacros  = oInheritMacrosFrom.oReMacros;

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # All patterns are raw strings: sequences such as '\[', '\d', '\s', '\A'
    # and '\*' are not valid escapes in ordinary string literals and draw
    # warnings from newer Python versions.  The pattern text is unchanged.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z'); ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END|DEFER_TO_CIMPL_[0-5]_RET)\s*\(');
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Comment tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':         self.parseTagOpBrief,
        '@opdesc':          self.parseTagOpDesc,
        '@opmnemonic':      self.parseTagOpMnemonic,
        '@op1':             self.parseTagOpOperandN,
        '@op2':             self.parseTagOpOperandN,
        '@op3':             self.parseTagOpOperandN,
        '@op4':             self.parseTagOpOperandN,
        '@oppfx':           self.parseTagOpPfx,
        '@opmaps':          self.parseTagOpMaps,
        '@opcode':          self.parseTagOpcode,
        '@opcodesub':       self.parseTagOpcodeSub,
        '@openc':           self.parseTagOpEnc,
        '@opfltest':        self.parseTagOpEFlags,
        '@opflmodify':      self.parseTagOpEFlags,
        '@opflundef':       self.parseTagOpEFlags,
        '@opflset':         self.parseTagOpEFlags,
        '@opflclear':       self.parseTagOpEFlags,
        '@ophints':         self.parseTagOpHints,
        '@opdisenum':       self.parseTagOpDisEnum,
        '@opmincpu':        self.parseTagOpMinCpu,
        '@opcpuid':         self.parseTagOpCpuId,
        '@opgroup':         self.parseTagOpGroup,
        '@opunused':        self.parseTagOpUnusedInvalid,
        '@opinvalid':       self.parseTagOpUnusedInvalid,
        '@opinvlstyle':     self.parseTagOpUnusedInvalid,
        '@optest':          self.parseTagOpTest,
        '@optestign':       self.parseTagOpTestIgnore,
        '@optestignore':    self.parseTagOpTestIgnore,
        '@opcopytests':     self.parseTagOpCopyTests,
        '@oponly':          self.parseTagOpOnlyTest,
        '@oponlytest':      self.parseTagOpOnlyTest,
        '@opxcpttype':      self.parseTagOpXcptType,
        '@opstats':         self.parseTagOpStats,
        '@opfunction':      self.parseTagOpFunction,
        '@opdone':          self.parseTagOpDone,
    };
    # Numbered test tags, in both the '@optestN' and '@optest[N]' spellings.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    self.asErrors = [];
3024
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is prefixed with the source file
    name and the current line number (self.iLine).
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
3030
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    raise ParserException("%s:%d: error: %s" % (self.sSrcFile, iReportLine, sMessage,));
3036
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Always returns False, which lets callers write 'return self.error(...)'.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3044
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number in self.asErrors.

    Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
3052
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error located inside the current comment; the reported line
    number is self.iCommentLine + iLineInComment.

    Always returns False.
    """
    iReportLine = self.iCommentLine + iLineInComment;
    self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iReportLine, sMessage,));
    return False;
3060
def printErrors(self):
    """
    Writes all collected error messages to stderr in one go (nothing is
    written when there are none).

    Returns the number of collected errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
3069
def debug(self, sMessage):
    """
    Prints a 'debug: ' prefixed message to stderr when self.fDebug is set;
    does nothing otherwise.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
3076
def stripComments(self, sLine):
    """
    Returns sLine with everything matched by self.oReComment replaced by a
    single space.

    If a comment opener or closer is still present afterwards, an error is
    recorded via self.error(); the line is returned regardless.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
3087
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the line that starts the table (it carries the table name and,
    optionally, the entry count in brackets).

    Returns True when the closing brace is found after the expected number
    of entries.  Returns False (after recording an error) when the opening
    brace line is missing, the entry count is wrong, or the input ends
    before the table does.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    # Raw string: '\[' is not a valid escape in an ordinary string literal.
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those tables has 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walks backwards so inserting and deleting does not disturb the
        # indexes still to be visited.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    # Pick the first instruction that already matches this
                    # opcode/prefix or that still has them unset (filling
                    # them in).
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    # None fits: register a copy of the first one for this
                    # map/opcode/prefix combination.
                    if not oInstr:
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            # Every entry other than the closing brace counts, including the
            # skipped iemOp_Invalid* ones.
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
3193
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and appends it to both
    g_aoAllInstructions and self.aoCurInstrs.

    iLine defaults to the current line (self.iLine) when None.
    Returns the new instruction object.
    """
    iInstrLine = iLine if iLine is not None else self.iLine;
    oNewInstr  = Instruction(self.sSrcFile, iInstrLine);
    g_aoAllInstructions.append(oNewInstr);
    self.aoCurInstrs.append(oNewInstr);
    return oNewInstr;
3202
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string.

    Nothing is done when oInstr.sMnemonic is already set.  Otherwise sStats is
    split on '_': the first word, lower-cased, becomes the mnemonic, and when
    the instruction has no operands yet, each remaining word is looked up in
    g_kdOpTypes and appended as an Operand.

    Returns True on success, False as soon as a word is not a known operand
    type (operands added before that point are kept).
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    asTypeWords = asParts[1:];
    if asTypeWords and not oInstr.aoOperands:
        for sTypeWord in asTypeWords:
            if sTypeWord not in g_kdOpTypes:
                # Unknown operand type: give up quietly, no error is raised here.
                return False;
            oInstr.aoOperands.append(Operand(g_kdOpTypes[sTypeWord][1], sTypeWord));
    return True;
3218
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Records iLine as the completion line, fills in whichever of sMnemonic,
    sDisEnum, sStats, sFunction, aoMaps and sEncoding are still unset, and
    registers the instruction in its maps and in the global per-stat and
    per-function dictionaries.

    Asserts that the instruction has not been completed before.
    Always returns True; a duplicate opstat value is reported via self.error().
    """
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Tagged ("specified") instructions and untagged legacy ones, e.g.
    #       /** Opcode 0x0f 0x00 /0. */
    #       FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    # currently get the same treatment, so there is no special casing here.
    #

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from the stats name, or failing that from
    # the function name with the 'iemOp_' part removed.
    if oInstr.sMnemonic is None:
        sStatsLike = oInstr.sStats;
        if sStatsLike is None and oInstr.sFunction is not None:
            sStatsLike = oInstr.sFunction.replace('iemOp_', '');
        if sStatsLike is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, sStatsLike);

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name from the function name, or from
    # the mnemonic and operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic \
                          + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);

    # Derive the IEM function name from mnemonic and operand types, or from
    # the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic \
                             + ''.join(['_' + oOperand.sType for oOperand in oInstr.aoOperands if oOperand.sType]);
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply the default map and then add the instruction to all its maps.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            # No operands: unused encodings with a sub-opcode are ModR/M, the rest fixed.
            sBaseEncoding = 'ModR/M' if oInstr.fUnused and oInstr.sSubOpcode else 'fixed';
            if oInstr.onlyInVexMaps():
                sBaseEncoding = 'VEX.' + sBaseEncoding;
            oInstr.sEncoding = sBaseEncoding;
        elif oInstr.aoOperands[0].usesModRM():
            fVex = (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps();
            oInstr.sEncoding = 'VEX.ModR/M' if fVex else 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    #
    sStats = oInstr.sStats;
    if sStats:
        if sStats in g_dAllInstructionsByStat:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (sStats, oInstr, g_dAllInstructionsByStat[sStats],));
        else:
            g_dAllInstructionsByStat[sStats] = oInstr;

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        g_dAllInstructionsByFunction.setdefault(oInstr.sFunction, []).append(oInstr);

    return True;
3324
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Done with the current instruction(s).

    Completes every instruction in self.aoCurInstrs via doneInstructionOne(),
    using self.iLine as completion line, or self.iCommentLine + iLineInComment
    when iLineInComment is given.  Updates the cTotalStubs and cTotalInstr
    counters, then clears self.sComment and self.aoCurInstrs.

    When fEndOfFunction is set, the current function (if any) is completed
    with the lines from its start up to self.iLine, after which
    self.oCurFunction and self.iMcBlockInFunc are reset.

    Always returns True.
    """
    for oCurInstr in self.aoCurInstrs:
        if iLineInComment is None:
            iDoneLine = self.iLine;
        else:
            iDoneLine = self.iCommentLine + iLineInComment;
        self.doneInstructionOne(oCurInstr, iDoneLine);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;

    self.cTotalInstr += len(self.aoCurInstrs);

    # Start afresh for whatever comes next.
    self.sComment    = '';
    self.aoCurInstrs = [];

    if not fEndOfFunction:
        return True;

    if self.oCurFunction:
        iFirst = self.oCurFunction.iBeginLine - 1;
        self.oCurFunction.complete(self.iLine, self.asLines[iFirst : self.iLine]);
    self.oCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
3345
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets the sAttrib attribute of all current instructions to oValue.

    Unless fOverwrite is exactly True, only attributes whose current value
    is None are replaced; any other existing value is left alone.
    """
    fForce = fOverwrite is True;
    for oCurInstr in self.aoCurInstrs:
        if fForce or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
3357
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue.

    The array is padded with None entries until iEntry is a valid index.
    Unless fOverwrite is exactly True, only entries that are None are replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoValues = getattr(oCurInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoValues);
        if cMissing > 0:
            aoValues.extend([None] * cMissing);
        if fOverwrite is True or aoValues[iEntry] is None:
            aoValues[iEntry] = oValue;
3369
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first line starts with the word 'Opcode' followed by a word
    with a '0x' or '0X' prefix, the words after 'Opcode' are recorded as
    the 'sRawOldOpcodes' attribute of the current instructions (joined by
    single spaces, with any '0X' prefix normalized to '0x').

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if    len(asWords) >= 2 \
      and asWords[0] == 'Opcode' \
      and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and keep the opcode bytes / modifiers.
        asWords = asWords[1:];
        for iWord, sWord in enumerate(asWords):
            if sWord.startswith('0X'):
                asWords[iWord] = '0x' + sWord[2:];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asWords));

    return False;
3385
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    If there are no current instructions, one is added at line
    self.iCommentLine + iTagLine.  The cOpTags counter of every current
    instruction is incremented, and self.cTotalTagged is bumped for each
    instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        cTagsBefore = oCurInstr.cOpTags;
        oCurInstr.cOpTags = cTagsBefore + 1;
        if cTagsBefore == 0:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3395
3396 @staticmethod
3397 def flattenSections(aasSections):
3398 """
3399 Flattens multiline sections into stripped single strings.
3400 Returns list of strings, on section per string.
3401 """
3402 asRet = [];
3403 for asLines in aasSections:
3404 if asLines:
3405 asRet.append(' '.join([sLine.strip() for sLine in asLines]));
3406 return asRet;
3407
3408 @staticmethod
3409 def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
3410 """
3411 Flattens sections into a simple stripped string with newlines as
3412 section breaks. The final section does not sport a trailing newline.
3413 """
3414 # Typical: One section with a single line.
3415 if len(aasSections) == 1 and len(aasSections[0]) == 1:
3416 return aasSections[0][0].strip();
3417
3418 sRet = '';
3419 for iSection, asLines in enumerate(aasSections):
3420 if asLines:
3421 if iSection > 0:
3422 sRet += sSectionSep;
3423 sRet += sLineSep.join([sLine.strip() for sLine in asLines]);
3424 return sRet;
3425
3426
3427
3428 ## @name Tag parsers
3429 ## @{
3430
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  (Originally documented as defaulting to the first
    sentence of \@opdesc when absent; that fallback is not done here.)

    The value must be non-empty, at most 180 characters including the
    terminating full stop (which is added if missing), and consist of a
    single sentence.  The result is stored in oInstr.sBrief.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Find the first dot followed by a space (i.e. a sentence end inside the
    # text); dots inside words or numbers are skipped.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3460
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into one string and appended to oInstr.asDescSections.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asNewSections = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asNewSections);

    _ = (sTag, iEndLine);
    return True;
3475
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of prefixes,
    groups and whatnot, there are times when the mnemonic isn't that of an
    actual assembler mnemonic.

    Returns True after storing the value in oInstr.sMnemonic, otherwise the
    result of self.errorComment() (invalid name, or a mnemonic is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to replace a mnemonic that has already been set.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));
    oInstr.sMnemonic = sNewMnemonic;

    _ = iEndLine;
    return True;
3498
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@op1, \@op2, \@op3, \@op4
    Value:  [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  Valid values are the keys of
    g_kdOpLocations.  When omitted, the location associated with the type
    in g_kdOpTypes is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  Valid values
    are the keys of g_kdOpTypes.

    The operand index is taken from the last character of the tag.

    Returns True after storing the operand, otherwise the result of
    self.errorComment() (malformed value, unknown type or location, or the
    operand slot is already occupied).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.  Lower
    # numbered operands not seen yet are represented by None placeholders.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3548
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Every name must be a key of g_dInstructionMaps and must not name a map
    the instruction is already assigned to.  The maps are appended to
    oInstr.aoMaps.

    Returns True on success, otherwise the result of self.errorComment().
    A name repeated within the value itself is reported, but does not fail
    the tag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten (lines and sections are both comma separated), split up and
    # validate the value.
    asMapNames = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if not asMapNames:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMapName in asMapNames:
        if sMapName not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMapName, ', '.join(g_dInstructionMaps.keys()),));

    # Throw errors on maps that have been assigned by an earlier tag.
    for oAssignedMap in oInstr.aoMaps:
        if oAssignedMap.sName in asMapNames:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oAssignedMap.sName));

    # Add the maps to the current list.
    for sMapName in asMapNames:
        oNewMap = g_dInstructionMaps[sMapName];
        if oNewMap in oInstr.aoMaps:
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMapName));
        else:
            oInstr.aoMaps.append(oNewMap);

    _ = iEndLine;
    return True;
3583
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is lower-cased.  'n/a' is stored as None and 'none' as is.
    Anything else must be a valid opcode byte (a bare two character value
    gets '0x' prepended first).  Values other than 'n/a' must also be
    present in g_kdPrefixes.

    Returns True after storing the prefix in oInstr.sPrefix, otherwise the
    result of self.errorComment() (missing value, more than one value,
    invalid prefix, or a prefix is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3621
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg | 11/reg | !11/reg

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted is anything _isValidOpcodeByte() approves of, or one of the
    prefixes '/', '11/' and '!11/' followed by a single 'reg' digit in the
    range 0 thru 7 (the ModR/M reg field is three bits wide).

    Returns True after storing the value in oInstr.sOpcode, otherwise the
    result of self.errorComment() (invalid value, or an opcode is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sRegPrefix in ('/', '11/', '!11/'):
            if    len(sOpcode) == len(sRegPrefix) + 1 \
              and sOpcode.startswith(sRegPrefix) \
              and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3651
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored in
    oInstr.sSubOpcode is the first element of the matching entry.

    Returns True on success, otherwise the result of self.errorComment()
    (unknown value, or a sub opcode is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sSubOpcode = self.flattenAllSections(aasSections);
    if sSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: %s)'
                                           % (sTag, sSubOpcode, ', '.join(g_kdSubOpcodes.keys()),));
    sSubOpcode = g_kdSubOpcodes[sSubOpcode][0];

    # Set it.
    if oInstr.sSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, oInstr.sSubOpcode, sSubOpcode,));
    oInstr.sSubOpcode = sSubOpcode;

    _ = iEndLine;
    return True;
3678
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value is accepted when it is a key of g_kdEncodings, a key of
    g_dInstructionMaps, or something _isValidOpcodeByte() approves of.

    Returns True after storing the value in oInstr.sEncoding, otherwise the
    result of self.errorComment() (invalid value, or an encoding is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    fKnown = sNewEncoding in g_kdEncodings \
          or sNewEncoding in g_dInstructionMaps \
          or _isValidOpcodeByte(sNewEncoding);
    if not fKnown:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Set it.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldEncoding, sNewEncoding,));
    oInstr.sEncoding = sNewEncoding;

    _ = iEndLine;
    return True;
3705
## EFlags tag to Instruction attribute name.
# Used by parseTagOpEFlags to pick the instruction attribute that gets the
# flag list for a given tag.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
3714
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of EFLAGS mnemonics (keys of
    g_kdEFlagsMnemonics), or the single word 'none' (any case) for an
    empty list.  The list is stored in the instruction attribute that
    self.kdOpFlagToAttr associates with the tag.

    Returns True on success, False if any flag is invalid (each one is
    reported), or the result of self.errorComment() when the attribute has
    already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    asFlags = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',').split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            # Retry without surrounding whitespace before giving up on it.
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);

    _ = iEndLine;
    return True;
3746
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints; the single word 'none' (any case)
    means no hints.  Valid hints are added as keys to oInstr.dHints.

    Returns True on success and False if any hint is invalid (each one is
    reported).  A hint the instruction already has is reported, but does
    not fail the tag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: str.split() without a separator never yields words with leading
    #       or trailing whitespace, so no further stripping is necessary.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3780
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.  The value must match self.oReDisEnum.

    Returns True after storing the value in oInstr.sDisEnum.  Returns False
    when no value is given, or the result of self.errorComment() for an
    invalid name or when an enum value is already set.  Surplus words are
    reported, but the first one is still used.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asValues = self.flattenAllSections(aasSections).split();
    cValues  = len(asValues);
    if cValues != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asValues,));
    if cValues == 0:
        return False;

    sNewDisEnum = asValues[0];
    if not self.oReDisEnum.match(sNewDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Set it.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sNewDisEnum,));
    oInstr.sDisEnum = sNewDisEnum;

    _ = iEndLine;
    return True;
3809
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.  The name must be
    present in g_kdCpuNames.

    Returns True when the value was valid, otherwise the result of
    self.errorComment() (missing value or unknown CPU name).  Surplus
    words are reported, but the first one is still used.  An attempt to
    replace an already set oInstr.sMinCpu with a different name is
    reported and the existing value is kept.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, validate it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Repeating the same value is harmless, a different one is not.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3839
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of keys of g_kdCpuIdFlags,
    or the single word 'none' (any case) for an empty list.  Valid flags
    are appended to oInstr.asCpuIds.

    Returns True on success and False if any flag is invalid (each one is
    reported).  A flag the instruction already has is reported, but does
    not fail the tag.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note: str.split() without a separator never yields words with leading
    #       or trailing whitespace, so no further stripping is necessary.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3873
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName is expected.

    Returns True after storing the name in oInstr.sGroup, otherwise the
    result of self.errorComment() (wrong number of words, invalid name, or
    a group is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid name.
    asNames = self.flattenAllSections(aasSections).split();
    if len(asNames) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asNames,));
    sNewGroup = asNames[0];
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Set it.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sNewGroup,));
    oInstr.sGroup = sNewGroup;

    _ = iEndLine;
    return True;
3899
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one style from g_kdInvalidStyles is expected.  It is stored in
    oInstr.sInvalidStyle; in addition '@opunused' sets oInstr.fUnused and
    '@opinvalid' sets oInstr.fInvalid.

    Returns True on success, otherwise the result of self.errorComment()
    (wrong number of words, unknown style, or a style is already set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split into words and make sure there is exactly one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    sNewStyle = asStyles[0];
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Set it.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));
    oInstr.sInvalidStyle = sNewStyle;

    # Two of the three tags also flag the instruction itself.
    sFlagAttrib = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttrib is not None:
        setattr(oInstr, sFlagAttrib, True);

    _ = iEndLine;
    return True;
3937
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section is one test.  The words of a section are sorted into
    outputs (after '->'), inputs (before '->') and selectors (before '/').
    A test that parses without trouble is appended to oInstr.aoTests;
    problems are reported via self.errorComment() and the test is dropped.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur   = asOutputs;
        fRc     = True;
        asWords = sFlatSection.split();
        # The words are processed back to front, so we start out collecting
        # outputs and switch lists at each separator.  The list switching
        # checks are by identity: equal content (like two empty lists) must
        # not be mistaken for being in the expected state.
        for sWord in reversed(asWords):
            # Check for array switchers.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            # Predicates are shorthands that expand to a full condition.
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the default type of the field is used.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                 % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                             % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if all went well, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
4082
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Numbered \@optest tag, written as either \@optest42 or \@optest[42].

    The number is taken from the tag name itself.  When it differs from the
    number of tests already recorded for the current instruction, an error
    comment is logged.  Either way the tag is then handed to parseTagOpTest()
    and that method's return value is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # '@optest[' is 8 characters long and '@optest' is 7; the number follows.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
4098
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    Lets a test be switched off (for instance while debugging another one)
    just by renaming its tag; nothing is parsed and nothing is recorded.

    See also \@oponlytest.

    Always returns True.
    """
    del sTag, aasSections, iTagLine, iEndLine; # All arguments are deliberately unused.
    return True;
4110
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each whitespace separated reference must match either self.oReStatsName
    or self.oReFunctionName; valid, not yet seen references are appended to
    the instruction's asCopyTests list.  Duplicates and invalid references
    are reported via errorComment() and skipped.

    Returns the errorComment() result if no reference was given at all,
    otherwise True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least one reference value' % (sTag,));
    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
        elif self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    _ = iEndLine;
    return True;
4139
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    See also \@optestignore.

    Returns True after adding the instruction to g_aoOnlyTestInstructions
    (once only), or the errorComment() result if the tag carries a value.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value, so anything left after flattening is an error.
    sExtra = self.flattenAllSections(aasSections).strip();
    if not sExtra:
        if oInstr not in g_aoOnlyTestInstructions:
            g_aoOnlyTestInstructions.append(oInstr);
        _ = iEndLine;
        return True;

    return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sExtra));
4162
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one value is accepted and it must be a key of g_kdXcptTypes.
    The type can only be set once per instruction.

    Returns True on success, otherwise the errorComment() result.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one exception type, please: %s' % (sTag, asTypes,));
    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid exception type: %s (valid: %s)'
                                           % (sTag, sType, sorted(g_kdXcptTypes.keys()),));
    # Set it, refusing to silently replace an earlier @opxcpttype.
    if oInstr.sXcptType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, oInstr.sXcptType, sType,));
    oInstr.sXcptType = sType;

    _ = iEndLine;
    return True;
4189
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or it is
    generated from the mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation isn't following immediately or isn't implemented yet.

    Returns True on success, otherwise the errorComment() result (invalid
    name, or a function name has already been set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a function name.
    sNewName = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sNewName):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                           % (sTag, sNewName, self.oReFunctionName.pattern));

    # Never replace a name that is already there.
    sOldName = oInstr.sFunction;
    if sOldName is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                           % (sTag, sOldName, sNewName,));

    oInstr.sFunction = sNewName;
    _ = iEndLine;
    return True;
4217
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought this may be necessary when specifying instructions whose
    implementation isn't following immediately or isn't implemented yet.

    Returns True on success, otherwise the errorComment() result (invalid
    name, or a statistics name has already been set).
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a statistics name.
    sNewStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sNewStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                           % (sTag, sNewStats, self.oReStatsName.pattern));

    # Never replace a name that is already there.
    sOldStats = oInstr.sStats;
    if sOldStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                           % (sTag, sOldStats, sNewStats,));

    oInstr.sStats = sNewStats;
    _ = iEndLine;
    return True;
4245
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        \@opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions() result, or the errorComment() result if
    the tag was given a value.
    """
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        _ = sTag; _ = iEndLine;
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4258
4259 ## @}
4260
4261
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    Comments containing neither 'Opcode' nor '@' are rejected up front.
    Otherwise the comment is split into lines, an old style 'Opcode ...'
    first line is handed to parseCommentOldOpcode(), and every '@tag' found
    at the start of a line is dispatched, together with the text sections
    that follow it, to its handler in self.dTagHandlers.

    Returns False if the comment was rejected, True if it was processed.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    if self.sComment.find('Opcode') < 0 \
       and self.sComment.find('@') < 0:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.
    #
    asLines = self.sComment.split('\n');
    for iLine, sLine in enumerate(asLines):
        asLines[iLine] = sLine.lstrip().lstrip('*').lstrip();

    while asLines and not asLines[0]:
        self.iCommentLine += 1; # Presumably keeps the comment start line in step with the dropped lines.
        asLines.pop(0);

    while asLines and not asLines[-1]:
        asLines.pop(len(asLines) - 1);

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    # Note: asLines cannot be empty here, the 'Opcode' / '@' text that got us
    #       past the rejection test above survives the stripping.
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    # Text before the first tag is collected under the pseudo tag '@default'.
    # Each tag owns a list of sections (aasSections); an empty line after
    # some text starts a new section.
    #
    cOpTags = 0;
    sFlatDefault = None;
    sCurTag = '@default';
    iCurTagLine = 0;
    asCurSection = [];
    aasSections = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            if sLine:
                asCurSection.append(sLine);
            elif asCurSection:
                asCurSection = [];
                aasSections.append(asCurSection);
        else:
            #
            # Process the previous tag.
            #
            # A trailing empty section is dropped first.
            if not asCurSection and len(aasSections) > 1:
                aasSections.pop(-1);
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ['@encoding', '@opencoding']:
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.
            #
            # The tag name is lower cased; any text after it on the same
            # line becomes the first line of the first section.
            asSplit = sLine.split(None, 1);
            sCurTag = asSplit[0].lower();
            if len(asSplit) > 1:
                asCurSection = [asSplit[1],];
            else:
                asCurSection = [];
            aasSections = [asCurSection, ];
            iCurTagLine = iLine;

    #
    # Process the final tag.
    #
    # NOTE(review): unlike the in-loop handling above, the "Did you mean"
    #               hints are not issued for the last tag of a comment -
    #               confirm whether that is intentional.
    #
    if not asCurSection and len(aasSections) > 1:
        aasSections.pop(-1);
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cOpTags > 0 and sFlatDefault:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
4362
def parseMacroInvocation(self, sInvocation, offStartInvocation = 0):
    """
    Parses a macro invocation starting at offStartInvocation in sInvocation.

    When the closing parenthesis isn't found in sInvocation, the following
    lines from self.asLines (starting at index self.iLine) are appended and
    scanned as well.  Parentheses may nest and commas or parentheses inside
    single or double quoted strings are ignored.

    Returns three values:
        1. A list of macro arguments, where the zero'th is the macro name.
        2. The offset following the macro invocation, into sInvocation if
           this is on the same line or into the last line if it is on a
           different line.
        3. Number of additional lines the invocation spans (i.e. zero if
           it is all contained within sInvocation).
    """
    # The name is whatever sits between the start offset and the first '('.
    offParen = sInvocation.find('(', offStartInvocation);
    if offParen <= offStartInvocation:
        self.raiseError("macro invocation open parenthesis not found");
    sMacro = sInvocation[offStartInvocation:offParen].strip();
    if not self.oReMacroName.match(sMacro):
        self.raiseError("invalid macro name '%s'" % (sMacro,));
    asArgs = [sMacro, ];

    # Scan the arguments one character at a time.
    iNextLine = self.iLine;     # Index of the next line to pull in from self.asLines.
    cNesting  = 1;              # Parenthesis nesting level; 1 = the macro's own argument list.
    offCur    = offParen + 1;   # Current scanning offset.
    offArg    = offCur;         # Where the current argument starts.
    offLine   = 0;              # Where the most recently appended line starts in sInvocation.
    chInQuote = None;           # The quote character when inside a string, otherwise None.
    while cNesting > 0:
        # Out of text?  Pull in the next line, if there is one.
        if offCur >= len(sInvocation):
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                break;
            offLine      = offCur;
            sInvocation += self.asLines[iNextLine];
            iNextLine   += 1;
        ch = sInvocation[offCur];

        if chInQuote:
            # Inside a string: skip escaped characters, watch for the end quote.
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;
            elif ch == chInQuote:
                chInQuote = None;
        elif ch == '"' or ch == '\'':
            chInQuote = ch;
        elif ch == '(':
            cNesting += 1;
        elif ch == ',' or ch == ')':
            # Only top level separators terminate an argument.
            if cNesting == 1:
                asArgs.append(sInvocation[offArg:offCur].strip());
                offArg = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    return (asArgs, offCur - offLine, iNextLine - self.iLine);
4419
def findAndParseMacroInvocationEx(self, sCode, sMacro, offStart = 0):
    """
    Looks for the first occurrence of sMacro in sCode at or after offStart
    and parses it as a macro invocation if it is followed (ignoring
    whitespace) by an opening parenthesis.

    Only the first occurrence is considered.

    Returns (None, len(sCode), 0) if not found, otherwise the
    parseMacroInvocation() return value.
    """
    offHit = sCode.find(sMacro, offStart);
    # Use startswith() rather than indexing [0], as the latter raises
    # IndexError when the macro name is the last thing in sCode.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        return self.parseMacroInvocation(sCode, offHit);
    return (None, len(sCode), 0);
4429
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx() for callers
    that only want the argument list.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    asArgs, _, _ = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
4435
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro one by one, in the given order, and
    stops at the first one findAndParseMacroInvocation() gets a hit for.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is evaluated lazily, so no further macro names are tried
    # once a hit has been produced.
    oHits = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in oHits if asRet is not None), None);
4445
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross checked against what has already been
    specified for the last instruction in self.aoCurInstrs (mnemonic,
    operands, encoding, sub-opcode and statistics name), filling in
    whatever is still missing and reporting mismatches via self.error().
    The disassembler and IEM hints are merged into the instruction's dHints.

    sMacro is the macro name (only used in messages), sAsm is unused, and
    asOperands is the list of a_OpN values.

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    # iLineMnemonicMacro is -1 until a mnemonic macro has been seen for the instruction.
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.  The instruction must either have no operands yet
    # or the same number as the macro gives.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        # Entry [1] of the g_kdOpTypes value is the operand location.
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.  Entry [0] of the g_kdIemForms value is the encoding name.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.  Entry [1] of the
        # form is the list of operand locations, or None when unspecified.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    # Entry [4] of the operand type says what the form name must
                    # (or must not) contain for this kind of operand.
                    #
                    # NOTE(review): sForm.find('XOP') below is used as a bare truth
                    #               value: it is -1 (true) when 'XOP' is absent and 0
                    #               (false) when the form starts with 'XOP'.  Also,
                    #               find('VEX') > 0 is false for a form that starts
                    #               with 'VEX'.  This looks unintended - confirm the
                    #               wanted semantics before touching it, as it changes
                    #               which forms get reported.
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ] and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V' and ( not (sForm.find('VEX') > 0 or sForm.find('XOP')) \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those covered by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if oInstr.aoOperands[iOperand].sType != 'FIXED' \
                           and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));


        # Check @opcodesub.  Entry [2] of the form, when set, must occur in
        # the instruction's sub-opcode string.
        if oInstr.sSubOpcode \
           and g_kdIemForms[sForm][2] \
           and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    # Both hint arguments are '|' separated lists of prefixed names, or '0'.
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4585
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros lack the a_Stats and a_szMnemonic arguments of the EX
    variants, so both are derived from the lower case mnemonic and the
    operands before handing everything to workerIemOpMnemonicEx().

    Returns the workerIemOpMnemonicEx() result.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4595
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Process a IEM_MC_BEGIN macro invocation.

    Requires a current function and no MC block in progress, otherwise
    raiseError() is called.  A new McBlock is created, made the current
    block, appended to g_aoMcBlocks and the block counters are bumped.

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    oPrevBlock = self.oCurMcBlock;
    if oPrevBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (oPrevBlock.iBeginLine,));

    # The indent level is the statement's distance from the start of its
    # (sub-)line.  rfind() yields -1 when there is no preceding newline, in
    # which case nothing is subtracted.
    offSubLine = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent  = offBeginStatementInCodeStr - offSubLine;

    # Start a new block.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.oCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4624
4625 @staticmethod
4626 def extractLinesFromMacroExpansionLine(sRawLine, offBegin, offEnd, sBeginStmt = 'IEM_MC_BEGIN'):
4627 """
4628 Helper used by workerIemMcEnd and workerIemMcDeferToCImplXRet for
4629 extracting a statement block from a string that's the result of macro
4630 expansion and therefore contains multiple "sub-lines" as it were.
4631
4632 Returns list of lines covering offBegin thru offEnd in sRawLine.
4633 """
4634
4635 off = sRawLine.find('\n', offEnd);
4636 if off > 0:
4637 sRawLine = sRawLine[:off + 1];
4638
4639 off = sRawLine.rfind('\n', 0, offBegin) + 1;
4640 sRawLine = sRawLine[off:];
4641 if not sRawLine.strip().startswith(sBeginStmt):
4642 sRawLine = sRawLine[offBegin - off:]
4643
4644 return [sLine + '\n' for sLine in sRawLine.split('\n')];
4645
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Collects the source lines of the current MC block, cuts the final line
    off right after the 'IEM_MC_END();' statement, completes the block with
    them and clears self.oCurMcBlock.

    Calls raiseError() if there is no current block, if IEM_MC_BEGIN isn't
    the first word on its line, or if the final line doesn't contain a well
    formed 'IEM_MC_END ( ) ;' sequence (including when the line ends before
    the sequence is complete).

    Returns True.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1],
                                                          self.oCurMcBlock.offBeginLine, offEndStatementInLine);

    #
    # Strip anything following the IEM_MC_END(); statement in the final line,
    # so that we don't carry on any trailing 'break' after macro expansions
    # like for iemOp_movsb_Xb_Yb.
    #
    while asLines[-1].strip() == '':
        asLines.pop();
    sFinal        = asLines[-1];
    offEndInFinal = sFinal.find('IEM_MC_END');
    if offEndInFinal < 0:
        self.raiseError('bogus IEM_MC_END: Not in final line: %s' % (sFinal,));
    offFinalEnd = offEndInFinal + len('IEM_MC_END');

    # Expect '(', ')' and ';' in that order, each optionally preceded by
    # whitespace.  The offset is bounds checked so that a truncated statement
    # is reported as a parse error rather than blowing up with IndexError.
    cchFinal = len(sFinal);
    for chExpected in ('(', ')', ';'):
        while offFinalEnd < cchFinal and sFinal[offFinalEnd].isspace():
            offFinalEnd += 1;
        if offFinalEnd >= cchFinal or sFinal[offFinalEnd] != chExpected:
            self.raiseError('bogus IEM_MC_END: Expected "%s" at %s: %s' % (chExpected, offFinalEnd, sFinal,));
        offFinalEnd += 1;

    asLines[-1] = sFinal[: offFinalEnd];

    #
    # Complete and discard the current block.
    #
    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine,
                              offEndStatementInLine + offFinalEnd - offEndInFinal, asLines);
    self.oCurMcBlock = None;
    return True;
4709
def workerIemMcDeferToCImplXRet(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine, cParams):
    """
    Process a IEM_MC_DEFER_TO_CIMPL_[0-5]_RET macro invocation.

    The statement is turned into a complete McBlock of its own, holding a
    single McStmtCall, which is appended to g_aoMcBlocks.  self.iLine is
    advanced by the number of additional lines the invocation spans.

    cParams is the number in the macro name; the invocation is expected to
    yield cParams + 3 entries from the macro parser (name included).

    Calls raiseError() if there is no current function, if an MC block is in
    progress, or if the invocation cannot be parsed as expected.

    Returns True.
    """
    sStmt = 'IEM_MC_DEFER_TO_CIMPL_%d_RET' % (cParams,);
    if self.fDebugMc:
        self.debug('%s on %s off %s' % (sStmt, self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Check preconditions.
    if not self.oCurFunction:
        self.raiseError('%s w/o current function (%s)' % (sStmt, sCode,));
    if self.oCurMcBlock:
        self.raiseError('%s inside IEM_MC_BEGIN blocki starting at line %u' % (sStmt, self.oCurMcBlock.iBeginLine,));

    # Figure out the indent level the block starts at, adjusting for expanded multiline macros.
    cchIndent = offBeginStatementInCodeStr;
    offPrevNewline = sCode.rfind('\n', 0, offBeginStatementInCodeStr);
    if offPrevNewline >= 0:
        cchIndent -= offPrevNewline + 1;
    #self.debug('cchIndent=%s offPrevNewline=%s sFunc=%s' % (cchIndent, offPrevNewline, self.oCurFunction.sName));

    # Start a new block.
    oMcBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                       self.oCurFunction, self.iMcBlockInFunc, cchIndent);

    # Parse the statement.  asArgs is None when the statement name isn't
    # found in sCode or isn't followed by an opening parenthesis.
    asArgs, offAfter, cLines = self.findAndParseMacroInvocationEx(sCode, sStmt, offBeginStatementInCodeStr);
    if asArgs is None:
        self.raiseError('%s: Closing parenthesis not found!' % (sStmt,));
    if len(asArgs) != cParams + 3:
        self.raiseError('%s: findAndParseMacroInvocationEx returns %s args, expected %s!'
                        % (sStmt, len(asArgs), cParams + 3,));

    oMcBlock.aoStmts = [McStmtCall(asArgs[0], asArgs[1:], 1),];

    # These MCs are not typically part of macro expansions, but let's get
    # it out of the way immediately if it's the case.
    #
    # The first branch handles invocations spanning several source lines and
    # source lines holding at most one newline; the second one handles a
    # single source line with several embedded newlines.
    #
    # NOTE(review): the checks below are assert statements and thus vanish
    #               when running python with -O.
    if cLines > 0 or self.asLines[oMcBlock.iBeginLine - 1].count('\n') <= 1:
        asLines = self.asLines[self.iLine - 1 : self.iLine - 1 + cLines + 1];
        assert offAfter < len(asLines[-1]) and asLines[-1][offAfter] == ';', \
               'iBeginLine=%d iLine=%d offAfter=%s line: "%s"' % (oMcBlock.iBeginLine, self.iLine, offAfter, asLines[-1],);
        asLines[-1] = asLines[-1][:offAfter + 1];
    else:
        asLines = self.extractLinesFromMacroExpansionLine(self.asLines[self.iLine - 1], offBeginStatementInCodeStr,
                                                          offAfter, sStmt);
        assert asLines[-1].find(';') >= 0;
        asLines[-1] = asLines[-1][:asLines[-1].find(';') + 1];

    assert asLines[0].find(sStmt) >= 0;
    #if not asLines[0].strip().startswith(sStmt):
    #    self.raiseError('%s is not the first word on the line: %s' % (sStmt, asLines[0].strip()));

    # Advance to the line with the closing ')'.
    self.iLine += cLines;

    # Complete the block.
    oMcBlock.complete(self.iLine, 0 if cLines > 0 else offBeginStatementInCodeStr, offAfter + 1, asLines);

    g_aoMcBlocks.append(oMcBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;

    return True;
4774
def workerStartFunction(self, asArgs):
    """
    Deals with the start of a decoder function.

    These are all defined using one of the FNIEMOP*_DEF* and FNIEMOP_*STUB*
    macros, so we get an argument list for these where the 0th argument is
    the macro name and the 1st one is used as the function name.

    Any function still open is completed first, using the lines from where
    it began up to the one before the current line.

    Returns True.
    """
    # Complete any existing function.
    oPrevFunction = self.oCurFunction;
    if oPrevFunction:
        iLastLine = self.iLine - 1;
        oPrevFunction.complete(iLastLine, self.asLines[oPrevFunction.iBeginLine - 1 : iLastLine]);

    # Create the new function.
    sName = asArgs[1];
    self.oCurFunction = DecoderFunction(self.sSrcFile, self.iLine, sName, asArgs);
    return True;
4790
4791 def checkCodeForMacro(self, sCode, offLine):
4792 """
4793 Checks code for relevant macro invocation.
4794 """
4795
4796 #
4797 # Scan macro invocations.
4798 #
4799 if sCode.find('(') > 0:
4800 # Look for instruction decoder function definitions. ASSUME single line.
4801 asArgs = self.findAndParseFirstMacroInvocation(sCode,
4802 [ 'FNIEMOP_DEF',
4803 'FNIEMOPRM_DEF',
4804 'FNIEMOP_STUB',
4805 'FNIEMOP_STUB_1',
4806 'FNIEMOP_UD_STUB',
4807 'FNIEMOP_UD_STUB_1' ]);
4808 if asArgs is not None:
4809 self.workerStartFunction(asArgs);
4810 #self.debug('%s: oCurFunction=%s' % (self.iLine, self.oCurFunction.sName,));
4811
4812 if not self.aoCurInstrs:
4813 self.addInstruction();
4814 for oInstr in self.aoCurInstrs:
4815 if oInstr.iLineFnIemOpMacro == -1:
4816 oInstr.iLineFnIemOpMacro = self.iLine;
4817 else:
4818 self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );
4819 self.setInstrunctionAttrib('sFunction', asArgs[1]);
4820 self.setInstrunctionAttrib('fStub', asArgs[0].find('STUB') > 0, fOverwrite = True);
4821 self.setInstrunctionAttrib('fUdStub', asArgs[0].find('UD_STUB') > 0, fOverwrite = True);
4822 if asArgs[0].find('STUB') > 0:
4823 self.doneInstructions(fEndOfFunction = True);
4824 return True;
4825
4826 # Check for worker function definitions, so we can get a context for MC blocks.
4827 asArgs = self.findAndParseFirstMacroInvocation(sCode,
4828 [ 'FNIEMOP_DEF_1',
4829 'FNIEMOP_DEF_2', ]);
4830 if asArgs is not None:
4831 self.workerStartFunction(asArgs);
4832 #self.debug('%s: oCurFunction=%s (%s)' % (self.iLine, self.oCurFunction.sName, asArgs[0]));
4833 return True;
4834
4835 # IEMOP_HLP_DONE_VEX_DECODING_*
4836 asArgs = self.findAndParseFirstMacroInvocation(sCode,
4837 [ 'IEMOP_HLP_DONE_VEX_DECODING',
4838 'IEMOP_HLP_DONE_VEX_DECODING_L0',
4839 'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
4840 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
4841 ]);
4842 if asArgs is not None:
4843 sMacro = asArgs[0];
4844 if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
4845 for oInstr in self.aoCurInstrs:
4846 if 'vex_l_zero' not in oInstr.dHints:
4847 if oInstr.iLineMnemonicMacro >= 0:
4848 self.errorOnLine(oInstr.iLineMnemonicMacro,
4849 'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
4850 oInstr.dHints['vex_l_zero'] = True;
4851
4852 #
4853 # IEMOP_MNEMONIC*
4854 #
4855 if sCode.find('IEMOP_MNEMONIC') >= 0:
4856 # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
4857 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
4858 if asArgs is not None:
4859 if len(self.aoCurInstrs) == 1:
4860 oInstr = self.aoCurInstrs[0];
4861 if oInstr.sStats is None:
4862 oInstr.sStats = asArgs[1];
4863 self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);
4864
4865 # IEMOP_MNEMONIC0EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
4866 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0EX');
4867 if asArgs is not None:
4868 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[6],
4869 asArgs[7], []);
4870 # IEMOP_MNEMONIC1EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
4871 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1EX');
4872 if asArgs is not None:
4873 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[7],
4874 asArgs[8], [asArgs[6],]);
4875 # IEMOP_MNEMONIC2EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
4876 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2EX');
4877 if asArgs is not None:
4878 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[8],
4879 asArgs[9], [asArgs[6], asArgs[7]]);
4880 # IEMOP_MNEMONIC3EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints,
4881 # a_fIemHints)
4882 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3EX');
4883 if asArgs is not None:
4884 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[9],
4885 asArgs[10], [asArgs[6], asArgs[7], asArgs[8],]);
4886 # IEMOP_MNEMONIC4EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints,
4887 # a_fIemHints)
4888 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4EX');
4889 if asArgs is not None:
4890 self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], asArgs[10],
4891 asArgs[11], [asArgs[6], asArgs[7], asArgs[8], asArgs[9],]);
4892
4893 # IEMOP_MNEMONIC0(a_Form, a_Upper, a_Lower, a_fDisHints, a_fIemHints)
4894 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC0');
4895 if asArgs is not None:
4896 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5], []);
4897 # IEMOP_MNEMONIC1(a_Form, a_Upper, a_Lower, a_Op1, a_fDisHints, a_fIemHints)
4898 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC1');
4899 if asArgs is not None:
4900 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[5], asArgs[6], [asArgs[4],]);
4901 # IEMOP_MNEMONIC2(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_fDisHints, a_fIemHints)
4902 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC2');
4903 if asArgs is not None:
4904 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[6], asArgs[7],
4905 [asArgs[4], asArgs[5],]);
4906 # IEMOP_MNEMONIC3(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_fDisHints, a_fIemHints)
4907 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC3');
4908 if asArgs is not None:
4909 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[7], asArgs[8],
4910 [asArgs[4], asArgs[5], asArgs[6],]);
4911 # IEMOP_MNEMONIC4(a_Form, a_Upper, a_Lower, a_Op1, a_Op2, a_Op3, a_Op4, a_fDisHints, a_fIemHints)
4912 asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC4');
4913 if asArgs is not None:
4914 self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[8], asArgs[9],
4915 [asArgs[4], asArgs[5], asArgs[6], asArgs[7],]);
4916
4917 #
4918 # IEM_MC_BEGIN + IEM_MC_END.
4919 # We must support multiple instances per code snippet.
4920 #
4921 offCode = sCode.find('IEM_MC_');
4922 if offCode >= 0:
4923 for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
4924 if oMatch.group(1) == 'END':
4925 self.workerIemMcEnd(offLine + oMatch.start());
4926 elif oMatch.group(1) == 'BEGIN':
4927 self.workerIemMcBegin(sCode, oMatch.start(), offLine + oMatch.start());
4928 else:
4929 self.workerIemMcDeferToCImplXRet(sCode, oMatch.start(), offLine + oMatch.start(),
4930 int(oMatch.group(1)[len('DEFER_TO_CIMPL_')]));
4931 return True;
4932
4933 return False;
4934
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The resulting regular expression has a single capture group with one
    alternative per macro name in self.dMacros:
        - macros with a parameter list (asArgs is not None) must be followed
          by optional whitespace and an opening parenthesis, both of which
          are part of the match;
        - macros without a parameter list must end at a word boundary.

    When self.dMacros is empty, self.oReMacros is set to None.

    Returns True.
    """
    if not self.dMacros:
        self.oReMacros = None;
        return True;

    # Note! Escaped backslashes rather than '\s' / '\(' in a plain string, as
    #       the latter are invalid string escape sequences.
    asAlternatives = [];
    for sName, oMacro in self.dMacros.items():
        if oMacro.asArgs is not None:
            asAlternatives.append(sName + '\\s*\\(');
        else:
            asAlternatives.append(sName + '\\b');

    self.oReMacros = re.compile('\\b(' + '|'.join(asAlternatives) + ')');
    return True;
4955
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are consumed from self.asLines (advancing self.iLine).
    Only macros whose body contains IEM_MC_BEGIN are recorded in self.dMacros;
    all others are ignored.

    Returns True when the macro was ignored, the result of
    workerPreProcessRecreateMacroRegex() when it was recorded, and the result
    of self.error() when the definition could not be parsed.
    """
    iFirstLine = self.iLine;

    # Glue continuation lines together: the backslash (and any blanks before
    # it) goes, the newline stays.
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[0:-2].rstrip() + '\n' + sNextLine;

    # First try the function-like form (name, parameter list, body), then
    # fall back on the simple form (name, body).
    oFunctionLike = self.oReHashDefine2.match(sRest);
    if oFunctionLike:
        sName  = oFunctionLike.group(1);
        asArgs = [sParam.strip() for sParam in oFunctionLike.group(2).split(',')];
        sBody  = oFunctionLike.group(3);
    else:
        oSimple = self.oReHashDefine3.match(sRest);
        if not oSimple:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sName  = oSimple.group(1);
        asArgs = None;
        sBody  = oSimple.group(2);
    assert sName == sName.strip();

    # We only care about macros containing MC blocks.  MC blocks within
    # nested macro expansion are NOT supported, to avoid lots of extra work.
    if sBody.find("IEM_MC_BEGIN") < 0:
        return True;

    # Record it and refresh the macro matching expression.
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iFirstLine);
    return self.workerPreProcessRecreateMacroRegex();
5005
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro is not one we are tracking, otherwise the
    result of workerPreProcessRecreateMacroRegex() after dropping it from
    self.dMacros.
    """
    # Cheap comment stripping: cut at the first slash, unless it is the very
    # first character.  What remains, sans surrounding blanks, is the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless it is one of the macros we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
5024
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Dispatches a preprocessor directive line to the #define or #undef worker.

    The text captured by group 1 of the matching expression is handed to the
    worker with a newline appended.

    Returns the worker's result, or False if sLine is neither a #define nor
    an #undef according to self.oReHashDefine / self.oReHashUndef.
    """
    oDefine = self.oReHashDefine.match(sLine);
    if oDefine is not None:
        return self.workerPreProcessDefine(oDefine.group(1) + '\n');

    oUndef = self.oReHashUndef.match(sLine);
    if oUndef is None:
        return False;
    return self.workerPreProcessUndef(oUndef.group(1) + '\n');
5037
def expandMacros(self, sLine, oMatch):
    """
    Expands the macro invocation which oMatch located in sLine.

    We currently ASSUME there is only one invocation on the line, namely the
    one oMatch matched, and that all of its arguments are on this line too.

    Returns sLine with the invocation replaced by the macro expansion.
    Calls self.raiseError() if the invocation isn't closed on this line or
    if the number of arguments doesn't match the macro definition.
    """
    offStart = oMatch.start();
    offEnd   = oMatch.end();
    sName    = oMatch.group(1);
    assert sName == sLine[offStart:offEnd];

    #
    # Simple macro without a parameter list: plain text substitution.
    #
    if not sName.endswith('('):
        oMacro = self.dMacros[sName] # type: SimpleParser.Macro
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offStart] + oMacro.expandMacro(self) + sLine[offEnd:];

    #
    # Macro with parameters.  The match includes the opening parenthesis, so
    # drop that (and any blanks before it) to get at the name.
    #
    sName  = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro

    # Split the argument list on top-level commas, tracking the parenthesis
    # nesting depth (we start inside the invocation's own parenthesis).
    asArgs      = [];
    cDepth      = 1;
    offArgStart = offEnd;
    offCur      = offEnd;
    while True:
        if offCur >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        chCur = sLine[offCur];
        if chCur == ',' and cDepth == 1:
            asArgs.append(sLine[offArgStart:offCur].strip());
            offArgStart = offCur + 1;
        elif chCur == '(':
            cDepth += 1;
        elif chCur == ')':
            cDepth -= 1;
            if cDepth == 0:
                asArgs.append(sLine[offArgStart:offCur].strip());
                break;
        offCur += 1;

    # An empty parameter list yields a single empty argument above, fix that.
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '':
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding; offCur is at the closing parenthesis here.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offStart] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
5096
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine, tracking whether we are in
    code or inside a multi-line C comment (self.iState).  Code is passed to
    checkCodeForMacro(), completed comments to parseComment(), #define and
    #undef lines to checkPreProcessorDirectiveForDefineUndef(), and known
    macros are expanded in place (self.asLines is updated with the result).

    Returns the result of self.printErrors().
    Raises exception on fatal trouble.
    """
    #
    # Loop thru the lines.
    #
    # Please mind that self.iLine may be updated by checkCodeForMacro and
    # other worker methods.
    #
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm, 2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            # Resume the search after the comment terminator, so that the slash of
                            # the terminator cannot be taken for the start of a new comment when it
                            # is immediately followed by an asterisk (like in '/*=*/*').
                            offHit = sLine.find('/*', offEnd + 2);

                        if offHit >= 0:
                            # Code up to the comment, then switch to comment mode.
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            # End of comment: collect the rest, switch back to code and process it.
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif (    self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than indexing, so an empty line cannot trip us up.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    # Wrap up whatever is pending and report the statistics for this file.
    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
5202
## The parsed content of IEMAllInstructionsCommonBodyMacros.h.
# None until the first __parseFileByName() call, which parses that header
# and stores the resulting parser here for use with all subsequent files.
g_oParsedCommonBodyMacros = None # type: SimpleParser
5205
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstructionsCommonBodyMacros.h is located (next
    to sSrcFile, or failing that next to this script), parsed and cached in
    g_oParsedCommonBodyMacros, so its macros are available when processing
    sSrcFile and all later files.

    Returns a tuple with the result of SimpleParser.parse() and the parser
    instance.
    Raises Exception if a file cannot be opened or read, and ParserException
    (after printing it to stderr) on parser trouble.
    """
    global g_oParsedCommonBodyMacros;

    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile: # Makes sure the file is closed whether or not reading works out.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # On the first call, we parse IEMAllInstructionsCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstructionsCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstructionsCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The file is expected to only define macros, so finding
        # instructions, tags, stubs or MC blocks in it is an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Note! The arguments are given in the order of the message
                #       labels: instr, tags, stubs and MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5267
5268
def __doTestCopying():
    """
    Executes the asCopyTests instructions.

    Each asCopyTests entry of an instruction is looked up first as a
    statistics name (g_dAllInstructionsByStat) and then as a function name
    (g_dAllInstructionsByFunction), and the tests of the instruction(s) found
    are appended to the referencing instruction's aoTests.

    Errors (unknown or self references) are written to stderr.
    Returns the number of errors.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        if not oDstInstr.asCopyTests:
            continue;

        for sSrcInstr in oDstInstr.asCopyTests:
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, refusing to copy from ourselves.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
5296
5297
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list isn't empty, aoTests is emptied for every instruction in
    g_aoAllInstructions which has tests but isn't on the list.

    Returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5309
## List of all main instruction files and their default maps.
# Each entry is a (filename, default map name) pair.  parseAll() passes the
# whole list to __parseFilesWorker(), while parseFiles() uses it to look up
# the default map by filename (compared case-insensitively).
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h', 'one', ),
    ( 'IEMAllInstructionsOneByte.cpp.h', 'one', ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f', ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h', 'vexmap1', ),
    ( 'IEMAllInstructionsVexMap2.cpp.h', 'vexmap2', ),
    ( 'IEMAllInstructionsVexMap3.cpp.h', 'vexmap3', ),
    ( 'IEMAllInstructions3DNow.cpp.h', '3dnow', ),
);
5322
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (filename, default map) pairs.  A
    filename without any directory component which doesn't exist relative to
    the current directory is looked for in the directory of this script.

    After parsing, the @opcopytests references are resolved and the only-test
    filtering applied, and the total stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats.  The divisor is clamped to one so that not having
    # found any instructions at all doesn't cause a division by zero.
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    cTotalInstr = len(g_aoAllInstructions);
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(cTotalInstr, 1), cTotalStubs, cTotalInstr, len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5353
5354
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the first entry in
    g_aasAllInstrFilesAndDefaultMap with the same filename (directory
    ignored, compared case-insensitively).

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file isn't in the table.
    """
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sBasename = os.path.split(sFilename)[1].lower();
        sMap = next((asEntry[1] for asEntry in g_aasAllInstrFilesAndDefaultMap if asEntry[0].lower() == sBasename), None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    # The worker does the actual job.
    return __parseFilesWorker(asFilesAndDefaultMap);
5376
5377
def parseAll():
    """
    Parses every file listed in g_aasAllInstrFilesAndDefaultMap, i.e. all
    the IEMAllInstruction*.cpp.h files.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    aoParsers = __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
    return aoParsers;
5386
5387
5388#
5389# Generators (may perhaps move later).
5390#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP() macro invocation, or OPVEX() when there are more
    than three operands, with these columns: format string, parser indexes,
    opcode enum, operand parameters and DISOPTYPE flags.

    Example:
        OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
           DISOPTYPE_HARMLESS),
    for use with:
        define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

    Returns the formatted line (no newline).
    """
    if len(oInstr.aoOperands) > 3:
        sMacro       = 'OPVEX'
        cMaxOperands = 4;
    else:
        sMacro       = 'OP';
        cMaxOperands = 3;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string: mnemonic, a space, then the comma separated operands.
    #
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':                   ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();
        asOperandFmts.append(sOperandFmt);

    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';
    asColumns = [ sFormat, ];

    #
    # Decoders.
    #
    iStart    = len(asColumns);
    sEncoding = oInstr.sEncoding;
    if sEncoding is None:
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding in [ 'prefix', ]:
        asColumns.extend(['0,'] * len(oInstr.aoOperands));
    elif sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif sEncoding == 'VEX.ModR/M':
        asColumns.append('IDX_ParseModRM,');
    elif sEncoding == 'vex2':
        asColumns.append('IDX_ParseVex2b,')
    elif sEncoding == 'vex3':
        asColumns.append('IDX_ParseVex3b,')
    elif sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Parsers not handled yet: IDX_ParseTwoByteEsc, IDX_ParseGrp1, IDX_ParseShiftGrp2,
        #        IDX_ParseGrp3 thru IDX_ParseGrp10, IDX_ParseGrp12 thru IDX_ParseGrp16, IDX_Parse3DNow,
        #        IDX_ParseThreeByteEsc4, IDX_ParseThreeByteEsc5, IDX_ParseModFence, IDX_ParseEscFP,
        #        IDX_ParseNopPause and IDX_ParseInvOpModRM.
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad the
    # parser columns with zeros.
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        asColumns.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands, the latter padded with OP_PARM_NONE.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    asColumns.extend(['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands]);
    asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(oInstr.aoOperands)));

    #
    # Flags: the DISOPTYPE_ defines of the hints, in hint name order.
    #
    asHintDefines = [g_kdHints[sHint] for sHint in sorted(oInstr.dHints.keys())];
    asFlags       = [sDefine for sDefine in asHintDefines if sDefine.startswith('DISOPTYPE_')];
    asColumns.append((' | '.join(asFlags) if asFlags else '0') + '),');

    #
    # Format the columns into a line: each column starts at its offset, or
    # after a single space if we are already past that.
    #
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        sLine += ' ' * max(aoffColumns[iColumn] - len(sLine), 1);
        sLine += sColumn;
    return sLine;
5515
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Checks whether leading and trailing None entries can be trimmed off the
    table.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For the full table this is (0, len(aoTableOrdered), False).
    """
    cTotal = len(aoTableOrdered);

    # Determine how much we can trim off: first the end ...
    iEnd = 0;
    for iEntry in range(cTotal, 0, -1):
        if aoTableOrdered[iEntry - 1] is not None:
            iEnd = iEntry;
            break;

    # ... then the start (there is an entry before iEnd if it is non-zero).
    iFirst = iEnd;
    for iEntry in range(iEnd):
        if aoTableOrdered[iEntry] is not None:
            iFirst = iEntry;
            break;

    # Go for the short table version when the number of trimmed entries is
    # at least 1/30th of the table size (integer division).
    # NOTE(review): The comment here used to say "more than 30%", which isn't
    #               what the expression tests - confirm which one is intended.
    cTrimmed = iFirst + cTotal - iEnd;
    if cTrimmed >= cTotal // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cTotal, False);
5537
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files and writes a DISOPCODE table followed by
    a DISOPMAPDESC range record to oDstFile for each instruction map with a
    name starting with "vex".  Maps selected by 'byte+pfx' are split into
    four tables, one for each prefix variation (none, 0x66, 0xf3 and 0xf2).

    Returns exit code: 0 on success, 1 if the parsing failed (details are
    printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space,
    # so maps selected by byte + prefix are split up by prefix here.
    #
    aoDisasmMaps = [];
    for _, oMap in sorted(iter(g_dInstructionMaps.items()),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines is only collected, this function does not write it
    #       anywhere.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        # Mask for checking whether an index starts a new row, i.e. a new
        # upper nibble of the opcode byte.
        fRowMask = (0x10 * cEntriesPerByte) - 1;

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append( 'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        if fShortTable and (iInstrStart & fRowMask) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & fRowMask) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & fRowMask) == 0 and cInvalidInstrs >= 0x10 * cEntriesPerByte:
                    # A whole row of invalid entries.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (0x10 * cEntriesPerByte,));
                    iInstr += 0x10 * cEntriesPerByte - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte are invalid.  The
                        # block covers cEntriesPerByte entries, so skip the
                        # rest of them (the last one is skipped below).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An array must have at least one entry.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        #
        # NOTE(review): The extern declaration appends 'Range' to getDisasRangeName()
        #               while the definition below does not - confirm which is right.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %sRange;' % (oMap.getDisasRangeName()));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;
5666
# Script entry point: generate the disassembler tables (written to stdout by
# default) and use the return value as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
5669
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette