VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 98873

最後變更 在這個檔案從98873是 98873,由 vboxsync 提交於 2 年 前

VMM/IEM: Started extending IEMAllInstructionsPython.py to pick up IEM_MC_BEGIN/END blocks and added a new script for generating the threaded functions and producing the modified IEMAllInstructions*.cpp.h files. Also added IEMAllInstructionsThreadedRecompiler.cpp. bugref:10369

  • 屬性 svn:eol-style 設為 LF
  • 屬性 svn:executable 設為 *
  • 屬性 svn:keywords 設為 Author Date Id Revision
檔案大小: 187.4 KB
 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 98873 2023-03-08 01:51:04Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 98873 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
63# Python 3 hacks:
64if sys.version_info[0] >= 3:
65 long = int; # pylint: disable=redefined-builtin,invalid-name
66
67
## X86_EFL_XXX constant names mapped to their numeric values.
## The trailing comment on each entry gives the equivalent C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF': 0x00000001, # RT_BIT_32(0)
    'X86_EFL_1': 0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF': 0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF': 0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF': 0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF': 0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF': 0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF': 0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF': 0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF': 0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL': 0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT': 0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF': 0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM': 0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC': 0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF': 0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP': 0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID': 0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK': 0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK': 0x00000002, # RT_BIT_32(1)
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
##
## Maps a flag mnemonic to the X86_EFL_XXX constant name (see
## g_kdX86EFlagsConstants).  A leading '!' on the value marks the mnemonic as
## denoting the cleared state of that flag.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf': 'X86_EFL_CF', ##< Carry Flag.
    'nc': '!X86_EFL_CF', ##< No Carry.

    # NOTE(review): classic DOS DEBUG notation uses 'pe' for PF set and 'po'
    #               for PF clear; the two entries below look swapped relative
    #               to that - confirm before relying on them.
    'po': 'X86_EFL_PF', ##< Parity Odd.
    'pe': '!X86_EFL_PF', ##< Parity Even.

    'af': 'X86_EFL_AF', ##< Aux Flag.
    'na': '!X86_EFL_AF', ##< No Aux.

    'zr': 'X86_EFL_ZF', ##< ZeRo.
    'nz': '!X86_EFL_ZF', ##< No Zero.

    'ng': 'X86_EFL_SF', ##< NeGative (sign).
    'pl': '!X86_EFL_SF', ##< PLus (sign).

    'tf': 'X86_EFL_TF', ##< Trap flag.

    'ei': 'X86_EFL_IF', ##< Enabled Interrupts.
    'di': '!X86_EFL_IF', ##< Disabled Interrupts.

    'dn': 'X86_EFL_DF', ##< DowN (string op direction).
    'up': '!X86_EFL_DF', ##< UP (string op direction).

    'ov': 'X86_EFL_OF', ##< OVerflow.
    'nv': '!X86_EFL_OF', ##< No Overflow.

    'nt': 'X86_EFL_NT', ##< Nested Task.
    'rf': 'X86_EFL_RF', ##< Resume Flag.
    'vm': 'X86_EFL_VM', ##< Virtual-8086 Mode.
    'ac': 'X86_EFL_AC', ##< Alignment Check.
    'vif': 'X86_EFL_VIF', ##< Virtual Interrupt Flag.
    'vip': 'X86_EFL_VIP', ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf': 'X86_EFL_PF',
    'zf': 'X86_EFL_ZF',
    'sf': 'X86_EFL_SF',
    'if': 'X86_EFL_IF',
    'df': 'X86_EFL_DF',
    'of': 'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id': 'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Maps the X86_CR0_XXX constant name to its numeric value; the trailing
## comment on each entry gives the equivalent C expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE': 0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP': 0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM': 0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS': 0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET': 0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE': 0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP': 0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM': 0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW': 0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD': 0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG': 0x80000000, # RT_BIT_32(31)
};
152
## Constants and values for CR4.
## Maps the X86_CR4_XXX constant name to its numeric value; the trailing
## comment on each entry gives the equivalent C expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME': 0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI': 0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD': 0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE': 0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE': 0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE': 0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE': 0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE': 0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE': 0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR': 0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE': 0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE': 0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE': 0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE': 0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP': 0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP': 0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE': 0x00400000, # RT_BIT_32(22)
};
174
## XSAVE components (XCR0).
## Maps the XSAVE_C_XXX constant name to its numeric value.  The last two
## entries are combined masks rather than single component bits.
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87': 0x00000001,
    'XSAVE_C_SSE': 0x00000002,
    'XSAVE_C_YMM': 0x00000004,
    'XSAVE_C_BNDREGS': 0x00000008,
    'XSAVE_C_BNDCSR': 0x00000010,
    'XSAVE_C_OPMASK': 0x00000020,
    'XSAVE_C_ZMM_HI256': 0x00000040,
    'XSAVE_C_ZMM_16HI': 0x00000080,
    'XSAVE_C_PKRU': 0x00000200,
    'XSAVE_C_LWP': 0x4000000000000000,
    'XSAVE_C_X': 0x8000000000000000,
    'XSAVE_C_ALL_AVX': 0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
##
## The keys are the valid operand location names; all values are empty lists
## in this table.  The second field of each g_kdOpTypes entry and the sWhere
## lists in g_kdIemForms use these names.
g_kdOpLocations = {
    'reg': [], ## modrm.reg
    'rm': [], ## modrm.rm
    'imm': [], ## immediate instruction data
    'vvvv': [], ## VEX.vvvv

    # fixed registers.
    'AL': [],
    'rAX': [],
    'rDX': [],
    'rSI': [],
    'rDI': [],
    'rFLAGS': [],
    'CS': [],
    'DS': [],
    'ES': [],
    'FS': [],
    'GS': [],
    'SS': [],
};
214
## \@op[1-4] types
##
## Maps the operand type name to a 5-tuple describing it.
##
## Value fields:
## - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
## - 1: the location (g_kdOpLocations).
## - 2: disassembler format string version of the type.
## - 3: disassembler OP_PARAM_XXX (XXX only).
## - 4: IEM form matching instruction.
##
## Several keys carry a _RO, _WO or _RW suffix; those entries share the
## disassembler fields (2 and 3) of the unsuffixed type name.
##
## Note! See section A.2.1 in SDM vol 2 for the type names.
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
    'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
    'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
    'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
    'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
    'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
    'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),

    # VEX.vvvv
    'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
    'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
    'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
    'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
    'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),

    # The flags register.
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
    'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Maps the form name to a 3-tuple:
## - 0: the encoding (a g_kdEncodings key).
## - 1: list of operand locations in operand order (g_kdOpLocations keys),
##      or None for the 'FIXED' form.
## - 2: the opcodesub value, empty string if none.
##
## NOTE(review): unlike the other *_REG / *_MEM forms, M_REG, M_MEM,
##               VEX_M_REG and VEX_M_MEM have an empty opcodesub field here -
##               confirm that this is intentional.
g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
    'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
    'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
    'M': ( 'ModR/M', [ 'rm', ], '', ),
    'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
    'R': ( 'ModR/M', [ 'reg', ], '', ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
    'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
    'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
    'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),

    'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
    'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),

    'FIXED': ( 'fixed', None, '', ),
};
417
## \@oppfx values.
## Only the keys matter in this table (all values are empty lists); the same
## four names are used as keys by InstructionMap.kiPrefixOrder.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
425
## Special \@opcode tag values.
## These are the non-hexadecimal opcode specifications; all values are empty
## lists in this table.
g_kdSpecialOpcodes = {
    '/reg': [],
    'mr/reg': [],
    '11 /reg': [],
    '!11 /reg': [],
    '11 mr/reg': [],
    '!11 mr/reg': [],
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## For every non-alias entry the first value equals the key itself.
g_kdSubOpcodes = {
    'none': [ None, '', ],
    '11 mr/reg': [ '11 mr/reg', '', ],
    '11': [ '11 mr/reg', '', ], ##< alias
    '!11 mr/reg': [ '!11 mr/reg', '', ],
    '!11': [ '!11 mr/reg', '', ], ##< alias
    'rex.w=0': [ 'rex.w=0', 'WZ', ],
    'w=0': [ 'rex.w=0', '', ], ##< alias
    'rex.w=1': [ 'rex.w=1', 'WNZ', ],
    'w=1': [ 'rex.w=1', '', ], ##< alias
    'vex.l=0': [ 'vex.l=0', 'L0', ],
    'vex.l=1': [ 'vex.l=1', 'L1', ], # Was 'vex.l=0' (copy & paste slip), which turned vex.l=1 into vex.l=0.
    '11 mr/reg vex.l=0': [ '11 mr/reg vex.l=0', 'L0', ],
    '11 mr/reg vex.l=1': [ '11 mr/reg vex.l=1', 'L1', ],
    '!11 mr/reg vex.l=0': [ '!11 mr/reg vex.l=0', 'L0', ],
    '!11 mr/reg vex.l=1': [ '!11 mr/reg vex.l=1', 'L1', ],
};
456
## Valid values for \@openc
## Each value is a one-element list holding the BS3CG1ENC_XXX constant name
## for the encoding, or None for 'prefix'.
g_kdEncodings = {
    'ModR/M': [ 'BS3CG1ENC_MODRM', ], ##< ModR/M
    'VEX.ModR/M': [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed': [ 'BS3CG1ENC_FIXED', ], ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed': [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix': [ None, ], ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## Only the keys matter in this table (all values are empty lists).
g_kdInvalidStyles = {
    'immediate': [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm': [], ##< VEX+ModR/M, everyone.
    'intel-modrm': [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8': [], ##< Intel decodes ModR/M and an 8-bit immediate.
    'intel-opcode-modrm': [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8': [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate.
};
475
## Known CPU names.
## Only the keys matter in this table (all values are empty tuples).
g_kdCpuNames = {
    '8086': (),
    '80186': (),
    '80286': (),
    '80386': (),
    '80486': (),
};
483
## \@opcpuid
##
## Maps the CPUID feature name used with the tag to the name of the
## corresponding X86_CPUID_XXX feature constant.
##
## Note! 'pclmul', 'monitor' and 'smx' used to name the constant of the next
##       higher CPUID.1:ECX bit (DTES64, CPLDS and TM2 respectively); they
##       now name their own feature bits.
g_kdCpuIdFlags = {
    'vme': 'X86_CPUID_FEATURE_EDX_VME',
    'tsc': 'X86_CPUID_FEATURE_EDX_TSC',
    'msr': 'X86_CPUID_FEATURE_EDX_MSR',
    'cx8': 'X86_CPUID_FEATURE_EDX_CX8',
    'sep': 'X86_CPUID_FEATURE_EDX_SEP',
    'cmov': 'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh': 'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt': 'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx': 'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr': 'X86_CPUID_FEATURE_EDX_FXSR',
    'sse': 'X86_CPUID_FEATURE_EDX_SSE',
    'sse2': 'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3': 'X86_CPUID_FEATURE_ECX_SSE3',
    'pclmul': 'X86_CPUID_FEATURE_ECX_PCLMUL',
    'monitor': 'X86_CPUID_FEATURE_ECX_MONITOR',
    'vmx': 'X86_CPUID_FEATURE_ECX_VMX',
    'smx': 'X86_CPUID_FEATURE_ECX_SMX',
    'ssse3': 'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma': 'X86_CPUID_FEATURE_ECX_FMA',
    'cx16': 'X86_CPUID_FEATURE_ECX_CX16',
    'pcid': 'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1': 'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2': 'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe': 'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt': 'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes': 'X86_CPUID_FEATURE_ECX_AES',
    'xsave': 'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx': 'X86_CPUID_FEATURE_ECX_AVX',
    'avx2': 'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c': 'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand': 'X86_CPUID_FEATURE_ECX_RDRAND',

    # AMD extended feature leaf.
    'axmmx': 'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow': 'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm': 'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l': 'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm': 'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a': 'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf': 'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop': 'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4': 'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to the name of the disassembler DISOPTYPE_XXX flag; the
## entries at the end with an empty string value have no DISOPTYPE_XXX name
## in this table.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
    ## (only in 16 & 32 bits mode!)
    'avx': 'DISOPTYPE_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (the original comment was self-referential - confirm).
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed': '', ##< Lock prefix allowed.
};
570
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## Only the keys matter in this table (all values are empty lists).
g_kdXcptTypes = {
    'none': [],
    '1': [],
    '2': [],
    '3': [],
    '4': [],
    '4UA': [],
    '5': [],
    '5LZ': [], # LZ = VEX.L must be zero.
    '6': [],
    '7': [],
    '7LZ': [],
    '8': [],
    '11': [],
    '12': [],
    'E1': [],
    'E1NF': [],
    'E2': [],
    'E3': [],
    'E3NF': [],
    'E4': [],
    'E4NF': [],
    'E5': [],
    'E5NF': [],
    'E6': [],
    'E6NF': [],
    'E7NF': [],
    'E9': [],
    'E9NF': [],
    'E10': [],
    'E11': [],
    'E12': [],
    'E12NF': [],
};
606
607
608def _isValidOpcodeByte(sOpcode):
609 """
610 Checks if sOpcode is a valid lower case opcode byte.
611 Returns true/false.
612 """
613 if len(sOpcode) == 4:
614 if sOpcode[:2] == '0x':
615 if sOpcode[2] in '0123456789abcdef':
616 if sOpcode[3] in '0123456789abcdef':
617 return True;
618 return False;
619
620
class InstructionMap(object):
    """
    Instruction map.

    The opcode map provides the lead opcode bytes (empty for the one byte
    opcode map).  An instruction can be a member of multiple opcode maps as
    long as it uses the same opcode value within the map (because of VEX).
    """

    ## Valid sEncoding values.
    kdEncodings = {
        'legacy': [],
        'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
        'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
        'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
        'xop8': [], ##< XOP prefix with vvvvv = 8
        'xop9': [], ##< XOP prefix with vvvvv = 9
        'xop10': [], ##< XOP prefix with vvvvv = 10
    };
    ## Selectors.
    ## 1. The first value is the number of table entries required by a
    ##    decoder or disassembler for this type of selector.
    ## 2. The second value is how many entries per opcode byte if applicable.
    kdSelectors = {
        'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
        'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
        '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
        'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
        'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
        '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
        '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
        '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
    };

    ## Define the subentry number according to the Instruction::sPrefix
    ## value for 'byte+pfx' selected tables.
    kiPrefixOrder = {
        'none': 0,
        '0x66': 1,
        '0xf3': 2,
        '0xf2': 3,
    };

    def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
                 sEncoding = 'legacy', sDisParse = None):
        """
        Creates an empty map.

        sSelector must be a kdSelectors key and sEncoding a kdEncodings key.
        Each asLeadOpcodes entry must be a lower case hex byte like '0x0f',
        and sDisParse must be None or start with 'IDX_Parse'.  All of this
        is only checked by assertions.
        """
        assert sSelector in self.kdSelectors;
        assert sEncoding in self.kdEncodings;
        if asLeadOpcodes is None:
            asLeadOpcodes = [];
        else:
            for sOpcode in asLeadOpcodes:
                assert _isValidOpcodeByte(sOpcode);
        assert sDisParse is None or sDisParse.startswith('IDX_Parse');

        self.sName = sName;
        self.sIemName = sIemName;
        self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
        self.sSelector = sSelector; ##< The member selector, see kdSelectors.
        self.sEncoding = sEncoding; ##< The encoding, see kdEncodings.
        self.aoInstructions = [] # type: List[Instruction]
        self.sDisParse = sDisParse; ##< IDX_ParseXXX.

    def copy(self, sNewName, sPrefixFilter = None):
        """
        Copies the table, keeping only the instructions whose sPrefix equals
        sPrefixFilter when that is not None.

        A filtered copy of a 'byte+pfx' map gets the 'byte' selector, as the
        prefix no longer takes part in selecting the instruction.  The
        instruction objects themselves are shared with the original map.
        """
        oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
                               sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
                                           else self.sSelector,
                               sEncoding = self.sEncoding, sDisParse = self.sDisParse);
        if sPrefixFilter is None:
            oCopy.aoInstructions = list(self.aoInstructions);
        else:
            oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
        return oCopy;

    def getTableSize(self):
        """
        Number of table entries.  This corresponds directly to the selector.
        """
        return self.kdSelectors[self.sSelector][0];

    def getEntriesPerByte(self):
        """
        Number of table entries per opcode byte.

        This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
        the others it will just return 1.
        """
        return self.kdSelectors[self.sSelector][1];

    def getInstructionIndex(self, oInstr):
        """
        Returns the table index for the instruction.

        The index is derived from oInstr.getOpcodeByte() according to the
        selector of this map (and from oInstr.sPrefix for 'byte+pfx').
        Asserts if the opcode byte does not fit the selector.
        """
        bOpcode = oInstr.getOpcodeByte();

        # The byte selectors are simple.  We need a full opcode byte and need just return it.
        if self.sSelector == 'byte':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            return bOpcode;

        # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
        if self.sSelector == 'byte+pfx':
            assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
            iPrefix = self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
            assert iPrefix >= 0;
            return bOpcode * 4 + iPrefix;

        # The other selectors needs masking and shifting.
        if self.sSelector == '/r':
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == 'mod /r':
            return (bOpcode >> 3) & 0x1f;

        if self.sSelector == 'memreg /r':
            return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);

        # Note! modrm.mod is bits 7:6, so mod == 0y11 means the masked value
        #       is 0xc0.  (Comparing with 0xc can never match.)
        if self.sSelector == '!11 /r':
            assert (bOpcode & 0xc0) != 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11 /r':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return (bOpcode >> 3) & 0x7;

        if self.sSelector == '11':
            assert (bOpcode & 0xc0) == 0xc0, str(oInstr);
            return bOpcode & 0x3f;

        assert False, self.sSelector;
        return -1;

    def getInstructionsInTableOrder(self):
        """
        Get instructions in table order.

        Returns array of instructions.  Normally there is exactly one
        instruction per entry.  However the entry could also be None if
        no instruction was specified for that opcode value.  Or there
        could be a list of instructions to deal with special encodings
        where for instance prefix (e.g. REX.W) encodes a different
        instruction or different CPUs have different instructions or
        prefixes in the same place.
        """
        # Start with empty table.
        cTable = self.getTableSize();
        aoTable = [None] * cTable;

        # Insert the instructions, skipping those without an opcode.
        for oInstr in self.aoInstructions:
            if oInstr.sOpcode:
                idxOpcode = self.getInstructionIndex(oInstr);
                assert idxOpcode < cTable, str(idxOpcode);

                oExisting = aoTable[idxOpcode];
                if oExisting is None:
                    aoTable[idxOpcode] = oInstr;
                elif not isinstance(oExisting, list):
                    # Second instruction for this entry, convert it to a list.
                    aoTable[idxOpcode] = [oExisting, oInstr];
                else:
                    oExisting.append(oInstr);

        return aoTable;


    def getDisasTableName(self):
        """
        Returns the disassembler table name for this map.

        The name is 'g_aDisas' followed by the underscore separated words of
        sName: 'm', 'r' and two digit lower case hex words are appended as
        '_<word>', while other words get 'grp' and 'map' capitalized, their
        first character upper-cased and are appended without a separator.
        """
        sName = 'g_aDisas';
        for sWord in self.sName.split('_'):
            if sWord == 'm': # suffix indicating modrm.mod==mem
                sName += '_m';
            elif sWord == 'r': # suffix indicating modrm.mod==reg
                sName += '_r';
            elif len(sWord) == 2 and re.match('^[a-f0-9][a-f0-9]$', sWord):
                sName += '_' + sWord;
            else:
                sWord = sWord.replace('grp', 'Grp');
                sWord = sWord.replace('map', 'Map');
                sName += sWord[0].upper() + sWord[1:];
        return sName;

    def getDisasRangeName(self):
        """
        Returns the disassembler table range name for this map.
        """
        return self.getDisasTableName().replace('g_aDisas', 'g_Disas') + 'Range';

    def isVexMap(self):
        """ Returns True if a VEX map. """
        return self.sEncoding.startswith('vex');
814
class TestType(object):
    """
    Test value type.

    The base class handles integer like values.  The fUnsigned constructor
    parameter sets the default stance on zero vs sign extending; a value can
    request sign extension regardless of that default by being prefixed with
    '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        # The normal value sizes in bytes, smallest first.
        self.acbSizes  = acbSizes if acbSizes is not None else [1, 2, 4, 8, 16, 32];
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Raised when a value string cannot be parsed. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator: maps each lowercase hex digit to its inverse (15 - digit).
    kdHexInv = { '%x' % (iDigit,): '%x' % (15 - iDigit,) for iDigit in range(16) };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form, with the least
        significant byte first in the bytearray.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # An explicit sign forces sign extension and shifts where a '0x' prefix may sit.
        fHasSign    = sValue[0] in ('-', '+');
        fSignExtend = fHasSign or not self.fUnsigned;
        offRadix    = 1 if fHasSign else 0;
        fHex        = len(sValue) > offRadix + 2 and sValue[offRadix : offRadix + 2].lower() == '0x';

        # Try convert it to a long integer.
        try:
            iValue = long(sValue, 16 if fHex else 10);
        except Exception as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Produce the hex digits.  A negative value is turned into its two's
        # complement digits by inverting every digit of (-n - 1).
        fNegative = iValue < 0;
        sHex = hex(-iValue - 1 if fNegative else iValue);
        if sys.version_info[0] < 3:
            assert sHex[-1] == 'L';
            sHex = sHex[:-1];
        assert sHex[:2] == '0x';
        sHex = sHex[2:];
        if fNegative:
            sHex = ''.join([self.kdHexInv[chDigit] for chDigit in sHex]);
            # Make sure the top bit is set so the value stays negative when sign extended.
            if fSignExtend and sHex[0] not in '89abcdef':
                sHex = 'f' + sHex;

        # Pick the smallest normal size that fits, or a multiple of the
        # largest normal size if the value is bigger than that.
        cDigits       = len(sHex);
        cDigitsBiggest = self.acbSizes[-1] * 2;
        if cDigits <= cDigitsBiggest:
            cNaturalDigits = next(cb * 2 for cb in self.acbSizes if cDigits <= cb * 2);
        else:
            cNaturalDigits = (cDigits + cDigitsBiggest - 1) // cDigitsBiggest * cDigitsBiggest;
            assert isinstance(cNaturalDigits, int)

        # Pad up to the chosen size: 'f' digits for negative values, '0' otherwise.
        if cNaturalDigits != cDigits:
            sHex = ('f' if fNegative else '0') * (cNaturalDigits - cDigits) + sHex;

        # Convert to a bytearray, walking the digit pairs from the end so the
        # least significant byte comes first.
        abValue = bytearray([int(sHex[offEnd - 2 : offEnd], 16) for offEnd in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        else:
            return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for plain integer values.
        """
        _ = sValue;
        return False;
939
940
class TestTypeEflags(TestType):
    """
    Special value parsing for EFLAGS/RFLAGS/FLAGS.

    Values are comma separated flag mnemonics which are looked up in
    g_kdEFlagsMnemonics.  A mnemonic that translates to a '!' prefixed
    constant names a flag to clear, any other a flag to set.
    """

    ## Mnemonics taken to indicate cleared flags (see isAndOrPair).
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Translates the flag mnemonics in sValue into mask value(s).

        Returns the single entry form with the flags to set when no flag is
        to be cleared, otherwise a pair where the first entry holds the mask
        of flags to clear and the second the mask of flags to set.

        Raises BadValue on unknown flag mnemonics.
        """
        fSetMask   = 0;
        fClearMask = 0;
        for sFlag in sValue.split(','):
            sConstant = g_kdEFlagsMnemonics.get(sFlag, None);
            if sConstant is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            if sConstant[0] != '!':
                fSetMask   |= g_kdX86EFlagsConstants[sConstant];
            else:
                fClearMask |= g_kdX86EFlagsConstants[sConstant[1:]];

        aoSet = TestType.get(self, '0x%x' % (fSetMask,));
        if fClearMask == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoSet;

        aoClear = TestType.get(self, '%#x' % (fClearMask,))
        assert self.isAndOrPair(sValue) is True;
        return (aoClear[0], aoSet[0]);

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair, which is the case when any of the
        kdZeroValueFlags mnemonics occurs somewhere in the string.
        """
        return any(sZeroFlag in sValue for sZeroFlag in self.kdZeroValueFlags);
976
class TestTypeFromDict(TestType):
    """
    Special value parsing for registers whose bits are named by a dictionary
    of constants (used for CR0, CR4 and XCR0 by TestInOut.kdTypes).

    Values are comma separated flag names.  Each name is uppercased and
    prefixed with sConstantPrefix before being looked up in
    kdConstantsAndValues.
    """

    ## Not referenced by this class.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.kdConstantsAndValues = kdConstantsAndValues;
        self.sConstantPrefix      = sConstantPrefix;

    def get(self, sValue):
        """
        ORs together the values of the flags named in sValue and returns the
        result in the single entry form of TestType.get.

        Raises BadValue on unknown flag names.
        """
        fCombined = 0;
        for sFlag in sValue.split(','):
            sConstant = self.sConstantPrefix + sFlag.upper();
            fFlagValue = self.kdConstantsAndValues.get(sConstant, None);
            if fFlagValue is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sFlag, sValue))
            fCombined |= fFlagValue;
        return TestType.get(self, '0x%x' % (fCombined,));
997
998
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    A modifier consists of the field to modify (key in kdFields), the
    assignment operator (entry in kasOperators), the value string and the name
    of the value type.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    # NOTE(review): 'int' relies on the TestType default of fUnsigned=True, so
    #               it currently behaves like 'uint' - confirm whether it was
    #               meant to be fUnsigned=False.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ), # Was misspelled 'xmm15_dw0', making xmm15.dw0 an unknown field.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        Creates a state modifier.

        sField must be a key in kdFields and sOp an entry in kasOperators; all
        four arguments must be strings.  These requirements are only checked
        by assertions.
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1264
1265
class TestSelector(object):
    """
    One selector for an instruction test.

    A selector compares a variable (key in kdVariables) against one of the
    values valid for that variable using one of the kasCompareOps operators.
    """
    ## Selector compare operators.
    kasCompareOps = [ '==', '!=' ];
    ## Selector variables and their valid values.
    kdVariables = {
        # Operand size.
        'size': {
            'o16': 'size_o16',
            'o32': 'size_o32',
            'o64': 'size_o64',
        },
        # VEX.L value.
        'vex.l': {
            '0': 'vexl_0',
            '1': 'vexl_1',
        },
        # Execution ring.
        'ring': {
            '0':    'ring_0',
            '1':    'ring_1',
            '2':    'ring_2',
            '3':    'ring_3',
            '0..2': 'ring_0_thru_2',
            '1..3': 'ring_1_thru_3',
        },
        # Basic code mode.
        'codebits': {
            '64': 'code_64bit',
            '32': 'code_32bit',
            '16': 'code_16bit',
        },
        # cpu modes.
        'mode': {
            'real': 'mode_real',
            'prot': 'mode_prot',
            'long': 'mode_long',
            'v86':  'mode_v86',
            'smm':  'mode_smm',
            'vmx':  'mode_vmx',
            'svm':  'mode_svm',
        },
        # paging on/off
        'paging': {
            'on':  'paging_on',
            'off': 'paging_off',
        },
        # CPU vendor
        'vendor': {
            'amd':   'vendor_amd',
            'intel': 'vendor_intel',
            'via':   'vendor_via',
        },
    };
    ## Selector shorthand predicates.
    ## These translate into variable expressions.
    kdPredicates = {
        'o16':        'size==o16',
        'o32':        'size==o32',
        'o64':        'size==o64',
        'ring0':      'ring==0',
        '!ring0':     'ring==1..3',
        'ring1':      'ring==1',
        'ring2':      'ring==2',
        'ring3':      'ring==3',
        'user':       'ring==3',
        'supervisor': 'ring==0..2',
        '16-bit':     'codebits==16',
        '32-bit':     'codebits==32',
        '64-bit':     'codebits==64',
        'real':       'mode==real',
        'prot':       'mode==prot',
        'long':       'mode==long',
        'v86':        'mode==v86',
        'smm':        'mode==smm',
        'vmx':        'mode==vmx',
        'svm':        'mode==svm',
        'paging':     'paging==on',
        '!paging':    'paging==off',
        'amd':        'vendor==amd',
        '!amd':       'vendor!=amd',
        'intel':      'vendor==intel',
        '!intel':     'vendor!=intel',
        'via':        'vendor==via',
        '!via':       'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        Creates a selector.  The variable, operator and value are only
        checked against the tables above by assertions.
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        dValidValues = self.kdVariables[sVariable];
        assert sValue in dValidValues;
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1362
1363
1364class InstructionTest(object):
1365 """
1366 Instruction test.
1367 """
1368
1369 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1370 self.oInstr = oInstr # type: InstructionTest
1371 self.aoInputs = [] # type: list(TestInOut)
1372 self.aoOutputs = [] # type: list(TestInOut)
1373 self.aoSelectors = [] # type: list(TestSelector)
1374
1375 def toString(self, fRepr = False):
1376 """
1377 Converts it to string representation.
1378 """
1379 asWords = [];
1380 if self.aoSelectors:
1381 for oSelector in self.aoSelectors:
1382 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1383 asWords.append('/');
1384
1385 for oModifier in self.aoInputs:
1386 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1387
1388 asWords.append('->');
1389
1390 for oModifier in self.aoOutputs:
1391 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1392
1393 if fRepr:
1394 return '<' + ' '.join(asWords) + '>';
1395 return ' '.join(asWords);
1396
1397 def __str__(self):
1398 """ Provide string represenation. """
1399 return self.toString(False);
1400
1401 def __repr__(self):
1402 """ Provide unambigious string representation. """
1403 return self.toString(True);
1404
class Operand(object):
    """
    Instruction operand.

    Pairs the operand location (key in g_kdOpLocations) with the operand type
    (key in g_kdOpTypes).
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;           ##< g_kdOpLocations
        self.sType  = sType;            ##< g_kdOpTypes

    def usesModRM(self):
        """ Returns True if using some form of ModR/M encoding, i.e. the type starts with E, G or M. """
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1419
1420
1421
1422class Instruction(object): # pylint: disable=too-many-instance-attributes
1423 """
1424 Instruction.
1425 """
1426
1427 def __init__(self, sSrcFile, iLine):
1428 ## @name Core attributes.
1429 ## @{
1430 self.oParent = None # type: Instruction
1431 self.sMnemonic = None;
1432 self.sBrief = None;
1433 self.asDescSections = [] # type: list(str)
1434 self.aoMaps = [] # type: list(InstructionMap)
1435 self.aoOperands = [] # type: list(Operand)
1436 self.sPrefix = None; ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
1437 self.sOpcode = None # type: str
1438 self.sSubOpcode = None # type: str
1439 self.sEncoding = None;
1440 self.asFlTest = None;
1441 self.asFlModify = None;
1442 self.asFlUndefined = None;
1443 self.asFlSet = None;
1444 self.asFlClear = None;
1445 self.dHints = {}; ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
1446 self.sDisEnum = None; ##< OP_XXXX value. Default is based on the uppercased mnemonic.
1447 self.asCpuIds = []; ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
1448 self.asReqFeatures = []; ##< Which features are required to be enabled to run this instruction.
1449 self.aoTests = [] # type: list(InstructionTest)
1450 self.sMinCpu = None; ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
1451 self.oCpuExpr = None; ##< Some CPU restriction expression...
1452 self.sGroup = None;
1453 self.fUnused = False; ##< Unused instruction.
1454 self.fInvalid = False; ##< Invalid instruction (like UD2).
1455 self.sInvalidStyle = None; ##< Invalid behviour style (g_kdInvalidStyles),
1456 self.sXcptType = None; ##< Exception type (g_kdXcptTypes).
1457 ## @}
1458
1459 ## @name Implementation attributes.
1460 ## @{
1461 self.sStats = None;
1462 self.sFunction = None;
1463 self.fStub = False;
1464 self.fUdStub = False;
1465 ## @}
1466
1467 ## @name Decoding info
1468 ## @{
1469 self.sSrcFile = sSrcFile;
1470 self.iLineCreated = iLine;
1471 self.iLineCompleted = None;
1472 self.cOpTags = 0;
1473 self.iLineFnIemOpMacro = -1;
1474 self.iLineMnemonicMacro = -1;
1475 ## @}
1476
1477 ## @name Intermediate input fields.
1478 ## @{
1479 self.sRawDisOpNo = None;
1480 self.asRawDisParams = [];
1481 self.sRawIemOpFlags = None;
1482 self.sRawOldOpcodes = None;
1483 self.asCopyTests = [];
1484 ## @}
1485
1486 def toString(self, fRepr = False):
1487 """ Turn object into a string. """
1488 aasFields = [];
1489
1490 aasFields.append(['opcode', self.sOpcode]);
1491 if self.sPrefix:
1492 aasFields.append(['prefix', self.sPrefix]);
1493 aasFields.append(['mnemonic', self.sMnemonic]);
1494 for iOperand, oOperand in enumerate(self.aoOperands):
1495 aasFields.append(['op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,)]);
1496 if self.aoMaps: aasFields.append(['maps', ','.join([oMap.sName for oMap in self.aoMaps])]);
1497 aasFields.append(['encoding', self.sEncoding]);
1498 if self.dHints: aasFields.append(['hints', ','.join(self.dHints.keys())]);
1499 aasFields.append(['disenum', self.sDisEnum]);
1500 if self.asCpuIds: aasFields.append(['cpuid', ','.join(self.asCpuIds)]);
1501 aasFields.append(['group', self.sGroup]);
1502 if self.fUnused: aasFields.append(['unused', 'True']);
1503 if self.fInvalid: aasFields.append(['invalid', 'True']);
1504 aasFields.append(['invlstyle', self.sInvalidStyle]);
1505 aasFields.append(['fltest', self.asFlTest]);
1506 aasFields.append(['flmodify', self.asFlModify]);
1507 aasFields.append(['flundef', self.asFlUndefined]);
1508 aasFields.append(['flset', self.asFlSet]);
1509 aasFields.append(['flclear', self.asFlClear]);
1510 aasFields.append(['mincpu', self.sMinCpu]);
1511 aasFields.append(['stats', self.sStats]);
1512 aasFields.append(['sFunction', self.sFunction]);
1513 if self.fStub: aasFields.append(['fStub', 'True']);
1514 if self.fUdStub: aasFields.append(['fUdStub', 'True']);
1515 if self.cOpTags: aasFields.append(['optags', str(self.cOpTags)]);
1516 if self.iLineFnIemOpMacro != -1: aasFields.append(['FNIEMOP_XXX', str(self.iLineFnIemOpMacro)]);
1517 if self.iLineMnemonicMacro != -1: aasFields.append(['IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro)]);
1518
1519 sRet = '<' if fRepr else '';
1520 for sField, sValue in aasFields:
1521 if sValue is not None:
1522 if len(sRet) > 1:
1523 sRet += '; ';
1524 sRet += '%s=%s' % (sField, sValue,);
1525 if fRepr:
1526 sRet += '>';
1527
1528 return sRet;
1529
1530 def __str__(self):
1531 """ Provide string represenation. """
1532 return self.toString(False);
1533
1534 def __repr__(self):
1535 """ Provide unambigious string representation. """
1536 return self.toString(True);
1537
1538 def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
1539 """
1540 Makes a copy of the object for the purpose of putting in a different map
1541 or a different place in the current map.
1542 """
1543 oCopy = Instruction(self.sSrcFile, self.iLineCreated);
1544
1545 oCopy.oParent = self;
1546 oCopy.sMnemonic = self.sMnemonic;
1547 oCopy.sBrief = self.sBrief;
1548 oCopy.asDescSections = list(self.asDescSections);
1549 oCopy.aoMaps = [oMap,] if oMap else list(self.aoMaps);
1550 oCopy.aoOperands = list(self.aoOperands); ## Deeper copy?
1551 oCopy.sPrefix = sPrefix if sPrefix else self.sPrefix;
1552 oCopy.sOpcode = sOpcode if sOpcode else self.sOpcode;
1553 oCopy.sSubOpcode = sSubOpcode if sSubOpcode else self.sSubOpcode;
1554 oCopy.sEncoding = self.sEncoding;
1555 oCopy.asFlTest = self.asFlTest;
1556 oCopy.asFlModify = self.asFlModify;
1557 oCopy.asFlUndefined = self.asFlUndefined;
1558 oCopy.asFlSet = self.asFlSet;
1559 oCopy.asFlClear = self.asFlClear;
1560 oCopy.dHints = dict(self.dHints);
1561 oCopy.sDisEnum = self.sDisEnum;
1562 oCopy.asCpuIds = list(self.asCpuIds);
1563 oCopy.asReqFeatures = list(self.asReqFeatures);
1564 oCopy.aoTests = list(self.aoTests); ## Deeper copy?
1565 oCopy.sMinCpu = self.sMinCpu;
1566 oCopy.oCpuExpr = self.oCpuExpr;
1567 oCopy.sGroup = self.sGroup;
1568 oCopy.fUnused = self.fUnused;
1569 oCopy.fInvalid = self.fInvalid;
1570 oCopy.sInvalidStyle = self.sInvalidStyle;
1571 oCopy.sXcptType = self.sXcptType;
1572
1573 oCopy.sStats = self.sStats;
1574 oCopy.sFunction = self.sFunction;
1575 oCopy.fStub = self.fStub;
1576 oCopy.fUdStub = self.fUdStub;
1577
1578 oCopy.iLineCompleted = self.iLineCompleted;
1579 oCopy.cOpTags = self.cOpTags;
1580 oCopy.iLineFnIemOpMacro = self.iLineFnIemOpMacro;
1581 oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;
1582
1583 oCopy.sRawDisOpNo = self.sRawDisOpNo;
1584 oCopy.asRawDisParams = list(self.asRawDisParams);
1585 oCopy.sRawIemOpFlags = self.sRawIemOpFlags;
1586 oCopy.sRawOldOpcodes = self.sRawOldOpcodes;
1587 oCopy.asCopyTests = list(self.asCopyTests);
1588
1589 return oCopy;
1590
1591 def getOpcodeByte(self):
1592 """
1593 Decodes sOpcode into a byte range integer value.
1594 Raises exception if sOpcode is None or invalid.
1595 """
1596 if self.sOpcode is None:
1597 raise Exception('No opcode byte for %s!' % (self,));
1598 sOpcode = str(self.sOpcode); # pylint type confusion workaround.
1599
1600 # Full hex byte form.
1601 if sOpcode[:2] == '0x':
1602 return int(sOpcode, 16);
1603
1604 # The /r form:
1605 if len(sOpcode) == 2 and sOpcode[0] == '/' and sOpcode[1].isdigit():
1606 return int(sOpcode[1:]) << 3;
1607
1608 # The 11/r form:
1609 if len(sOpcode) == 4 and sOpcode.startswith('11/') and sOpcode[-1].isdigit():
1610 return (int(sOpcode[-1:]) << 3) | 0xc0;
1611
1612 # The !11/r form (returns mod=1):
1613 ## @todo this doesn't really work...
1614 if len(sOpcode) == 5 and sOpcode.startswith('!11/') and sOpcode[-1].isdigit():
1615 return (int(sOpcode[-1:]) << 3) | 0x80;
1616
1617 raise Exception('unsupported opcode byte spec "%s" for %s' % (sOpcode, self,));
1618
1619 @staticmethod
1620 def _flagsToIntegerMask(asFlags):
1621 """
1622 Returns the integer mask value for asFlags.
1623 """
1624 uRet = 0;
1625 if asFlags:
1626 for sFlag in asFlags:
1627 sConstant = g_kdEFlagsMnemonics[sFlag];
1628 assert sConstant[0] != '!', sConstant
1629 uRet |= g_kdX86EFlagsConstants[sConstant];
1630 return uRet;
1631
1632 def getTestedFlagsMask(self):
1633 """ Returns asFlTest into a integer mask value """
1634 return self._flagsToIntegerMask(self.asFlTest);
1635
1636 def getModifiedFlagsMask(self):
1637 """ Returns asFlModify into a integer mask value """
1638 return self._flagsToIntegerMask(self.asFlModify);
1639
1640 def getUndefinedFlagsMask(self):
1641 """ Returns asFlUndefined into a integer mask value """
1642 return self._flagsToIntegerMask(self.asFlUndefined);
1643
1644 def getSetFlagsMask(self):
1645 """ Returns asFlSet into a integer mask value """
1646 return self._flagsToIntegerMask(self.asFlSet);
1647
1648 def getClearedFlagsMask(self):
1649 """ Returns asFlClear into a integer mask value """
1650 return self._flagsToIntegerMask(self.asFlClear);
1651
1652 def onlyInVexMaps(self):
1653 """ Returns True if only in VEX maps, otherwise False. (No maps -> False) """
1654 if not self.aoMaps:
1655 return False;
1656 for oMap in self.aoMaps:
1657 if not oMap.isVexMap():
1658 return False;
1659 return True;
1660
1661
1662
1663## All the instructions.
1664g_aoAllInstructions = [] # type: list(Instruction)
1665
1666## All the instructions indexed by statistics name (opstat).
1667g_dAllInstructionsByStat = {} # type: dict(Instruction)
1668
1669## All the instructions indexed by function name (opfunction).
1670g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))
1671
1672## Instructions tagged by oponlytest
1673g_aoOnlyTestInstructions = [] # type: list(Instruction)
1674
1675## Instruction maps.
1676g_aoInstructionMaps = [
1677 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1678 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1679 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1680 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1681 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1682 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1683 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1684 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1685 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1686 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1687 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1688 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1689 ## @todo g_apfnEscF1_E0toFF
1690 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1691 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1692 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1693 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1694 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1695 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1696 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1698
1699 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1700 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1701 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1702 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1703 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1704 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1705 ## @todo What about g_apfnGroup9MemReg?
1706 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1707 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1708 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1709 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1710 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1711 ## @todo What about g_apfnGroup15RegReg?
1712 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1713 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1714 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1715
1716 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1717 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1718
1719 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1720 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1721 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1722 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1724 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1725
1726 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1727 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1728
1729 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1730 InstructionMap('xopmap8', sEncoding = 'xop8'),
1731 InstructionMap('xopmap9', sEncoding = 'xop9'),
1732 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1733 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1734 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1735 InstructionMap('xopmap10', sEncoding = 'xop10'),
1736 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737];
## Instruction maps indexed by their short name (InstructionMap.sName).
g_dInstructionMaps          = dict((oCurMap.sName, oCurMap) for oCurMap in g_aoInstructionMaps);
## Instruction maps indexed by their IEM table name (InstructionMap.sIemName).
g_dInstructionMapsByIemName = dict((oCurMap.sIemName, oCurMap) for oCurMap in g_aoInstructionMaps);
1740
1741
1742
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END).

    The start position is supplied at construction time.  The end position
    and the raw lines are filled in afterwards by complete().
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, sFunction, iInFunction):
        ## The source file containing the block.
        self.sSrcFile     = sSrcFile;
        ## The line with the IEM_MC_BEGIN statement.
        self.iBeginLine   = iBeginLine;
        ## The offset of the IEM_MC_BEGIN statement within the line.
        self.offBeginLine = offBeginLine;
        ## The function the block resides in.
        self.sFunction    = sFunction;
        ## The block number within the function.
        self.iInFunction  = iInFunction;
        ## The line with the IEM_MC_END statement, -1 until complete() is called.
        self.iEndLine     = -1;
        ## The offset of the IEM_MC_END statement within the line.
        self.offEndLine   = 0;
        ## The raw lines the block is made up of.
        self.asLines      = []              # type: list(str)

    def complete(self, iEndLine, offEndLine, asLines):
        """
        Completes the microcode block by recording the end position and the
        raw lines.  Asserts that the block has not been completed before.
        """
        assert self.iEndLine == -1;
        (self.iEndLine, self.offEndLine, self.asLines) = (iEndLine, offEndLine, asLines);
1766
1767
## Global list of microcode blocks (McBlock instances); starts out empty.
g_aoMcBlocks = list() # type: list(McBlock)
1770
1771
1772
class ParserException(Exception):
    """ Parser exception carrying a single, already formatted message string. """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
1777
1778
1779class SimpleParser(object): # pylint: disable=too-many-instance-attributes
1780 """
1781 Parser of IEMAllInstruction*.cpp.h instruction specifications.
1782 """
1783
## @name Parser state.
## Values for the iState member, which the constructor initializes to kiCode.
## @{
kiCode             = 0;     # Initial state.
kiCommentMulti     = 1;     # NOTE(review): presumably 'inside a multi-line comment' - confirm against the parse loop.
## @}
1789
class Macro(object):
    """
    A preprocessor macro (with or without parameters) and the logic for
    expanding its body.
    """
    def __init__(self, sName, asArgs, sBody, iLine):
        self.sName  = sName;    ##< The macro name.
        self.asArgs = asArgs;   ##< None if simple macro, list of parameters otherwise.
        self.sBody  = sBody;    ##< The unexpanded macro body.
        self.iLine  = iLine;    ##< Line number (presumably where the macro was defined).
        ## Matches a parameter name in the body together with any adjacent
        ## '##' paste operator; None when there are no parameters.
        self.oReArgMatch = None;
        if asArgs:
            self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)');

    @staticmethod
    def _needSpace(ch):
        """
        This is just to make the expanded output a bit prettier.

        NOTE(review): as written this is true exactly when ch is whitespace;
        the '(' comparison can never change the result.  Confirm the intent.
        """
        return ch.isspace() and ch != '(';

    def expandMacro(self, oParent, asArgs = None):
        """
        Expands the macro body with the given arguments.

        Returns the body with every parameter occurrence replaced by the
        corresponding value from asArgs.  A '##' operator next to a parameter
        is swallowed together with its surrounding whitespace.  oParent is
        not used.
        """
        _ = oParent;

        # Simple macro: nothing to substitute.
        if not self.oReArgMatch:
            assert not asArgs;
            return self.sBody;

        assert len(asArgs) == len(self.asArgs);
        dValues = dict(zip(self.asArgs, asArgs));

        sText = self.sBody;
        oHit  = self.oReArgMatch.search(sText);
        while oHit:
            (offStart, offEnd) = oHit.span();
            sValue = dValues[oHit.group(2)];

            # Optional padding, only considered when no '##' was matched on that side.
            fPadBefore = not oHit.group(1) and offStart > 0 and self._needSpace(sText[offStart]);
            fPadAfter  = not oHit.group(3) and offEnd < len(sText) and self._needSpace(sText[offEnd]);

            sText = ''.join((sText[:offStart],
                             ' ' if fPadBefore else '',
                             sValue,
                             ' ' if fPadAfter else '',
                             sText[offEnd:],));

            # Resume searching after the inserted value.
            oHit = self.oReArgMatch.search(sText, offStart + len(sValue));

        return sText;
1831
1832
def __init__(self, sSrcFile, asLines, sDefaultMap):
    """
    Initializes the parser state for one source file.

    sSrcFile is the file name used in messages, asLines the lines to parse
    and sDefaultMap the name of the instruction map (a key in
    g_dInstructionMaps, asserted) used when nothing else selects a map.
    """
    self.sSrcFile       = sSrcFile;
    self.asLines        = asLines;
    self.iLine          = 0;
    self.iState         = self.kiCode;
    self.sComment       = '';
    self.iCommentLine   = 0;
    self.aoCurInstrs    = []    # type: list(Instruction)
    self.sCurFunction   = None  # type: str
    self.iMcBlockInFunc = 0;
    self.oCurMcBlock    = None  # type: McBlock
    self.dMacros        = {}    # type: Dict[str,SimpleParser.Macro]
    self.oReMacros      = None  # type: re ##< Regular expression matching invocations of anything in self.dMacros.

    assert sDefaultMap in g_dInstructionMaps;
    self.oDefaultMap    = g_dInstructionMaps[sDefaultMap];

    # Statistics counters.
    self.cTotalInstr    = 0;
    self.cTotalStubs    = 0;
    self.cTotalTagged   = 0;
    self.cTotalMcBlocks = 0;

    # Precompiled regular expressions.  Raw strings are used throughout so
    # that regex escapes like \s and \[ are not treated as (invalid) Python
    # string escapes; the pattern text itself is unchanged.
    self.oReMacroName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReMnemonic    = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReStatsName   = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$');
    self.oReFunctionName= re.compile(r'^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
    self.oReGroupName   = re.compile(r'^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
    self.oReDisEnum     = re.compile(r'^OP_[A-Z0-9_]+$');
    self.oReFunTable    = re.compile(r'^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
    self.oReComment     = re.compile(r'//.*?$|/\*.*?\*/'); ## Full comments.
    self.oReHashDefine  = re.compile(r'^\s*#\s*define\s+(.*)$');
    self.oReHashDefine2 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
    self.oReHashDefine3 = re.compile(r'(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z');        ##< Simple, no arguments.
    self.oReHashUndef   = re.compile(r'^\s*#\s*undef\s+(.*)$');
    self.oReMcBeginEnd  = re.compile(r'\bIEM_MC_(BEGIN|END)\s*\(');

    # Debug output switches (see debug()).
    self.fDebug         = True;
    self.fDebugMc       = False;
    self.fDebugPreProc  = False;

    # Comment tag name -> handler method.
    self.dTagHandlers   = {
        '@opbrief':     self.parseTagOpBrief,
        '@opdesc':      self.parseTagOpDesc,
        '@opmnemonic':  self.parseTagOpMnemonic,
        '@op1':         self.parseTagOpOperandN,
        '@op2':         self.parseTagOpOperandN,
        '@op3':         self.parseTagOpOperandN,
        '@op4':         self.parseTagOpOperandN,
        '@oppfx':       self.parseTagOpPfx,
        '@opmaps':      self.parseTagOpMaps,
        '@opcode':      self.parseTagOpcode,
        '@opcodesub':   self.parseTagOpcodeSub,
        '@openc':       self.parseTagOpEnc,
        '@opfltest':    self.parseTagOpEFlags,
        '@opflmodify':  self.parseTagOpEFlags,
        '@opflundef':   self.parseTagOpEFlags,
        '@opflset':     self.parseTagOpEFlags,
        '@opflclear':   self.parseTagOpEFlags,
        '@ophints':     self.parseTagOpHints,
        '@opdisenum':   self.parseTagOpDisEnum,
        '@opmincpu':    self.parseTagOpMinCpu,
        '@opcpuid':     self.parseTagOpCpuId,
        '@opgroup':     self.parseTagOpGroup,
        '@opunused':    self.parseTagOpUnusedInvalid,
        '@opinvalid':   self.parseTagOpUnusedInvalid,
        '@opinvlstyle': self.parseTagOpUnusedInvalid,
        '@optest':      self.parseTagOpTest,
        '@optestign':   self.parseTagOpTestIgnore,
        '@optestignore': self.parseTagOpTestIgnore,
        '@opcopytests': self.parseTagOpCopyTests,
        '@oponly':      self.parseTagOpOnlyTest,
        '@oponlytest':  self.parseTagOpOnlyTest,
        '@opxcpttype':  self.parseTagOpXcptType,
        '@opstats':     self.parseTagOpStats,
        '@opfunction':  self.parseTagOpFunction,
        '@opdone':      self.parseTagOpDone,
    };
    # Numbered test tags, both '@optestN' and '@optest[N]' for N in 0..47.
    for i in range(48):
        self.dTagHandlers['@optest%u' % (i,)]   = self.parseTagOpTestNum;
        self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;

    # Messages collected by error(), errorOnLine() and errorComment().
    self.asErrors = [];
1915
def raiseError(self, sMessage):
    """
    Raises a ParserException whose message is sMessage prefixed with the
    source file name and the current line number.
    """
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,);
    raise ParserException(sFullMsg);
1921
def raiseCommentError(self, iLineInComment, sMessage):
    """
    Like raiseError, except that the reported line number is
    self.iCommentLine + iLineInComment rather than the current line.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sFullMsg = "%s:%d: error: %s" % (self.sSrcFile, iErrLine, sMessage,);
    raise ParserException(sFullMsg);
1927
def error(self, sMessage):
    """
    Records an error for the current line (self.iLine) in self.asErrors.

    Nothing is raised.  Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1935
def errorOnLine(self, iLine, sMessage):
    """
    Records an error for the explicitly given line number in self.asErrors.

    Nothing is raised.  Always returns False.
    """
    sEntry = u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1943
def errorComment(self, iLineInComment, sMessage):
    """
    Records an error for a line inside the current comment; the reported line
    number is self.iCommentLine + iLineInComment.

    Nothing is raised.  Always returns False.
    """
    iErrLine = self.iCommentLine + iLineInComment;
    sEntry   = u'%s:%d: error: %s\n' % (self.sSrcFile, iErrLine, sMessage,);
    self.asErrors.append(sEntry);
    return False;
1951
def printErrors(self):
    """
    Writes all recorded errors to stderr in one go (nothing is written when
    there are none).

    Returns the number of recorded errors.
    """
    cErrors = len(self.asErrors);
    if cErrors > 0:
        sys.stderr.write(u''.join(self.asErrors));
    return cErrors;
1960
def debug(self, sMessage):
    """
    Prints sMessage to stderr with a 'debug: ' prefix, but only when
    self.fDebug is set.
    """
    if not self.fDebug:
        return;
    print('debug: %s' % (sMessage,), file = sys.stderr);
1967
def stripComments(self, sLine):
    """
    Returns sLine with every match of self.oReComment replaced by a single
    space.

    If a comment start or end marker is still present afterwards, an error
    about an unexpected multi-line comment is recorded via self.error(); the
    line is returned regardless.
    """
    sStripped = self.oReComment.sub(" ", sLine);
    if '/*' in sStripped or '*/' in sStripped:
        self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
    return sStripped;
1978
def parseFunctionTable(self, sLine):
    """
    Parses a PFNIEMOP table, updating/checking the @oppfx value.

    sLine is the table declaration line.  The table name in front of the
    '[' selects the instruction map via g_dInstructionMapsByIemName, with
    the parser's default map as fallback.  Each entry not starting with
    'iemOp_Invalid' is looked up in g_dAllInstructionsByFunction, and the
    opcode byte and prefix implied by the entry's position are matched
    against, or filled into, the instructions registered for that function.
    If none fits, a copy of the first one is created for this position.

    Returns True on success.  On failure an error is recorded and False is
    returned.

    Note! Updates iLine as it consumes the whole table.
    """

    #
    # Extract the table name.
    #
    sName = re.search(r' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
    oMap  = g_dInstructionMapsByIemName.get(sName);
    if not oMap:
        self.debug('No map for PFNIEMOP table: %s' % (sName,));
        oMap = self.oDefaultMap; # This is wrong wrong wrong.

    #
    # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables use four
    # entries per byte:
    #   no prefix, 066h prefix, f3h prefix, f2h prefix
    # Those two tables have 256 & 32 entries respectively.
    #
    cEntriesPerByte   = 4;
    cValidTableLength = 1024;
    asPrefixes        = ('none', '0x66', '0xf3', '0xf2');

    oEntriesMatch = re.search(r'\[ *(256|32) *\]', sLine);
    if oEntriesMatch:
        cEntriesPerByte   = 1;
        cValidTableLength = int(oEntriesMatch.group(1));
        asPrefixes        = (None,);

    #
    # The next line should be '{' and nothing else.
    #
    if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
        return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
    self.iLine += 1;

    #
    # Parse till we find the end of the table.
    #
    iEntry = 0;
    while self.iLine < len(self.asLines):
        # Get the next line and strip comments and spaces (assumes no
        # multi-line comments).
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        sLine = self.stripComments(sLine).strip();

        # Split the line up into entries, expanding IEMOP_X4 usage.
        # Walking backwards keeps the indexes valid while inserting/deleting.
        asEntries = sLine.split(',');
        for i in range(len(asEntries) - 1, -1, -1):
            sEntry = asEntries[i].strip();
            if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
                sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
                asEntries.insert(i + 1, sEntry);
            if sEntry:
                asEntries[i] = sEntry;
            else:
                del asEntries[i];

        # Process the entries.
        for sEntry in asEntries:
            if sEntry in ('};', '}'):
                if iEntry != cValidTableLength:
                    return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
                return True;
            if sEntry.startswith('iemOp_Invalid'):
                pass; # skip
            else:
                # Look up matching instruction by function.
                sPrefix = asPrefixes[iEntry % cEntriesPerByte];
                sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
                aoInstr = g_dAllInstructionsByFunction.get(sEntry);
                if aoInstr:
                    if not isinstance(aoInstr, list):
                        aoInstr = [aoInstr,];
                    oInstr = None;
                    for oCurInstr in aoInstr:
                        if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
                            pass;
                        elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
                            oCurInstr.sPrefix = sPrefix;
                        elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
                            oCurInstr.sOpcode = sOpcode;
                            oCurInstr.sPrefix = sPrefix;
                        else:
                            continue;
                        oInstr = oCurInstr;
                        break;
                    if not oInstr:
                        # None of the registered instructions fits this slot, so clone the first.
                        oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
                        aoInstr.append(oInstr);
                        g_dAllInstructionsByFunction[sEntry] = aoInstr;
                        g_aoAllInstructions.append(oInstr);
                        oMap.aoInstructions.append(oInstr);
                else:
                    self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
                               % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
            iEntry += 1;

    return self.error('Unexpected end of file in PFNIEMOP table');
2084
def addInstruction(self, iLine = None):
    """
    Creates a new Instruction for this source file and registers it in both
    g_aoAllInstructions and the list of current instructions.

    iLine is the line passed to the Instruction constructor; the current
    line (self.iLine) is used when it is None.

    Returns the new instruction.
    """
    if iLine is None:
        iLine = self.iLine;
    oNewInstr = Instruction(self.sSrcFile, iLine);
    for aoTarget in (g_aoAllInstructions, self.aoCurInstrs):
        aoTarget.append(oNewInstr);
    return oNewInstr;
2093
def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
    """
    Derives the mnemonic and operands from an IEM stats base name like string.

    Does nothing if the instruction already has a mnemonic.  Otherwise the
    lower-cased first '_' separated word becomes the mnemonic, and - unless
    the instruction already has operands - each following word is added as
    an operand type looked up in g_kdOpTypes.

    Returns False when an unknown operand type is encountered (operands
    added before it are kept), True otherwise.
    """
    if oInstr.sMnemonic is not None:
        return True;

    asParts = sStats.split('_');
    oInstr.sMnemonic = asParts[0].lower();

    # Existing operands take precedence over what the name suggests.
    if oInstr.aoOperands:
        return True;

    for sType in asParts[1:]:
        if sType not in g_kdOpTypes:
            #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
            return False;
        oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
    return True;
2109
def doneInstructionOne(self, oInstr, iLine):
    """
    Complete the parsing by processing, validating and expanding raw inputs.

    Marks oInstr as completed on iLine, fills in whichever of the mnemonic,
    disassembler enum, stats name, function name, maps and encoding are
    still missing, and registers the instruction with its maps and with the
    global by-stat and by-function dictionaries.

    A duplicate stats name is reported via self.error() but does not change
    the return value.  Always returns True.
    """
    # Each instruction may only be completed once.
    assert oInstr.iLineCompleted is None;
    oInstr.iLineCompleted = iLine;

    #
    # Specified instructions.
    # (Placeholder: nothing is done here at present.)
    #
    if oInstr.cOpTags > 0:
        if oInstr.sStats is None:
            pass;

    #
    # Unspecified legacy stuff.  We generally only got a few things to go on here.
    #   /** Opcode 0x0f 0x00 /0. */
    #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
    # (Placeholder: nothing is done here at present.)
    #
    else:
        #if oInstr.sRawOldOpcodes:
        #
        #if oInstr.sMnemonic:
        pass;

    #
    # Common defaults.
    #

    # Guess mnemonic and operands from stats if the former is missing.
    # The function name with the 'iemOp_' part removed is the fallback source.
    if oInstr.sMnemonic is None:
        if oInstr.sStats is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
        elif oInstr.sFunction is not None:
            self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

    # Derive the disassembler op enum constant from the mnemonic.
    if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
        oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

    # Derive the IEM statistics base name, preferring the function name over
    # mnemonic + operand types.
    if oInstr.sStats is None:
        if oInstr.sFunction is not None:
            oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
        elif oInstr.sMnemonic is not None:
            oInstr.sStats = oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sStats += '_' + oOperand.sType;

    # Derive the IEM function name, preferring mnemonic + operand types over
    # the stats name.
    if oInstr.sFunction is None:
        if oInstr.sMnemonic is not None:
            oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
            for oOperand in oInstr.aoOperands:
                if oOperand.sType:
                    oInstr.sFunction += '_' + oOperand.sType;
        elif oInstr.sStats:
            oInstr.sFunction = 'iemOp_' + oInstr.sStats;

    #
    # Apply default map and then add the instruction to all its groups.
    #
    if not oInstr.aoMaps:
        oInstr.aoMaps = [ self.oDefaultMap, ];
    for oMap in oInstr.aoMaps:
        oMap.aoInstructions.append(oInstr);

    #
    # Derive encoding from operands and maps.
    # Note: the encoding stays None when there are operands but the first one
    #       does not use ModR/M.
    #
    if oInstr.sEncoding is None:
        if not oInstr.aoOperands:
            if oInstr.fUnused and oInstr.sSubOpcode:
                oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
            else:
                oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
        elif oInstr.aoOperands[0].usesModRM():
            if     (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                or oInstr.onlyInVexMaps():
                oInstr.sEncoding = 'VEX.ModR/M';
            else:
                oInstr.sEncoding = 'ModR/M';

    #
    # Check the opstat value and add it to the opstat indexed dictionary.
    # The first instruction to claim a stats name keeps it.
    #
    if oInstr.sStats:
        if oInstr.sStats not in g_dAllInstructionsByStat:
            g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
        else:
            self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                       % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

    #
    # Add to function indexed dictionary.  We allow multiple instructions per function.
    #
    if oInstr.sFunction:
        if oInstr.sFunction not in g_dAllInstructionsByFunction:
            g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
        else:
            g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

    #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
    return True;
2215
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Done with the current instruction(s).

    Completes every current instruction via doneInstructionOne(), updates
    the stub and instruction counters, and resets the pending comment text
    and the current instruction list.  The completion line is the current
    line, or self.iCommentLine + iLineInComment when iLineInComment is given.

    With fEndOfFunction set, the current function name and the per-function
    microcode block counter are reset as well.

    Always returns True.
    """
    if iLineInComment is None:
        iDoneLine = self.iLine;
    else:
        iDoneLine = self.iCommentLine + iLineInComment;

    for oCurInstr in self.aoCurInstrs:
        self.doneInstructionOne(oCurInstr, iDoneLine);
        if oCurInstr.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(self.aoCurInstrs);

    self.aoCurInstrs = [];
    self.sComment    = '';
    if fEndOfFunction:
        #self.debug('%s: sCurFunction=None' % (self.iLine, ));
        self.iMcBlockInFunc = 0;
        self.sCurFunction   = None;
    return True;
2234
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Sets attribute sAttrib of all current instructions to oValue.

    Unless fOverwrite is True, only attributes that are currently None are
    replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        if fOverwrite is True or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
2246
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Sets entry iEntry of the array attribute sAttrib of all current
    instructions to oValue, padding the array with None entries as needed.

    Unless fOverwrite is True, only entries that are currently None are
    replaced.
    """
    for oCurInstr in self.aoCurInstrs:
        aoArray  = getattr(oCurInstr, sAttrib);
        cMissing = iEntry + 1 - len(aoArray);
        if cMissing > 0:
            aoArray.extend([None] * cMissing);
        if fOverwrite is True or aoArray[iEntry] is None:
            aoArray[iEntry] = oValue;
2258
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    If the first comment line starts with the word 'Opcode' followed by a
    hex byte ('0x..' or '0X..'), the words after 'Opcode' are stored, joined
    by single spaces and with any '0X' prefix normalized to '0x', as the
    sRawOldOpcodes attribute of the current instructions (only where not
    already set).

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if      len(asWords) >= 2 \
        and asWords[0] == 'Opcode' \
        and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [('0x' + sWord[2:]) if sWord.startswith('0X') else sWord for sWord in asWords[1:]];
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
2274
def ensureInstructionForOpTag(self, iTagLine):
    """
    Ensure there is an instruction for the op-tag being parsed.

    Creates an instruction at self.iCommentLine + iTagLine if there is no
    current one, then bumps the tag count of every current instruction.
    Instructions receiving their first tag are counted in cTotalTagged.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
2284
@staticmethod
def flattenSections(aasSections):
    """
    Flattens multiline sections into single strings: the lines of each
    section are stripped and joined with a single space.  Empty sections
    are dropped.

    Returns list of strings, one section per string.
    """
    return [' '.join(sLine.strip() for sLine in asLines) for asLines in aasSections if asLines];
2296
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens sections into a single string.

    Lines are stripped and joined with sLineSep.  Every non-empty section
    other than the very first in the list is preceded by sSectionSep; empty
    sections contribute nothing.  There is no trailing separator.
    """
    # Typical: One section with a single line.
    if len(aasSections) == 1 and len(aasSections[0]) == 1:
        return aasSections[0][0].strip();

    asParts = [];
    for iSection, asLines in enumerate(aasSections):
        if not asLines:
            continue;
        if iSection > 0:
            asParts.append(sSectionSep);
        asParts.append(sLineSep.join([sLine.strip() for sLine in asLines]));
    return ''.join(asParts);
2314
2315
2316
2317 ## @name Tag parsers
2318 ## @{
2319
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \\@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    trailing full stop is added if missing.  Returns True after storing the
    brief in the instruction, False (with an error recorded) if the value is
    invalid or a brief has already been set.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));
    # Find the first full stop that is followed by a space or ends the text;
    # dots inside words (e.g. 'v1.2') do not end a sentence.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;     # Was missing: the validated text was never stored.
    return True;
2349
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \\@opdesc
    Value:  Text description, multiple sections, appended.

    It is used to describe instructions.  Each non-empty section is
    flattened into one string and appended to the instruction's description
    sections.  Always returns True.
    """
    _ = sTag; _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        oInstr.asDescSections.extend(self.flattenSections(aasSections));
    return True;
2364
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \\@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must be a valid C identifier string.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True after storing the mnemonic, False (with an error recorded)
    if it is malformed or the instruction already has one.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if self.oReMnemonic.match(sNewMnemonic) is None:
        return self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));

    # Refuse to overwrite an existing mnemonic.
    sOldMnemonic = oInstr.sMnemonic;
    if sOldMnemonic is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                           % (sTag, sOldMnemonic, sNewMnemonic,));

    oInstr.sMnemonic = sNewMnemonic;
    return True;
2387
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tags:  \\@op1, \\@op2, \\@op3, \\@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key in g_kdOpLocations; when
    omitted, the default location of the type (g_kdOpTypes[type][1]) is used.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be a
    key in g_kdOpTypes.

    The operand index is taken from the last character of the tag.  Returns
    True after storing the operand, False (with an error recorded) if the
    value is malformed or the operand slot is already taken.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        # This used to say 'where', which pointed at the wrong half of the value.
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1];
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
2437
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \\@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Map names are separated by commas (line and section breaks count as
    commas too); surrounding whitespace and empty entries are ignored.  Each
    name must be a key in g_dInstructionMaps.

    Returns True on success, False (with an error recorded) if the value is
    empty, names an unknown map, or repeats a map the instruction is
    already in.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten, split up and validate the value.  str.split never returns an
    # empty list, so blank entries are filtered out here to make the 'value
    # required' check effective.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',') if sMap.strip()];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # Same map listed twice in this value; reported but not fatal.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    return True;
2472
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \\@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    'n/a' is stored as None.  A two character value gets '0x' prepended
    before validation.  Returns True after storing the prefix, False (with
    an error recorded) if the value is missing, malformed, unknown or the
    instruction already has a prefix.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        # An empty value used to end in an IndexError below.
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    return True;
2510
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \\@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted forms are an opcode byte (as judged by _isValidOpcodeByte), or
    '/N', '11/N' and '!11/N' with N being a single digit in the range 0..7.

    Returns True after storing the opcode, False (with an error recorded)
    if the value is invalid or the instruction already has an opcode.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    _ = iEndLine;

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid  = _isValidOpcodeByte(sOpcode);
    if not fValid:
        # The ModR/M reg field is three bits wide, so only /0 thru /7 make sense.
        for sLead in ('/', '11/', '!11/'):
            if len(sOpcode) == len(sLead) + 1 and sOpcode.startswith(sLead) and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    return True;
2540
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:    \\@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key in g_kdSubOpcodes; what gets stored is the first
    element of the corresponding entry.  Returns True on success, False
    (with an error recorded) for an unknown value or an attempt to overwrite.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sRawValue = self.flattenAllSections(aasSections);
    if sRawValue not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawValue,));
    sNewSubOpcode = g_kdSubOpcodes[sRawValue][0];

    # Set it, refusing to overwrite.
    sOldSubOpcode = oInstr.sSubOpcode;
    if sOldSubOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                           % ( sTag, sOldSubOpcode, sNewSubOpcode,));
    oInstr.sSubOpcode = sNewSubOpcode;
    return True;
2567
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    Records the operand encoding style of the current instruction.

    The flattened value is accepted when it is a key of g_kdEncodings, a key
    of g_dInstructionMaps, or passes _isValidOpcodeByte().  A second value for
    the same instruction is rejected.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and check it against everything that is accepted.
    sEncoding = self.flattenAllSections(aasSections);
    fAccepted = sEncoding in g_kdEncodings \
             or sEncoding in g_dInstructionMaps \
             or _isValidOpcodeByte(sEncoding);
    if not fAccepted:
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sEncoding,));

    # An encoding that was already specified must not be replaced.
    sOldEncoding = oInstr.sEncoding;
    if sOldEncoding is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                 % ( sTag, sOldEncoding, sEncoding,));

    oInstr.sEncoding = sEncoding;
    return True;
2594
## EFlags tag to Instruction attribute name.
## parseTagOpEFlags looks up its sTag here and then reads/updates the named
## attribute on the instruction via getattr/setattr.
kdOpFlagToAttr = {
    '@opfltest':    'asFlTest',
    '@opflmodify':  'asFlModify',
    '@opflundef':   'asFlUndefined',
    '@opflset':     'asFlSet',
    '@opflclear':   'asFlClear',
};
2603
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    Stores a comma separated list of EFLAGS mnemonics in the instruction
    attribute that self.kdOpFlagToAttr names for sTag.  The single value
    'none' (any case) gives an empty list.  Every other entry must be a key of
    g_kdEFlagsMnemonics, if need be after stripping surrounding whitespace.

    Returns True on success; False or the result of self.errorComment() when
    an entry is invalid or the attribute was already set.
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten with commas as the only separator and split into entries.
    sFlat   = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags = sFlat.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc       = True;
        asChecked = [];
        for sFlag in asFlags:
            if sFlag not in g_kdEFlagsMnemonics:
                sStripped = sFlag.strip();
                if sStripped in g_kdEFlagsMnemonics:
                    sFlag = sStripped;
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
            asChecked.append(sFlag);
        if not fRc:
            return False;
        asFlags = asChecked;

    # Store the list unless this tag was already seen for the instruction.
    sAttr = self.kdOpFlagToAttr[sTag];
    asOld = getattr(oInstr, sAttr);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttr, asFlags);
    return True;
2635
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    The single value 'none' (any case) adds nothing.  Otherwise every entry
    must be a key of g_kdHints; accepted entries are added as keys to
    oInstr.dHints.  A duplicate is reported but does not fail the tag.

    Returns True, or False when an invalid entry made self.errorComment()
    return a false value.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for iHint, sHint in enumerate(asHints):
            if sHint not in g_kdHints:
                if sHint.strip() in g_kdHints:
                    # Replace the list entry.  Indexing into the string
                    # itself would raise TypeError (strings are immutable).
                    asHints[iHint] = sHint.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
2669
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    Selects a specific (legacy) disassembler enum value for the instruction.

    The value must match self.oReDisEnum.  When more than one word is given
    an error is reported and the first word is used; an empty value fails.

    Returns True on success; False or the result of self.errorComment()
    otherwise.
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split into words; exactly one is expected.
    asWords = self.flattenAllSections(aasSections).split();
    if not asWords:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        return False;
    if len(asWords) > 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));

    sDisEnum = asWords[0];
    if not self.oReDisEnum.match(sDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                 % (sTag, sDisEnum, self.oReDisEnum.pattern));

    # An enum value that was already specified must not be replaced.
    sOldDisEnum = oInstr.sDisEnum;
    if sOldDisEnum is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, sOldDisEnum, sDisEnum,));

    oInstr.sDisEnum = sDisEnum;
    return True;
2698
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The first word of the value must be a key of g_kdCpuNames; extra words
    are reported as an error but otherwise ignored.  An empty value is
    rejected.  If the instruction already has a different minimum CPU, the
    conflict is reported and the existing value is kept.

    Returns True on success (also after a conflict or extra words were
    reported), otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        # Without this guard the asCpus[0] below raises IndexError.
        return self.errorComment(iTagLine, '%s: expected a CPU name, found none' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s (names: %s)'
                                 % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  The attribute is deliberately left untouched until here;
    # assigning it during validation makes the conflict check unreachable.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
2728
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The single value 'none' (any case) adds nothing.  Otherwise every entry
    of the comma or space separated list must be a key of g_kdCpuIdFlags;
    accepted entries are appended to oInstr.asCpuIds.  A duplicate is
    reported but does not fail the tag.

    Returns True, or False when an invalid entry made self.errorComment()
    return a false value.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for iCpuId, sCpuId in enumerate(asCpuIds):
            if sCpuId not in g_kdCpuIdFlags:
                if sCpuId.strip() in g_kdCpuIdFlags:
                    # Replace the list entry.  Indexing into the string
                    # itself would raise TypeError (strings are immutable).
                    asCpuIds[iCpuId] = sCpuId.strip();
                else:
                    fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
2762
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Sets the group the instruction belongs to.

    Exactly one word matching self.oReGroupName is required, and a group that
    was already set must not be replaced.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split into words; there must be exactly one.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));

    sGroup = asGroups[0];
    if not self.oReGroupName.match(sGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                 % (sTag, sGroup, self.oReGroupName.pattern));

    # Refuse to replace an earlier group.
    sOldGroup = oInstr.sGroup;
    if sOldGroup is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, sOldGroup, sGroup,));

    oInstr.sGroup = sGroup;
    return True;
2788
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    Exactly one word that is a key of g_kdInvalidStyles is required, and only
    one of the three tags may be given per instruction.  Besides storing the
    style, \@opunused sets oInstr.fUnused and \@opinvalid sets
    oInstr.fInvalid.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split into words; there must be exactly one valid style.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));

    sStyle = asStyles[0];
    if sStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                 % (sTag, sStyle, g_kdInvalidStyles.keys(),));

    # Only one of the three tags is allowed per instruction.
    sOldStyle = oInstr.sInvalidStyle;
    if sOldStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, sOldStyle, sStyle,));
    oInstr.sInvalidStyle = sStyle;

    # Two of the tags additionally raise a flag on the instruction.
    sFlagAttr = { '@opunused': 'fUnused', '@opinvalid': 'fInvalid', }.get(sTag);
    if sFlagAttr is not None:
        setattr(oInstr, sFlagAttr, True);
    return True;
2826
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is parsed as one test.  A test that parses
    cleanly is appended to oInstr.aoTests; otherwise the problems are
    reported through self.errorComment() and the test is dropped.

    Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are walked back to front: outputs first, then inputs
        # once '->' is seen, then selectors once '/' is seen.
        for sWord in reversed(asWords):
            # Check for array switchers.  The lists must be compared by
            # identity: a value comparison treats two distinct empty lists
            # as equal and lets a repeated or misplaced separator through.
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp  = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                              % ( sTag, sValue, sCond,
                                                  TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                          % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    # Only the first operator found in the condition is considered.
                    break;
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                          % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # An explicit ':type' suffix wins over the field's default type.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                      % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                  % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                              % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                          % ( sTag, sDesc, sField, sItem,
                                              ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                         if asVal[1] in asValidFieldKinds]),));
                    # Only the first operator found in the item is considered.
                    break;
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Keep the test only if everything parsed.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                              % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
2971
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    The number is compared with the count of tests the instruction already
    has and a mismatch is reported.  The tag is then passed on to
    parseTagOpTest() regardless, whose result is returned.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The digits follow '@optest', with or without square brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
2987
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@optestign | \@optestignore
    Value:  <value is ignored>

    Accepts the tag and does nothing with it, which makes it a simple way of
    disabling a test while debugging another one.

    See also \@oponlytest.

    Always returns True.
    """
    # All arguments are intentionally unused.
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
2999
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each word matching self.oReStatsName or self.oReFunctionName is appended
    to oInstr.asCopyTests; per the note in the code the copying itself is
    carried out later, after all input has been parsed.  Invalid and
    duplicate references are reported but do not fail the tag.

    Returns True, or the result of self.errorComment() if no reference was
    given at all.
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
            continue;
        if self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
            continue;
        self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                          % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));

    return True;
3028
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oponlytest | \@oponly
    Value:  none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless it is
    already in there.

    See also \@optestignore.

    Returns True on success, or the result of self.errorComment() if a value
    was given.
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag takes no value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Register the instruction once.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
3051
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opxcpttype
    Value:  [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one word that is a key of g_kdXcptTypes is required, and a type
    that was already set must not be replaced.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split into words; there must be exactly one valid type.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));

    sType = asTypes[0];
    if sType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                 % (sTag, sType, sorted(g_kdXcptTypes.keys()),));

    # Refuse to replace an earlier exception type.
    sOldType = oInstr.sXcptType;
    if sOldType is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                                 % ( sTag, sOldType, sType,));

    oInstr.sXcptType = sType;
    return True;
3078
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opfunction
    Value:  <VMM function name>

    Explicitly sets the IEM function name.  Normally it is picked up from
    the FNIEMOP_XXX macro invocation after the description, or generated
    from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    The value must match self.oReFunctionName and a name that was already
    set must not be replaced.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFunction = self.flattenAllSections(aasSections);
    if not self.oReFunctionName.match(sFunction):
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sFunction, self.oReFunctionName.pattern));

    # Refuse to replace an earlier name.
    sOldFunction = oInstr.sFunction;
    if sOldFunction is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sOldFunction, sFunction,));

    oInstr.sFunction = sFunction;
    return True;
3106
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opstats
    Value:  <VMM statistics base name>

    Explicitly sets the statistics name.  Normally it is picked up from the
    IEMOP_MNEMONIC macro invocation, or generated from the mnemonic and
    operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or which aren't
    implemented yet.

    The value must match self.oReStatsName and a name that was already set
    must not be replaced.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;   # Not needed by this handler.
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sStats = self.flattenAllSections(aasSections);
    if not self.oReStatsName.match(sStats):
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sStats, self.oReStatsName.pattern));

    # Refuse to replace an earlier name.
    sOldStats = oInstr.sStats;
    if sOldStats is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sOldStats, sStats,));

    oInstr.sStats = sStats;
    return True;
3134
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdone
    Value:  none

    Used to explicitly flush the instructions that have been specified.

    Returns the result of self.doneInstructions(), or the result of
    self.errorComment() if a value was given.
    """
    _ = iEndLine;   # Not needed by this handler.

    # The tag takes no value.
    sFlattened = self.flattenAllSections(aasSections);
    if sFlattened == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sFlattened,));
3147
3148 ## @}
3149
3150
def parseComment(self):
    """
    Parse the current comment (self.sComment).

    The comment is split into lines and each tag line (one starting with
    '@') opens a new tag whose value consists of the rest of that line plus
    the following lines up to the next tag, grouped into sections separated
    by blank lines.  Each completed tag is passed to its handler in
    self.dTagHandlers; text before the first tag belongs to the pseudo tag
    '@default'.

    Returns False if the comment contains neither 'Opcode' nor '@', True
    otherwise.
    """
    #
    # Reject if comment doesn't seem to contain anything interesting.
    #
    sComment = self.sComment;
    if 'Opcode' not in sComment and '@' not in sComment:
        return False;

    #
    # Split the comment into lines, removing leading asterisks and spaces.
    # Also remove leading and trailing empty lines.  Each leading line that
    # is dropped advances self.iCommentLine.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sComment.split('\n')];

    while asLines and not asLines[0]:
        self.iCommentLine += 1;
        del asLines[0];

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Check for old style: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Look for @op* tagged data.
    #
    cOpTags      = 0;
    sFlatDefault = None;
    sCurTag      = '@default';
    iCurTagLine  = 0;
    asCurSection = [];
    aasSections  = [ asCurSection, ];
    for iLine, sLine in enumerate(asLines):
        if sLine.startswith('@'):
            #
            # Process the previous tag.  A trailing empty section is dropped
            # first, unless it is the only one.
            #
            if len(aasSections) > 1 and not asCurSection:
                aasSections.pop();
            if sCurTag in self.dTagHandlers:
                self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
                cOpTags += 1;
            elif sCurTag.startswith('@op'):
                self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag,));
            elif sCurTag == '@default':
                sFlatDefault = self.flattenAllSections(aasSections);
            elif '@op' + sCurTag[1:] in self.dTagHandlers:
                self.errorComment(iCurTagLine, 'Did you mean "@op%s" rather than "%s"?' % (sCurTag[1:], sCurTag));
            elif sCurTag in ('@encoding', '@opencoding'):
                self.errorComment(iCurTagLine, 'Did you mean "@openc" rather than "%s"?' % (sCurTag,));

            #
            # New tag.  Anything after the tag name starts its first section.
            #
            asParts      = sLine.split(None, 1);
            sCurTag      = asParts[0].lower();
            asCurSection = asParts[1:];
            aasSections  = [ asCurSection, ];
            iCurTagLine  = iLine;
            continue;

        # Ordinary line: extend the current section, or open a new section
        # when a blank line follows a non-empty one.
        if sLine:
            asCurSection.append(sLine);
        elif asCurSection:
            asCurSection = [];
            aasSections.append(asCurSection);

    #
    # Process the final tag (no spelling suggestions are made here).
    #
    if len(aasSections) > 1 and not asCurSection:
        aasSections.pop();
    if sCurTag in self.dTagHandlers:
        self.dTagHandlers[sCurTag](sCurTag, aasSections, iCurTagLine, iLine);
        cOpTags += 1;
    elif sCurTag.startswith('@op'):
        self.errorComment(iCurTagLine, 'Unknown tag: %s' % (sCurTag,));
    elif sCurTag == '@default':
        sFlatDefault = self.flattenAllSections(aasSections);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if sFlatDefault and cOpTags > 0:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sFlatDefault,));

    return True;
3251
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    sInvocation must start with the macro name.  When the invocation is not
    complete within sInvocation, further text is appended from self.asLines,
    starting at index self.iLine.  Quoted strings and nested parentheses are
    skipped when splitting the arguments.

    Returns a tuple, first element is the offset following the macro
    invocation. The second element is a list of macro arguments, where the
    zero'th is the macro name.
    """
    # First the name.
    offOpen = sInvocation.find('(');
    if offOpen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sName = sInvocation[:offOpen].strip();
    if not self.oReMacroName.match(sName):
        # NOTE(review): this hands back the result of self.error() rather
        # than the tuple described above - confirm callers cope with that.
        return self.error("invalid macro name '%s'" % (sName,));
    asRet = [sName, ];

    # Arguments.
    iLine    = self.iLine;
    cDepth   = 1;
    off      = offOpen + 1;
    offStart = off;
    chQuote  = None;
    while cDepth > 0:
        # Pull in following lines until there is a character at 'off'.  This
        # has to loop: an empty line adds nothing, and indexing right after
        # a single append would then raise IndexError.
        while off >= len(sInvocation):
            if iLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (off, asRet);
            sInvocation += self.asLines[iLine];
            iLine += 1;
        ch = sInvocation[off];

        if chQuote:
            # Inside a string/char literal: skip escaped chars, look for the end.
            if ch == '\\' and off + 1 < len(sInvocation):
                off += 1;
            elif ch == chQuote:
                chQuote = None;
        elif ch in ('"', '\'',):
            chQuote = ch;
        elif ch in (',', ')',):
            # Only separators at the outermost level end an argument.
            if cDepth == 1:
                asRet.append(sInvocation[offStart:off].strip());
                offStart = off + 1;
            if ch == ')':
                cDepth -= 1;
        elif ch == '(':
            cDepth += 1;
        off += 1;

    return (off, asRet);
3302
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and, if it is followed
    by an opening parenthesis (whitespace in between is allowed), parses it
    using parseMacroInvocation().

    Returns (len(sCode), None) if not found.  If found, the
    parseMacroInvocation result with the offset made relative to the start
    of sCode.
    """
    offHit = sCode.find(sMacro);
    # startswith() rather than [0]: the name may be the very last thing in
    # sCode, which leaves nothing to index.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
3312
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience variant of findAndParseMacroInvocationEx() which drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    _, asArgs = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return asArgs;
3318
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in the given order and stops at the
    first one findAndParseMacroInvocation() finds in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no further lookups are made after a hit.
    aoResults = (self.findAndParseMacroInvocation(sCode, sMacro) for sMacro in asMacro);
    return next((asRet for asRet in aoResults if asRet is not None), None);
3328
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower, # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are checked against, and merged into, the last
    instruction in self.aoCurInstrs (one is added if the list is empty):
    mnemonic, operands, encoding/form, statistics name and hints.  Every
    inconsistency is reported via self.error() and processing continues.

    Nothing beyond the basic a_Upper/a_Lower checks is done when sIemHints
    contains IEMOPHINT_SKIP_PYTHON.  sAsm is not used.

    Always returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # Substring tests instead of using str.find() results as
                # booleans: find() returns -1 (true) for no match and 0
                # (false) for a match at the very start of the name.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those covered by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
3468
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Handles the short IEMOP_MNEMONIC0 thru IEMOP_MNEMONIC4 macro forms.

    These forms carry neither a statistics name nor a mnemonic string, so
    both are synthesized here from the lower case mnemonic and the operand
    list before the job is handed over to workerIemOpMnemonicEx.

    Returns whatever workerIemOpMnemonicEx returns.
    """
    sStats    = sLower;
    sMnemonic = sLower;
    if asOperands:
        # E.g. 'add' + ['Gv', 'Ev'] gives 'add_Gv_Ev' and 'add Gv,Ev'.
        sStats    = sLower + '_' + '_'.join(asOperands);
        sMnemonic = sLower + ' ' + ','.join(asOperands);
    return self.workerIemOpMnemonicEx(sMacro, sStats, sMnemonic, sForm, sUpper, sLower,
                                      sDisHints, sIemHints, asOperands);
3478
def workerIemMcBegin(self, sCode, offBeginStatementInLine):
    """
    Handles an IEM_MC_BEGIN macro invocation by opening a new MC block.

    sCode is the code snippet containing the statement (only used in the
    error message), offBeginStatementInLine the offset of the statement
    within the current line.

    Returns True.  Calls self.raiseError when there is no current function
    or when the previous block has not been closed by IEM_MC_END yet.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));

    # A block needs a function to belong to and blocks cannot be nested.
    if not self.sCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END. Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # Create the block, make it current and register it globally.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine, self.sCurFunction, self.iMcBlockInFunc);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);

    # Update the per-parser and per-function counters.
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
3498
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Process a IEM_MC_END macro invocation.

    Completes the MC block opened by the preceding IEM_MC_BEGIN by passing it
    the end line, the end offset and the source lines it covers, and then
    clears self.oCurMcBlock.

    offEndStatementInLine is the offset of the IEM_MC_END statement within
    the current line.

    Returns True.  Calls self.raiseError if no block is open, or if a block
    spanning several lines doesn't have IEM_MC_BEGIN as the first word on
    its first line.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Check preconditions.
    if not self.oCurMcBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # Complete and discard the current block.
    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > self.oCurMcBlock.iBeginLine:
        # Regular case: the block covers a range of entries in self.asLines
        # (line numbers are one-based, hence the - 1 on the start index).
        asLines = self.asLines[self.oCurMcBlock.iBeginLine - 1 : self.iLine];
        if not asLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        # Begin and end are in the same self.asLines entry.
        sRawLine = self.asLines[self.iLine - 1];

        # Drop everything following the embedded line holding IEM_MC_END.
        off = sRawLine.find('\n', offEndStatementInLine);
        if off > 0:
            sRawLine = sRawLine[:off + 1];

        # Drop everything preceding the embedded line holding IEM_MC_BEGIN;
        # rfind returns -1 when there is no preceding newline, giving off = 0.
        off = sRawLine.rfind('\n', 0, self.oCurMcBlock.offBeginLine) + 1;
        sRawLine = sRawLine[off:];
        if not sRawLine.strip().startswith('IEM_MC_BEGIN'):
            # Something precedes IEM_MC_BEGIN on its line, so start at the statement itself.
            sRawLine = sRawLine[self.oCurMcBlock.offBeginLine - off:]

        # Turn the text back into a list of newline terminated lines.
        asLines = [sLine + '\n' for sLine in sRawLine.split('\n')];

    self.oCurMcBlock.complete(self.iLine, offEndStatementInLine, asLines);
    self.oCurMcBlock = None;
    return True;
3540
def checkCodeForMacro(self, sCode, offLine):
    """
    Scans a code snippet for the macro invocations the parser cares about.

    sCode is the snippet to check and offLine its offset within the current
    line (added to the match offsets of IEM_MC_BEGIN/END statements).

    Returns True when a decoder or worker function definition, an
    IEMOP_HLP_DONE_VEX_DECODING* invocation or the 'IEM_MC_' substring was
    found.  Returns False otherwise, which includes snippets that only hold
    IEMOP_MNEMONIC* invocations.
    """
    # Everything of interest is a macro invocation, so without an opening
    # parenthesis past the first character there is nothing to do.
    if sCode.find('(') > 0:
        #
        # Instruction decoder function definitions.  ASSUME single line.
        #
        asDecoderMacros = [ 'FNIEMOP_DEF',
                            'FNIEMOPRM_DEF',
                            'FNIEMOP_STUB',
                            'FNIEMOP_STUB_1',
                            'FNIEMOP_UD_STUB',
                            'FNIEMOP_UD_STUB_1' ];
        asArgs = self.findAndParseFirstMacroInvocation(sCode, asDecoderMacros);
        if asArgs is not None:
            sDefMacro = asArgs[0];
            sFunction = asArgs[1];
            self.sCurFunction = sFunction;

            # Make sure there is an instruction to attach the function to and
            # note down where the definition macro was seen.
            if not self.aoCurInstrs:
                self.addInstruction();
            for oInstr in self.aoCurInstrs:
                if oInstr.iLineFnIemOpMacro == -1:
                    oInstr.iLineFnIemOpMacro = self.iLine;
                else:
                    self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (asArgs[0], oInstr,) );

            fIsStub = sDefMacro.find('STUB') > 0;
            self.setInstrunctionAttrib('sFunction', sFunction);
            self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
            self.setInstrunctionAttrib('fUdStub', sDefMacro.find('UD_STUB') > 0, fOverwrite = True);
            if fIsStub:
                # Stubs have no body, so the instruction(s) are done here.
                self.doneInstructions(fEndOfFunction = True);
            return True;

        #
        # Worker function definitions, giving us a context for MC blocks.
        #
        asArgs = self.findAndParseFirstMacroInvocation(sCode, [ 'FNIEMOP_DEF_1', 'FNIEMOP_DEF_2', ]);
        if asArgs is not None:
            self.sCurFunction = asArgs[1];
            return True;

        #
        # IEMOP_HLP_DONE_VEX_DECODING_*
        #
        asVexDoneMacros = [ 'IEMOP_HLP_DONE_VEX_DECODING',
                            'IEMOP_HLP_DONE_VEX_DECODING_L0',
                            'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                            'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                          ];
        asArgs = self.findAndParseFirstMacroInvocation(sCode, asVexDoneMacros);
        if asArgs is not None:
            sMacro = asArgs[0];
            if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
                # The L0 variants imply the vex_l_zero hint; complain if the
                # mnemonic macro didn't specify it and add it regardless.
                for oInstr in self.aoCurInstrs:
                    if 'vex_l_zero' in oInstr.dHints:
                        continue;
                    if oInstr.iLineMnemonicMacro >= 0:
                        self.errorOnLine(oInstr.iLineMnemonicMacro,
                                         'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                    oInstr.dHints['vex_l_zero'] = True;
            return True;

        #
        # IEMOP_MNEMONIC*
        #
        if sCode.find('IEMOP_MNEMONIC') >= 0:
            # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
            asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
            if asArgs is not None and len(self.aoCurInstrs) == 1:
                oInstr = self.aoCurInstrs[0];
                if oInstr.sStats is None:
                    oInstr.sStats = asArgs[1];
                self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

            # IEMOP_MNEMONIC<n>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1 .. a_Op<n>,]
            #                     a_fDisHints, a_fIemHints)
            # The parsed list starts with the macro name, so the operands
            # occupy indexes 6 thru 5+n and the two hint values follow them.
            for cOperands in range(5):
                asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%uEX' % (cOperands,));
                if asArgs is not None:
                    idxHints = 6 + cOperands;
                    self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                               asArgs[idxHints], asArgs[idxHints + 1], list(asArgs[6:idxHints]));

            # IEMOP_MNEMONIC<n>(a_Form, a_Upper, a_Lower, [a_Op1 .. a_Op<n>,] a_fDisHints, a_fIemHints)
            # Same layout as above, only without the two leading arguments.
            for cOperands in range(5):
                asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC%u' % (cOperands,));
                if asArgs is not None:
                    idxHints = 4 + cOperands;
                    self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                             asArgs[idxHints], asArgs[idxHints + 1], list(asArgs[4:idxHints]));

        #
        # IEM_MC_BEGIN + IEM_MC_END.
        # We must support multiple instances per code snippet.
        #
        offCode = sCode.find('IEM_MC_');
        if offCode >= 0:
            for oMatch in self.oReMcBeginEnd.finditer(sCode, offCode):
                offStatement = offLine + oMatch.start();
                if oMatch.group(1) == 'END':
                    self.workerIemMcEnd(offStatement);
                else:
                    self.workerIemMcBegin(sCode, offStatement);
            return True;

    return False;
3681
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The expression is one capturing group holding an alternative for each
    known macro: function-like macros (asArgs is not None) match the name
    followed by optional whitespace and the opening parenthesis, simple
    macros match the name as a whole word.  self.oReMacros is set to None
    when no macros are known.

    Returns True.
    """
    if self.dMacros:
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            # Raw strings here: '\s' and '\(' are invalid escape sequences in
            # normal string literals and trigger warnings on newer Pythons.
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
3702
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, where sRest is the text following the
    directive word.

    Only macros whose body contains IEM_MC_BEGIN are recorded (in
    self.dMacros), since those are the only ones we need to expand.

    Returns True if the macro was uninteresting, otherwise the result of
    rebuilding the macro regex.  An unparsable definition returns whatever
    self.error returns.
    """
    #
    # Join up continuation lines, keeping the newline characters but
    # dropping the backslash escaping them.
    #
    iLineStart = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sNextLine = self.asLines[self.iLine];
        self.iLine += 1;
        sRest = sRest[:-2].rstrip() + '\n' + sNextLine;

    #
    # Split the definition into name, parameter list and body.  Try the
    # function-like form first and fall back on the simple one.
    #
    oMatch = self.oReHashDefine2.match(sRest);
    if oMatch:
        sBody  = oMatch.group(3);
        asArgs = [sParam.strip() for sParam in oMatch.group(2).split(',')];
    else:
        oMatch = self.oReHashDefine3.match(sRest);
        if not oMatch:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sBody  = oMatch.group(2);
        asArgs = None;
    sName = oMatch.group(1);
    assert sName == sName.strip();

    #
    # Skip macros without MC blocks.  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    if sBody.find("IEM_MC_BEGIN") < 0:
        return True;

    #
    # Record the macro and refresh the regex used for spotting invocations.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody, iLineStart);
    return self.workerPreProcessRecreateMacroRegex();
3752
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, where sRest is the text following the
    directive word.

    Returns True if we aren't tracking the macro, otherwise the result of
    rebuilding the macro regex after forgetting it.
    """
    # Cheap comment stripping: chop the text at the first slash (unless it
    # is the very first character), then isolate the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless this is one of the macros we are tracking.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
3771
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Checks whether a preprocessor line is a #define or an #undef and
    dispatches it to the matching worker.

    Returns the worker's result, or False if the line is neither.
    """
    # The #define check goes first; the first matching expression wins.
    aoDispatch = (
        ( self.oReHashDefine, self.workerPreProcessDefine, ),
        ( self.oReHashUndef,  self.workerPreProcessUndef,  ),
    );
    for oRegex, fnWorker in aoDispatch:
        oMatch = oRegex.match(sLine);
        if oMatch:
            # The workers expect a newline terminated string.
            return fnWorker(oMatch.group(1) + '\n');
    return False;
3784
def expandMacros(self, sLine, oMatch):
    """
    Expands macros we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    sLine is the line to work on and oMatch the self.oReMacros match object
    for it, where group 1 is the macro name (including the opening
    parenthesis for macros taking arguments).

    Returns the line with the invocation replaced by the macro expansion.
    Calls self.raiseError if the argument list isn't terminated on this
    line or if the argument count doesn't match the macro definition.
    """
    #
    # Get our bearings.
    #
    offMatch = oMatch.start();
    sName = oMatch.group(1);
    assert sName == sLine[oMatch.start() : oMatch.end()];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName]  # type: SimpleParser.Macro

    #
    # Deal with simple macro invocations w/o parameters.
    #
    if not fWithArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sLine[:offMatch] + oMacro.expandMacro(self) + sLine[oMatch.end():];

    #
    # Complicated macro with parameters.
    # Start by extracting the parameters. ASSUMES they are all on the same line!
    #
    cLevel = 1;                 # Parenthesis nesting depth, the match ate the first '('.
    offCur = oMatch.end();
    offCurArg = offCur;         # Where the argument being collected starts.
    asArgs = [];
    while True:
        if offCur >= len(sLine):
            # Ran off the end of the line without seeing the closing
            # parenthesis.  Report it as a parser error rather than
            # tripping over an IndexError below.
            self.raiseError('expandMacros: Unterminated argument list in %s invocation (must be on a single line)'
                            % (oMacro.sName,));
        ch = sLine[offCur];
        if ch == '(':
            cLevel += 1;
        elif ch == ')':
            cLevel -= 1;
            if cLevel == 0:
                asArgs.append(sLine[offCurArg:offCur].strip());
                break;
        elif ch == ',' and cLevel == 1:
            # Only commas at the top level separate arguments.
            asArgs.append(sLine[offCurArg:offCur].strip());
            offCurArg = offCur + 1;
        offCur += 1;
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sLine[:offMatch] + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
3841
def parse(self):
    """
    Parses the given file.

    Walks self.asLines from self.iLine on, expanding known macros, feeding
    code to checkCodeForMacro and collecting multi-line comments for
    parseComment.

    Returns number of errors.
    Raises exception on fatal trouble.
    """
    #self.debug('Parsing %s' % (self.sSrcFile,));

    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;
        #self.debug('line %u: %s' % (self.iLine, sLine[:-1]));

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                # Keep the expanded text, workerIemMcEnd fetches the block lines from self.asLines.
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            offHit = sLine.find('/*', offEnd);

                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment = '';
                            self.iCommentLine = self.iLine;
                            self.iState = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit];
                            self.iState = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            #self.debug('line %d: multi' % (self.iLine,));
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif (    self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            #self.debug('line %d: macro' % (self.iLine,));
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        elif self.iState == self.kiCode and sLine[0] == '}':
            #self.debug('line %d: }' % (self.iLine,));
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    self.doneInstructions(fEndOfFunction = True);
    # Note! A file without any instructions must not trigger a division by zero here, thus the max().
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
3940
3941
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Reads one source file and parses it for instruction specifications.

    Returns a (cErrors, oParser) tuple, where cErrors is what the parser's
    parse method returned.
    Raises Exception if the file cannot be opened or read.  A
    ParserException from the parser is printed to stderr and re-raised.
    """
    #
    # Slurp the file into a list of lines.  Opening and reading are kept
    # apart so each failure gets its own message.
    #
    try:
        oFile = open(sSrcFile, "r"); # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # Hand the lines to a parser instance.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap);
        cErrors = oParser.parse();
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
    return (cErrors, oParser);
3969
3970
def __doTestCopying():
    """
    Carries out the asCopyTests instructions of all the instructions.

    Each reference is looked up by statistics name first and by function
    name second, and the tests of the instruction(s) found are appended to
    those of the referencing instruction.

    Returns the number of errors, after writing them to stderr.
    """
    asErrors = [];
    for oDstInstr in g_aoAllInstructions:
        for sSrcInstr in oDstInstr.asCopyTests or ():
            # Resolve the reference to a list of source instructions.
            oByStat = g_dAllInstructionsByStat.get(sSrcInstr, None);
            if oByStat:
                aoSrcInstrs = [oByStat,];
            else:
                aoSrcInstrs = g_dAllInstructionsByFunction.get(sSrcInstr, []);

            if not aoSrcInstrs:
                asErrors.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));
                continue;

            # Copy the tests, refusing references to the instruction itself.
            for oSrcInstr in aoSrcInstrs:
                if oSrcInstr != oDstInstr:
                    oDstInstr.aoTests.extend(oSrcInstr.aoTests);
                else:
                    asErrors.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                    % ( oDstInstr.sSrcFile, oDstInstr.iLineCreated, sSrcInstr));

    if asErrors:
        sys.stderr.write(u''.join(asErrors));
    return len(asErrors);
3998
3999
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, the aoTests list of every instruction not
    in it is replaced by an empty one.  Nothing is touched otherwise.

    Returns 0 (the error count).
    """
    if g_aoOnlyTestInstructions:
        for oInstr in g_aoAllInstructions:
            if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
                oInstr.aoTests = [];
    return 0;
4011
## List of all main instruction files and their default maps.
# Each entry is a (file name, default instruction map name) pair.  parseAll
# parses every file listed here, while parseFiles uses the table to look up
# the default map for a file by its lower-cased base name.
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h', 'one', ),
    ( 'IEMAllInstructionsOneByte.cpp.h', 'one', ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f', ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h', 'vexmap1', ),
    ( 'IEMAllInstructionsVexMap2.cpp.h', 'vexmap2', ),
    ( 'IEMAllInstructionsVexMap3.cpp.h', 'vexmap3', ),
    ( 'IEMAllInstructions3DNow.cpp.h', '3dnow', ),
);
4024
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (file name, default map name)
    pairs.  A bare file name that doesn't exist relative to the current
    directory is looked for in the directory of this script.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir = os.path.dirname(os.path.abspath(__file__));
    cErrors = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # Note! The max() avoids a division by zero when no instructions were found
    #       (empty file list or files without any instruction definitions).
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(len(g_aoAllInstructions), 1), cTotalStubs, len(g_aoAllInstructions),
             len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
4055
4056
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map for each file is taken from
    g_aasAllInstrFilesAndDefaultMap, matching on the lower-cased base name.

    Returns a list of the parsers on success.
    Raises exception on failure, including files that aren't in the table.
    """
    # Index the table by lower-cased file name.  Using setdefault makes the
    # first entry win should a name ever be listed twice.
    dMapByName = {};
    for asCur in g_aasAllInstrFilesAndDefaultMap:
        dMapByName.setdefault(asCur[0].lower(), asCur[1]);

    # Pair each file up with its default map and let the worker do the job.
    asFilesAndDefaultMap = [];
    for sFilename in asFiles:
        sMap = dMapByName.get(os.path.split(sFilename)[1].lower(), None);
        if not sMap:
            raise Exception('Unable to classify file: %s' % (sFilename,));
        asFilesAndDefaultMap.append((sFilename, sMap));

    return __parseFilesWorker(asFilesAndDefaultMap);
4078
4079
def parseAll():
    """
    Parses every file listed in g_aasAllInstrFilesAndDefaultMap.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    aoParsers = __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
    return aoParsers;
4088
4089
4090#
4091# Generators (may perhaps move later).
4092#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats the disassembler table entry for the given instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands, with the columns padded out to
    fixed offsets.

    Returns the formatted line (no trailing newline).
    """
    # OP takes three operand slots, OPVEX four.
    sMacro = 'OP';
    cMaxOperands = 3;
    if len(oInstr.aoOperands) > 3:
        sMacro = 'OPVEX'
        cMaxOperands = 4;
    assert len(oInstr.aoOperands) <= cMaxOperands;

    #
    # Format string.
    #
    sTmp = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    for iOperand, oOperand in enumerate(oInstr.aoOperands):
        # A space separates the mnemonic from the first operand, commas the operands.
        sTmp += ' ' if iOperand == 0 else ',';
        if g_kdOpTypes[oOperand.sType][2][0] != '%': ## @todo remove upper() later.
            sTmp += g_kdOpTypes[oOperand.sType][2].upper(); ## @todo remove upper() later.
        else:
            sTmp += g_kdOpTypes[oOperand.sType][2];
    sTmp += '",';
    asColumns = [ sTmp, ];

    #
    # Decoders.
    #
    # iStart marks the first decoder column, so len(asColumns) - iStart is
    # the number of decoder columns emitted so far.
    iStart = len(asColumns);
    if oInstr.sEncoding is None:
        pass;
    elif oInstr.sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert len(oInstr.aoOperands) >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding in [ 'prefix', ]:
        for oOperand in oInstr.aoOperands:
            asColumns.append('0,');
    elif oInstr.sEncoding in [ 'fixed', 'VEX.fixed' ]:
        pass;
    elif oInstr.sEncoding == 'VEX.ModR/M':
        asColumns.append('IDX_ParseModRM,');
    elif oInstr.sEncoding == 'vex2':
        asColumns.append('IDX_ParseVex2b,')
    elif oInstr.sEncoding == 'vex3':
        asColumns.append('IDX_ParseVex3b,')
    elif oInstr.sEncoding in g_dInstructionMaps:
        asColumns.append(g_dInstructionMaps[oInstr.sEncoding].sDisParse + ',');
    else:
        ## @todo
        #IDX_ParseTwoByteEsc,
        #IDX_ParseGrp1,
        #IDX_ParseShiftGrp2,
        #IDX_ParseGrp3,
        #IDX_ParseGrp4,
        #IDX_ParseGrp5,
        #IDX_Parse3DNow,
        #IDX_ParseGrp6,
        #IDX_ParseGrp7,
        #IDX_ParseGrp8,
        #IDX_ParseGrp9,
        #IDX_ParseGrp10,
        #IDX_ParseGrp12,
        #IDX_ParseGrp13,
        #IDX_ParseGrp14,
        #IDX_ParseGrp15,
        #IDX_ParseGrp16,
        #IDX_ParseThreeByteEsc4,
        #IDX_ParseThreeByteEsc5,
        #IDX_ParseModFence,
        #IDX_ParseEscFP,
        #IDX_ParseNopPause,
        #IDX_ParseInvOpModRM,
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands.
    # (Operands already covered by a decoder column above are skipped.)
    for oOperand in oInstr.aoOperands[len(asColumns) - iStart:]:
        sIdx = g_kdOpTypes[oOperand.sType][0];
        #if sIdx != 'IDX_UseModRM':
        asColumns.append(sIdx + ',');
    # Pad the decoder columns up to the macro's operand slot count.
    asColumns.extend(['0,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asColumns.append(oInstr.sDisEnum + ',');
    iStart = len(asColumns)
    for oOperand in oInstr.aoOperands:
        asColumns.append('OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',');
    # Pad the parameter columns up to the macro's operand slot count.
    asColumns.extend(['OP_PARM_NONE,'] * (cMaxOperands - (len(asColumns) - iStart)));

    #
    # Flags.
    #
    # Only hints mapping to DISOPTYPE_XXX defines are included; they are
    # or'ed together in sorted hint name order, with '0' if there are none.
    sTmp = '';
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            if sTmp:
                sTmp += ' | ' + sDefine;
            else:
                sTmp += sDefine;
    if sTmp:
        sTmp += '),';
    else:
        sTmp += '0),';
    asColumns.append(sTmp);

    #
    # Format the columns into a line.
    #
    # Each column is padded out to its offset in aoffColumns; a column that
    # has already been overrun gets a single separating space instead.
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for i, s in enumerate(asColumns):
        if len(sLine) < aoffColumns[i]:
            sLine += ' ' * (aoffColumns[i] - len(sLine));
        else:
            sLine += ' ';
        sLine += s;

    # OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
    # DISOPTYPE_HARMLESS),
    # define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
    # { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }
    return sLine;
4217
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Works out whether the table can be emitted without its leading and
    trailing unused (None) entries.

    Returns (iInstr, cInstructions, fShortTable), where iInstr is the index
    of the first entry to output and cInstructions the index following the
    last one.  For the full table this is (0, len(aoTableOrdered), False).
    """
    cEntries = len(aoTableOrdered);

    # Find the end of the used range by skipping unused entries at the tail.
    iEnd = cEntries;
    while iEnd > 0 and aoTableOrdered[iEnd - 1] is None:
        iEnd -= 1;

    # Find the first used entry, ending up at iEnd if there are none.
    iFirst = next((idx for idx in range(iEnd) if aoTableOrdered[idx] is not None), iEnd);

    # Go for the short table version when the number of entries trimmed off
    # reaches a 30th of the table size.
    # NOTE(review): The original comment here spoke of saving more than 30%,
    #               which isn't what the '// 30' expression tests - confirm the intent.
    cTrimmed = iFirst + cEntries - iEnd;
    if cTrimmed >= cEntries // 30:
        return (iFirst, iEnd, True);
    _ = oMap; # Use this for overriding.

    # Output the full table.
    return (0, cEntries, False);
4239
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files via parseAll(), splits maps selected by
    'byte+pfx' into one map per prefix, and then writes one C opcode table
    (DISOPCODE array) plus one map range record (DISOPMAPDESC) to oDstFile
    for each map whose name starts with "vex".

    oDstFile is any object with a write() method taking a string; it defaults
    to standard output.

    Returns exit code: 0 on success, 1 if parseAll() raised an exception (the
    error and traceback are printed to standard error in that case).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt:
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout: maps that are
    # selected by opcode byte + prefix are split up into one map per prefix.
    # The maps are processed ordered by encoding and lead opcode bytes.
    #
    aoDisasmMaps = [];
    for oMap in sorted(g_dInstructionMaps.values(),
                       key = lambda oSortMap: oSortMap.sEncoding + ''.join(oSortMap.asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            # NOTE(review): presumably copy() only keeps the instructions matching the given prefix - confirm.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    # Note! asHeaderLines collects the 'extern' declarations matching the emitted
    #       definitions, but nothing in this function writes them out yet.
    #
    asHeaderLines = [];
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        if not oMap.sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        # One output row covers 0x10 opcode bytes.  The masks below assume that
        # cEntriesPerByte is a power of two.
        cEntriesPerRow = 0x10 * cEntriesPerByte;
        fRowMask       = cEntriesPerRow - 1;
        sTableName     = oMap.getDisasTableName();
        sRangeName     = oMap.getDisasRangeName();
        cTableEntries  = iInstrEnd - iInstrStart;

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (sTableName,));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (sTableName, cTableEntries,));
            asLines.append('const DISOPCODE %s[%d] =' % (sTableName, cTableEntries,));
        asLines.append('{');

        # A short table that doesn't start on a row boundary gets a comment with its first index.
        if fShortTable and (iInstrStart & fRowMask) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];

            # Row header: blank line between rows and the high nibble of the opcode byte.
            if (iInstr & fRowMask) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                # The run is limited to the emitted range so that a block macro
                # can never cover entries beyond iInstrEnd.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < iInstrEnd and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;

                if (iInstr & fRowMask) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row of invalid entries.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte are invalid.  Skip
                        # exactly the entries the block macro covers (the final
                        # increment below accounts for the last one).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list) and len(oInstr) != 0:
                # Several candidates for one slot: emit a placeholder listing them all.
                asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                               % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
            else:
                # Single instruction (an empty list is also handed to the formatter).
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        # An empty initializer list isn't valid C, so emit a dummy entry for empty ranges.
        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (sTableName, cTableEntries,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        # The declaration uses the same symbol name as the definition.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %s;' % (sRangeName,));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (sRangeName, sTableName, iInstrStart, sTableName,));

        #
        # Write out the lines, followed by an empty line.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;
4368
if __name__ == '__main__':
    # Script entry point: generate the tables on standard output and use the
    # returned status as the process exit code.
    iExitCode = generateDisassemblerTables();
    sys.exit(iExitCode);
4371
注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette