VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsPython.py@ 98916

Last change on this file since 98916 was 98916, checked in by vboxsync, 2 years ago

VMM/IEM: More work on processing MC blocks, mainly related to reworking common functions for binary operations into body macros. bugref:10369

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 249.7 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: IEMAllInstructionsPython.py 98916 2023-03-12 01:27:21Z vboxsync $
4
5"""
6IEM instruction extractor.
7
8This script/module parses the IEMAllInstruction*.cpp.h files next to it and
9collects information about the instructions. It can then be used to generate
10disassembler tables and tests.
11"""
12
13from __future__ import print_function;
14
15__copyright__ = \
16"""
17Copyright (C) 2017-2023 Oracle and/or its affiliates.
18
19This file is part of VirtualBox base platform packages, as
20available from https://www.virtualbox.org.
21
22This program is free software; you can redistribute it and/or
23modify it under the terms of the GNU General Public License
24as published by the Free Software Foundation, in version 3 of the
25License.
26
27This program is distributed in the hope that it will be useful, but
28WITHOUT ANY WARRANTY; without even the implied warranty of
29MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
30General Public License for more details.
31
32You should have received a copy of the GNU General Public License
33along with this program; if not, see <https://www.gnu.org/licenses>.
34
35The contents of this file may alternatively be used under the terms
36of the Common Development and Distribution License Version 1.0
37(CDDL), a copy of it is provided in the "COPYING.CDDL" file included
38in the VirtualBox distribution, in which case the provisions of the
39CDDL are applicable instead of those of the GPL.
40
41You may elect to license modified versions of this file under the
42terms and conditions of either the GPL or the CDDL or both.
43
44SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
45"""
46__version__ = "$Revision: 98916 $"
47
48# pylint: disable=anomalous-backslash-in-string,too-many-lines
49
50# Standard python imports.
51import os;
52import re;
53import sys;
54import traceback;
55
56## Only the main script needs to modify the path.
57#g_ksValidationKitDir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
58# 'ValidationKit');
59#sys.path.append(g_ksValidationKitDir);
60#
61#from common import utils; - Windows build boxes doesn't have pywin32.
62
# Python 3 hacks:
# Python 3 has no separate 'long' builtin, so the name is aliased to int there.
# (Presumably other parts of this file still reference 'long' - not visible here.)
if sys.version_info[0] >= 3:
    long = int;     # pylint: disable=redefined-builtin,invalid-name
66
67
## Constants and values for EFLAGS.
## Maps the X86_EFL_XXX constant name to its numeric value; the trailing
## comment on each line gives the corresponding RT_BIT_32 / UINT32_C expression.
g_kdX86EFlagsConstants = {
    'X86_EFL_CF':          0x00000001, # RT_BIT_32(0)
    'X86_EFL_1':           0x00000002, # RT_BIT_32(1)
    'X86_EFL_PF':          0x00000004, # RT_BIT_32(2)
    'X86_EFL_AF':          0x00000010, # RT_BIT_32(4)
    'X86_EFL_ZF':          0x00000040, # RT_BIT_32(6)
    'X86_EFL_SF':          0x00000080, # RT_BIT_32(7)
    'X86_EFL_TF':          0x00000100, # RT_BIT_32(8)
    'X86_EFL_IF':          0x00000200, # RT_BIT_32(9)
    'X86_EFL_DF':          0x00000400, # RT_BIT_32(10)
    'X86_EFL_OF':          0x00000800, # RT_BIT_32(11)
    'X86_EFL_IOPL':        0x00003000, # (RT_BIT_32(12) | RT_BIT_32(13))
    'X86_EFL_NT':          0x00004000, # RT_BIT_32(14)
    'X86_EFL_RF':          0x00010000, # RT_BIT_32(16)
    'X86_EFL_VM':          0x00020000, # RT_BIT_32(17)
    'X86_EFL_AC':          0x00040000, # RT_BIT_32(18)
    'X86_EFL_VIF':         0x00080000, # RT_BIT_32(19)
    'X86_EFL_VIP':         0x00100000, # RT_BIT_32(20)
    'X86_EFL_ID':          0x00200000, # RT_BIT_32(21)
    'X86_EFL_LIVE_MASK':   0x003f7fd5, # UINT32_C(0x003f7fd5)
    'X86_EFL_RA1_MASK':    0x00000002, # RT_BIT_32(1)
};
90
## EFlags values allowed in \@opfltest, \@opflmodify, \@opflundef, \@opflset, and \@opflclear.
## Maps the mnemonic to an X86_EFL_XXX constant name; a leading '!' on the
## value marks the mnemonic as denoting the cleared state of that flag.
g_kdEFlagsMnemonics = {
    # Debugger flag notation (sorted by value):
    'cf':   'X86_EFL_CF',   ##< Carry Flag.
    'nc':   '!X86_EFL_CF',  ##< No Carry.

    # NOTE(review): in the usual debugger notation 'pe' (parity even) is shown
    #               when PF is set and 'po' (parity odd) when it is clear, so
    #               these two look inverted - confirm against the users of
    #               this table before changing anything.
    'po':   'X86_EFL_PF',   ##< Parity Odd.
    'pe':   '!X86_EFL_PF',  ##< Parity Even.

    'af':   'X86_EFL_AF',   ##< Aux Flag.
    'na':   '!X86_EFL_AF',  ##< No Aux.

    'zr':   'X86_EFL_ZF',   ##< ZeRo.
    'nz':   '!X86_EFL_ZF',  ##< No Zero.

    'ng':   'X86_EFL_SF',   ##< NeGative (sign).
    'pl':   '!X86_EFL_SF',  ##< PLus (sign).

    'tf':   'X86_EFL_TF',   ##< Trap flag.

    'ei':   'X86_EFL_IF',   ##< Enabled Interrupts.
    'di':   '!X86_EFL_IF',  ##< Disabled Interrupts.

    'dn':   'X86_EFL_DF',   ##< DowN (string op direction).
    'up':   '!X86_EFL_DF',  ##< UP (string op direction).

    'ov':   'X86_EFL_OF',   ##< OVerflow.
    'nv':   '!X86_EFL_OF',  ##< No Overflow.

    'nt':   'X86_EFL_NT',   ##< Nested Task.
    'rf':   'X86_EFL_RF',   ##< Resume Flag.
    'vm':   'X86_EFL_VM',   ##< Virtual-8086 Mode.
    'ac':   'X86_EFL_AC',   ##< Alignment Check.
    'vif':  'X86_EFL_VIF',  ##< Virtual Interrupt Flag.
    'vip':  'X86_EFL_VIP',  ##< Virtual Interrupt Pending.

    # Reference manual notation not covered above (sorted by value):
    'pf':   'X86_EFL_PF',
    'zf':   'X86_EFL_ZF',
    'sf':   'X86_EFL_SF',
    'if':   'X86_EFL_IF',
    'df':   'X86_EFL_DF',
    'of':   'X86_EFL_OF',
    'iopl': 'X86_EFL_IOPL',
    'id':   'X86_EFL_ID',
};
137
## Constants and values for CR0.
## Maps the X86_CR0_XXX constant name to its numeric value; the trailing
## comment on each line gives the corresponding RT_BIT_32 expression.
g_kdX86Cr0Constants = {
    'X86_CR0_PE':          0x00000001, # RT_BIT_32(0)
    'X86_CR0_MP':          0x00000002, # RT_BIT_32(1)
    'X86_CR0_EM':          0x00000004, # RT_BIT_32(2)
    'X86_CR0_TS':          0x00000008, # RT_BIT_32(3)
    'X86_CR0_ET':          0x00000010, # RT_BIT_32(4)
    'X86_CR0_NE':          0x00000020, # RT_BIT_32(5)
    'X86_CR0_WP':          0x00010000, # RT_BIT_32(16)
    'X86_CR0_AM':          0x00040000, # RT_BIT_32(18)
    'X86_CR0_NW':          0x20000000, # RT_BIT_32(29)
    'X86_CR0_CD':          0x40000000, # RT_BIT_32(30)
    'X86_CR0_PG':          0x80000000, # RT_BIT_32(31)
};
152
## Constants and values for CR4.
## Maps the X86_CR4_XXX constant name to its numeric value; the trailing
## comment on each line gives the corresponding RT_BIT_32 expression.
g_kdX86Cr4Constants = {
    'X86_CR4_VME':         0x00000001, # RT_BIT_32(0)
    'X86_CR4_PVI':         0x00000002, # RT_BIT_32(1)
    'X86_CR4_TSD':         0x00000004, # RT_BIT_32(2)
    'X86_CR4_DE':          0x00000008, # RT_BIT_32(3)
    'X86_CR4_PSE':         0x00000010, # RT_BIT_32(4)
    'X86_CR4_PAE':         0x00000020, # RT_BIT_32(5)
    'X86_CR4_MCE':         0x00000040, # RT_BIT_32(6)
    'X86_CR4_PGE':         0x00000080, # RT_BIT_32(7)
    'X86_CR4_PCE':         0x00000100, # RT_BIT_32(8)
    'X86_CR4_OSFXSR':      0x00000200, # RT_BIT_32(9)
    'X86_CR4_OSXMMEEXCPT': 0x00000400, # RT_BIT_32(10)
    'X86_CR4_VMXE':        0x00002000, # RT_BIT_32(13)
    'X86_CR4_SMXE':        0x00004000, # RT_BIT_32(14)
    'X86_CR4_PCIDE':       0x00020000, # RT_BIT_32(17)
    'X86_CR4_OSXSAVE':     0x00040000, # RT_BIT_32(18)
    'X86_CR4_SMEP':        0x00100000, # RT_BIT_32(20)
    'X86_CR4_SMAP':        0x00200000, # RT_BIT_32(21)
    'X86_CR4_PKE':         0x00400000, # RT_BIT_32(22)
};
174
## XSAVE components (XCR0).
## Maps the XSAVE_C_XXX constant name to its numeric value.  The last two
## entries are combined masks (YMM | ZMM_HI256 | ZMM_16HI, and the same plus SSE).
g_kdX86XSaveCConstants = {
    'XSAVE_C_X87':         0x00000001,
    'XSAVE_C_SSE':         0x00000002,
    'XSAVE_C_YMM':         0x00000004,
    'XSAVE_C_BNDREGS':     0x00000008,
    'XSAVE_C_BNDCSR':      0x00000010,
    'XSAVE_C_OPMASK':      0x00000020,
    'XSAVE_C_ZMM_HI256':   0x00000040,
    'XSAVE_C_ZMM_16HI':    0x00000080,
    'XSAVE_C_PKRU':        0x00000200,
    'XSAVE_C_LWP':         0x4000000000000000,
    'XSAVE_C_X':           0x8000000000000000,
    'XSAVE_C_ALL_AVX':     0x000000c4, # For clearing all AVX bits.
    'XSAVE_C_ALL_AVX_SSE': 0x000000c6, # For clearing all AVX and SSE bits.
};
191
192
## \@op[1-4] locations
## The keys are the location names referenced by field 1 of the g_kdOpTypes
## entries; every value is an empty list in this table.
g_kdOpLocations = {
    'reg':      [], ## modrm.reg
    'rm':       [], ## modrm.rm
    'imm':      [], ## immediate instruction data
    'vvvv':     [], ## VEX.vvvv

    # fixed registers.
    'AL':       [],
    'rAX':      [],
    'rDX':      [],
    'rSI':      [],
    'rDI':      [],
    'rFLAGS':   [],
    'CS':       [],
    'DS':       [],
    'ES':       [],
    'FS':       [],
    'GS':       [],
    'SS':       [],
};
214
## \@op[1-4] types
##
## Each value is a 5-tuple with these fields:
##    - 0: the normal IDX_ParseXXX handler (IDX_UseModRM == IDX_ParseModRM).
##    - 1: the location (g_kdOpLocations).
##    - 2: disassembler format string version of the type.
##    - 3: disassembler OP_PARAM_XXX (XXX only).
##    - 4: IEM form matching instruction.
##
## Note! See the A.2.1 in SDM vol 2 for the type names.
## Note! Keys carrying a _WO / _RO / _RW suffix mostly repeat the tuple of the
##       unsuffixed type (presumably write-only / read-only / read-write
##       access variants - the consumer of the suffix is not visible here).
g_kdOpTypes = {
    # Fixed addresses
    'Ap': ( 'IDX_ParseImmAddrF', 'imm', '%Ap', 'Ap', 'FIXED', ),

    # ModR/M.rm
    'Eb': ( 'IDX_UseModRM', 'rm', '%Eb', 'Eb', 'RM', ),
    'Ed': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Ed_WO': ( 'IDX_UseModRM', 'rm', '%Ed', 'Ed', 'RM', ),
    'Eq': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Eq_WO': ( 'IDX_UseModRM', 'rm', '%Eq', 'Eq', 'RM', ),
    'Ew': ( 'IDX_UseModRM', 'rm', '%Ew', 'Ew', 'RM', ),
    'Ev': ( 'IDX_UseModRM', 'rm', '%Ev', 'Ev', 'RM', ),
    'Ey': ( 'IDX_UseModRM', 'rm', '%Ey', 'Ey', 'RM', ),
    'Qd': ( 'IDX_UseModRM', 'rm', '%Qd', 'Qd', 'RM', ),
    'Qq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Qq_WO': ( 'IDX_UseModRM', 'rm', '%Qq', 'Qq', 'RM', ),
    'Wss': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wss_WO': ( 'IDX_UseModRM', 'rm', '%Wss', 'Wss', 'RM', ),
    'Wsd': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wsd_WO': ( 'IDX_UseModRM', 'rm', '%Wsd', 'Wsd', 'RM', ),
    'Wps': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wps_WO': ( 'IDX_UseModRM', 'rm', '%Wps', 'Wps', 'RM', ),
    'Wpd': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wpd_WO': ( 'IDX_UseModRM', 'rm', '%Wpd', 'Wpd', 'RM', ),
    'Wdq': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wdq_WO': ( 'IDX_UseModRM', 'rm', '%Wdq', 'Wdq', 'RM', ),
    'Wq': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wq_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'WqZxReg_WO': ( 'IDX_UseModRM', 'rm', '%Wq', 'Wq', 'RM', ),
    'Wx': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),
    'Wx_WO': ( 'IDX_UseModRM', 'rm', '%Wx', 'Wx', 'RM', ),

    # ModR/M.rm - register only.
    'Uq': ( 'IDX_UseModRM', 'rm', '%Uq', 'Uq', 'REG' ),
    'UqHi': ( 'IDX_UseModRM', 'rm', '%Uq', 'UqHi', 'REG' ),
    'Uss': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Uss_WO': ( 'IDX_UseModRM', 'rm', '%Uss', 'Uss', 'REG' ),
    'Usd': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Usd_WO': ( 'IDX_UseModRM', 'rm', '%Usd', 'Usd', 'REG' ),
    'Ux': ( 'IDX_UseModRM', 'rm', '%Ux', 'Ux', 'REG' ),
    'Nq': ( 'IDX_UseModRM', 'rm', '%Qq', 'Nq', 'REG' ),

    # ModR/M.rm - memory only.
    'Ma': ( 'IDX_UseModRM', 'rm', '%Ma', 'Ma', 'MEM', ), ##< Only used by BOUND.
    'Mb_RO': ( 'IDX_UseModRM', 'rm', '%Mb', 'Mb', 'MEM', ),
    'Md': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_RO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Md_WO': ( 'IDX_UseModRM', 'rm', '%Md', 'Md', 'MEM', ),
    'Mdq': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mdq_WO': ( 'IDX_UseModRM', 'rm', '%Mdq', 'Mdq', 'MEM', ),
    'Mq': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mq_WO': ( 'IDX_UseModRM', 'rm', '%Mq', 'Mq', 'MEM', ),
    'Mps_WO': ( 'IDX_UseModRM', 'rm', '%Mps', 'Mps', 'MEM', ),
    'Mpd_WO': ( 'IDX_UseModRM', 'rm', '%Mpd', 'Mpd', 'MEM', ),
    'Mx': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'Mx_WO': ( 'IDX_UseModRM', 'rm', '%Mx', 'Mx', 'MEM', ),
    'M_RO': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),
    'M_RW': ( 'IDX_UseModRM', 'rm', '%M', 'M', 'MEM', ),

    # ModR/M.reg
    'Gb': ( 'IDX_UseModRM', 'reg', '%Gb', 'Gb', '', ),
    'Gw': ( 'IDX_UseModRM', 'reg', '%Gw', 'Gw', '', ),
    'Gd': ( 'IDX_UseModRM', 'reg', '%Gd', 'Gd', '', ),
    'Gv': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gv_RO': ( 'IDX_UseModRM', 'reg', '%Gv', 'Gv', '', ),
    'Gy': ( 'IDX_UseModRM', 'reg', '%Gy', 'Gy', '', ),
    'Pd': ( 'IDX_UseModRM', 'reg', '%Pd', 'Pd', '', ),
    'PdZx_WO': ( 'IDX_UseModRM', 'reg', '%Pd', 'PdZx', '', ),
    'Pq': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Pq_WO': ( 'IDX_UseModRM', 'reg', '%Pq', 'Pq', '', ),
    'Vd': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vd_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'VdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vd', 'Vd', '', ),
    'Vdq': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'Vss': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vss_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'VssZx_WO': ( 'IDX_UseModRM', 'reg', '%Vss', 'Vss', '', ),
    'Vsd': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vsd_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'VsdZx_WO': ( 'IDX_UseModRM', 'reg', '%Vsd', 'Vsd', '', ),
    'Vps': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vps_WO': ( 'IDX_UseModRM', 'reg', '%Vps', 'Vps', '', ),
    'Vpd': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vpd_WO': ( 'IDX_UseModRM', 'reg', '%Vpd', 'Vpd', '', ),
    'Vq': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vq_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'Vq', '', ),
    'Vdq_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'Vdq', '', ),
    'VqHi': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqHi_WO': ( 'IDX_UseModRM', 'reg', '%Vdq', 'VdqHi', '', ),
    'VqZx_WO': ( 'IDX_UseModRM', 'reg', '%Vq', 'VqZx', '', ),
    'Vx': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),
    'Vx_WO': ( 'IDX_UseModRM', 'reg', '%Vx', 'Vx', '', ),

    # VEX.vvvv
    'By': ( 'IDX_UseModRM', 'vvvv', '%By', 'By', 'V', ),
    'Hps': ( 'IDX_UseModRM', 'vvvv', '%Hps', 'Hps', 'V', ),
    'Hpd': ( 'IDX_UseModRM', 'vvvv', '%Hpd', 'Hpd', 'V', ),
    'HssHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HssHi', 'V', ),
    'HsdHi': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'HsdHi', 'V', ),
    'Hq': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'Hq', 'V', ),
    'HqHi': ( 'IDX_UseModRM', 'vvvv', '%Hq', 'HqHi', 'V', ),
    'Hx': ( 'IDX_UseModRM', 'vvvv', '%Hx', 'Hx', 'V', ),

    # Immediate values.
    'Ib': ( 'IDX_ParseImmByte', 'imm', '%Ib', 'Ib', '', ), ##< NB! Could be IDX_ParseImmByteSX for some instrs.
    'Iw': ( 'IDX_ParseImmUshort', 'imm', '%Iw', 'Iw', '', ),
    'Id': ( 'IDX_ParseImmUlong', 'imm', '%Id', 'Id', '', ),
    'Iq': ( 'IDX_ParseImmQword', 'imm', '%Iq', 'Iq', '', ),
    'Iv': ( 'IDX_ParseImmV', 'imm', '%Iv', 'Iv', '', ), ##< o16: word, o32: dword, o64: qword
    'Iz': ( 'IDX_ParseImmZ', 'imm', '%Iz', 'Iz', '', ), ##< o16: word, o32|o64:dword

    # Address operands (no ModR/M).
    'Ob': ( 'IDX_ParseImmAddr', 'imm', '%Ob', 'Ob', '', ),
    'Ov': ( 'IDX_ParseImmAddr', 'imm', '%Ov', 'Ov', '', ),

    # Relative jump targets
    'Jb': ( 'IDX_ParseImmBRel', 'imm', '%Jb', 'Jb', '', ),
    'Jv': ( 'IDX_ParseImmVRel', 'imm', '%Jv', 'Jv', '', ),

    # DS:rSI
    'Xb': ( 'IDX_ParseXb', 'rSI', '%eSI', 'Xb', '', ),
    'Xv': ( 'IDX_ParseXv', 'rSI', '%eSI', 'Xv', '', ),
    # ES:rDI
    'Yb': ( 'IDX_ParseYb', 'rDI', '%eDI', 'Yb', '', ),
    'Yv': ( 'IDX_ParseYv', 'rDI', '%eDI', 'Yv', '', ),

    # rFLAGS
    'Fv': ( 'IDX_ParseFixedReg', 'rFLAGS', '%Fv', 'Fv', '', ),

    # Fixed registers.
    'AL': ( 'IDX_ParseFixedReg', 'AL', 'al', 'REG_AL', '', ),
    'rAX': ( 'IDX_ParseFixedReg', 'rAX', '%eAX', 'REG_EAX', '', ),
    'rDX': ( 'IDX_ParseFixedReg', 'rDX', '%eDX', 'REG_EDX', '', ),
    'CS': ( 'IDX_ParseFixedReg', 'CS', 'cs', 'REG_CS', '', ), # 8086: push CS
    'DS': ( 'IDX_ParseFixedReg', 'DS', 'ds', 'REG_DS', '', ),
    'ES': ( 'IDX_ParseFixedReg', 'ES', 'es', 'REG_ES', '', ),
    'FS': ( 'IDX_ParseFixedReg', 'FS', 'fs', 'REG_FS', '', ),
    'GS': ( 'IDX_ParseFixedReg', 'GS', 'gs', 'REG_GS', '', ),
    'SS': ( 'IDX_ParseFixedReg', 'SS', 'ss', 'REG_SS', '', ),
};
364
365# IDX_ParseFixedReg
366# IDX_ParseVexDest
367
368
## IEMFORM_XXX mappings.
##
## Each value is a 3-tuple:
##    - 0: the encoding (a g_kdEncodings key).
##    - 1: the list of operand locations in operand order (g_kdOpLocations
##         keys), or None for the 'FIXED' form.
##    - 2: the opcode sub-selector string ('' when there is none).
g_kdIemForms = { # sEncoding, [ sWhere1, ... ] opcodesub ),
    'RM': ( 'ModR/M', [ 'reg', 'rm' ], '', ),
    'RM_REG': ( 'ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'RM_MEM': ( 'ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'RMI': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'RMI_REG': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'RMI_MEM': ( 'ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'MR': ( 'ModR/M', [ 'rm', 'reg' ], '', ),
    'MR_REG': ( 'ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'MR_MEM': ( 'ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'MRI': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '', ),
    'MRI_REG': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '11 mr/reg', ),
    'MRI_MEM': ( 'ModR/M', [ 'rm', 'reg', 'imm' ], '!11 mr/reg', ),
    # NOTE(review): unlike the other *_REG / *_MEM forms, the M and VEX_M
    #               variants below carry an empty sub-selector - confirm
    #               whether that is intentional.
    'M': ( 'ModR/M', [ 'rm', ], '', ),
    'M_REG': ( 'ModR/M', [ 'rm', ], '', ),
    'M_MEM': ( 'ModR/M', [ 'rm', ], '', ),
    'R': ( 'ModR/M', [ 'reg', ], '', ),

    'VEX_RM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '', ),
    'VEX_RM_REG': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '11 mr/reg', ),
    'VEX_RM_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm' ], '!11 mr/reg', ),
    'VEX_MR': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '', ),
    'VEX_MR_REG': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '11 mr/reg', ),
    'VEX_MR_MEM': ( 'VEX.ModR/M', [ 'rm', 'reg' ], '!11 mr/reg', ),
    'VEX_M': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_REG': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_M_MEM': ( 'VEX.ModR/M', [ 'rm', ], '' ),
    'VEX_R': ( 'VEX.ModR/M', [ 'reg', ], '' ),
    'VEX_RVM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '', ),
    'VEX_RVM_REG': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_RVM_MEM': ( 'VEX.ModR/M', [ 'reg', 'vvvv', 'rm' ], '!11 mr/reg', ),
    'VEX_RMV': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '', ),
    'VEX_RMV_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '11 mr/reg', ),
    'VEX_RMV_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'vvvv' ], '!11 mr/reg', ),
    'VEX_RMI': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '', ),
    'VEX_RMI_REG': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '11 mr/reg', ),
    'VEX_RMI_MEM': ( 'VEX.ModR/M', [ 'reg', 'rm', 'imm' ], '!11 mr/reg', ),
    'VEX_MVR': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '', ),
    'VEX_MVR_REG': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '11 mr/reg', ),
    'VEX_MVR_MEM': ( 'VEX.ModR/M', [ 'rm', 'vvvv', 'reg' ], '!11 mr/reg', ),

    'VEX_VM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '', ),
    'VEX_VM_REG': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '11 mr/reg', ),
    'VEX_VM_MEM': ( 'VEX.ModR/M', [ 'vvvv', 'rm' ], '!11 mr/reg', ),

    'FIXED': ( 'fixed', None, '', ),
};
417
## \@oppfx values.
## The keys are the accepted prefix strings (the same set as
## InstructionMap.kiPrefixOrder); every value is an empty list in this table.
g_kdPrefixes = {
    'none': [],
    '0x66': [],
    '0xf3': [],
    '0xf2': [],
};
425
## Special \@opcode tag values.
## These are the non-hex-byte opcode strings; every value is an empty list in
## this table.
g_kdSpecialOpcodes = {
    '/reg':         [],
    'mr/reg':       [],
    '11 /reg':      [],
    '!11 /reg':     [],
    '11 mr/reg':    [],
    '!11 mr/reg':   [],
};
435
## Special \@opcodesub tag values.
## The first value is the real value for aliases.
## The second value is for bs3cg1.
##
## Every non-alias entry maps to itself in the first field, so resolving a
## value twice always yields the same result.
g_kdSubOpcodes = {
    'none':                 [ None,                 '',     ],
    '11 mr/reg':            [ '11 mr/reg',          '',     ],
    '11':                   [ '11 mr/reg',          '',     ], ##< alias
    '!11 mr/reg':           [ '!11 mr/reg',         '',     ],
    '!11':                  [ '!11 mr/reg',         '',     ], ##< alias
    'rex.w=0':              [ 'rex.w=0',            'WZ',   ],
    'w=0':                  [ 'rex.w=0',            '',     ], ##< alias
    'rex.w=1':              [ 'rex.w=1',            'WNZ',  ],
    'w=1':                  [ 'rex.w=1',            '',     ], ##< alias
    'vex.l=0':              [ 'vex.l=0',            'L0',   ],
    'vex.l=1':              [ 'vex.l=1',            'L1',   ], # Used to resolve to 'vex.l=0' (copy & paste slip).
    '11 mr/reg vex.l=0':    [ '11 mr/reg vex.l=0',  'L0',   ],
    '11 mr/reg vex.l=1':    [ '11 mr/reg vex.l=1',  'L1',   ],
    '!11 mr/reg vex.l=0':   [ '!11 mr/reg vex.l=0', 'L0',   ],
    '!11 mr/reg vex.l=1':   [ '!11 mr/reg vex.l=1', 'L1',   ],
};
456
## Valid values for \@openc
## Each value is a one element list holding the BS3CG1ENC_XXX name for the
## encoding, or None where the table gives no such name ('prefix').
g_kdEncodings = {
    'ModR/M':       [ 'BS3CG1ENC_MODRM', ],     ##< ModR/M
    'VEX.ModR/M':   [ 'BS3CG1ENC_VEX_MODRM', ], ##< VEX...ModR/M
    'fixed':        [ 'BS3CG1ENC_FIXED', ],     ##< Fixed encoding (address, registers, unused, etc).
    'VEX.fixed':    [ 'BS3CG1ENC_VEX_FIXED', ], ##< VEX + fixed encoding (address, registers, unused, etc).
    'prefix':       [ None, ],                  ##< Prefix
};
465
## \@opunused, \@opinvalid, \@opinvlstyle
## The keys name how much of an invalid/unused encoding gets decoded; every
## value is an empty list in this table.
g_kdInvalidStyles = {
    'immediate':                [], ##< CPU stops decoding immediately after the opcode.
    'vex.modrm':                [], ##< VEX+ModR/M, everyone.
    'intel-modrm':              [], ##< Intel decodes ModR/M.
    'intel-modrm-imm8':         [], ##< Intel decodes ModR/M and an 8-bit immediate (imm8).
    'intel-opcode-modrm':       [], ##< Intel decodes another opcode byte followed by ModR/M. (Unused extension tables.)
    'intel-opcode-modrm-imm8':  [], ##< Intel decodes another opcode byte followed by ModR/M and an 8-bit immediate (imm8).
};
475
## Known CPU names; every value is an empty tuple in this table.
## (Presumably the accepted values of a minimum-CPU tag - the consumer is not
## visible in this part of the file.)
g_kdCpuNames = {
    '8086':     (),
    '80186':    (),
    '80286':    (),
    '80386':    (),
    '80486':    (),
};
483
## \@opcpuid
## Maps the feature name accepted by the tag to a X86_CPUID_XXX constant name.
g_kdCpuIdFlags = {
    'vme':          'X86_CPUID_FEATURE_EDX_VME',
    'tsc':          'X86_CPUID_FEATURE_EDX_TSC',
    'msr':          'X86_CPUID_FEATURE_EDX_MSR',
    'cx8':          'X86_CPUID_FEATURE_EDX_CX8',
    'sep':          'X86_CPUID_FEATURE_EDX_SEP',
    'cmov':         'X86_CPUID_FEATURE_EDX_CMOV',
    'clfsh':        'X86_CPUID_FEATURE_EDX_CLFSH',
    'clflushopt':   'X86_CPUID_STEXT_FEATURE_EBX_CLFLUSHOPT',
    'mmx':          'X86_CPUID_FEATURE_EDX_MMX',
    'fxsr':         'X86_CPUID_FEATURE_EDX_FXSR',
    'sse':          'X86_CPUID_FEATURE_EDX_SSE',
    'sse2':         'X86_CPUID_FEATURE_EDX_SSE2',
    'sse3':         'X86_CPUID_FEATURE_ECX_SSE3',
    # NOTE(review): the constant names for 'pclmul', 'monitor' and 'smx' below
    #               are named after other features (DTES64, CPLDS, TM2) - this
    #               looks like a row shift; confirm against the X86_CPUID_XXX
    #               definitions before relying on these three entries.
    'pclmul':       'X86_CPUID_FEATURE_ECX_DTES64',
    'monitor':      'X86_CPUID_FEATURE_ECX_CPLDS',
    'vmx':          'X86_CPUID_FEATURE_ECX_VMX',
    'smx':          'X86_CPUID_FEATURE_ECX_TM2',
    'ssse3':        'X86_CPUID_FEATURE_ECX_SSSE3',
    'fma':          'X86_CPUID_FEATURE_ECX_FMA',
    'cx16':         'X86_CPUID_FEATURE_ECX_CX16',
    'pcid':         'X86_CPUID_FEATURE_ECX_PCID',
    'sse4.1':       'X86_CPUID_FEATURE_ECX_SSE4_1',
    'sse4.2':       'X86_CPUID_FEATURE_ECX_SSE4_2',
    'movbe':        'X86_CPUID_FEATURE_ECX_MOVBE',
    'popcnt':       'X86_CPUID_FEATURE_ECX_POPCNT',
    'aes':          'X86_CPUID_FEATURE_ECX_AES',
    'xsave':        'X86_CPUID_FEATURE_ECX_XSAVE',
    'avx':          'X86_CPUID_FEATURE_ECX_AVX',
    'avx2':         'X86_CPUID_STEXT_FEATURE_EBX_AVX2',
    'f16c':         'X86_CPUID_FEATURE_ECX_F16C',
    'rdrand':       'X86_CPUID_FEATURE_ECX_RDRAND',

    'axmmx':        'X86_CPUID_AMD_FEATURE_EDX_AXMMX',
    '3dnowext':     'X86_CPUID_AMD_FEATURE_EDX_3DNOW_EX',
    '3dnow':        'X86_CPUID_AMD_FEATURE_EDX_3DNOW',
    'svm':          'X86_CPUID_AMD_FEATURE_ECX_SVM',
    'cr8l':         'X86_CPUID_AMD_FEATURE_ECX_CR8L',
    'abm':          'X86_CPUID_AMD_FEATURE_ECX_ABM',
    'sse4a':        'X86_CPUID_AMD_FEATURE_ECX_SSE4A',
    '3dnowprf':     'X86_CPUID_AMD_FEATURE_ECX_3DNOWPRF',
    'xop':          'X86_CPUID_AMD_FEATURE_ECX_XOP',
    'fma4':         'X86_CPUID_AMD_FEATURE_ECX_FMA4',
};
529
## \@ophints values.
## Maps the hint name to a DISOPTYPE_XXX constant name, or to an empty string
## for the hints at the end of the table that have no such constant here.
g_kdHints = {
    'invalid': 'DISOPTYPE_INVALID', ##<
    'harmless': 'DISOPTYPE_HARMLESS', ##<
    'controlflow': 'DISOPTYPE_CONTROLFLOW', ##<
    'potentially_dangerous': 'DISOPTYPE_POTENTIALLY_DANGEROUS', ##<
    'dangerous': 'DISOPTYPE_DANGEROUS', ##<
    'portio': 'DISOPTYPE_PORTIO', ##<
    'privileged': 'DISOPTYPE_PRIVILEGED', ##<
    'privileged_notrap': 'DISOPTYPE_PRIVILEGED_NOTRAP', ##<
    'uncond_controlflow': 'DISOPTYPE_UNCOND_CONTROLFLOW', ##<
    'relative_controlflow': 'DISOPTYPE_RELATIVE_CONTROLFLOW', ##<
    'cond_controlflow': 'DISOPTYPE_COND_CONTROLFLOW', ##<
    'interrupt': 'DISOPTYPE_INTERRUPT', ##<
    'illegal': 'DISOPTYPE_ILLEGAL', ##<
    'rrm_dangerous': 'DISOPTYPE_RRM_DANGEROUS', ##< Some additional dangerous ones when recompiling raw r0.
    'rrm_dangerous_16': 'DISOPTYPE_RRM_DANGEROUS_16', ##< Some additional dangerous ones when recompiling 16-bit raw r0.
    'inhibit_irqs': 'DISOPTYPE_INHIBIT_IRQS', ##< Will or can inhibit irqs (sti, pop ss, mov ss)
    'portio_read': 'DISOPTYPE_PORTIO_READ', ##<
    'portio_write': 'DISOPTYPE_PORTIO_WRITE', ##<
    'invalid_64': 'DISOPTYPE_INVALID_64', ##< Invalid in 64 bits mode
    'only_64': 'DISOPTYPE_ONLY_64', ##< Only valid in 64 bits mode
    'default_64_op_size': 'DISOPTYPE_DEFAULT_64_OP_SIZE', ##< Default 64 bits operand size
    'forced_64_op_size': 'DISOPTYPE_FORCED_64_OP_SIZE', ##< Forced 64 bits operand size; regardless of prefix bytes
    'rexb_extends_opreg': 'DISOPTYPE_REXB_EXTENDS_OPREG', ##< REX.B extends the register field in the opcode byte
    'mod_fixed_11': 'DISOPTYPE_MOD_FIXED_11', ##< modrm.mod is always 11b
    'forced_32_op_size_x86': 'DISOPTYPE_FORCED_32_OP_SIZE_X86', ##< Forced 32 bits operand size; regardless of prefix bytes
                                                                  ## (only in 16 & 32 bits mode!)
    'avx': 'DISOPTYPE_AVX', ##< AVX,AVX2,++ instruction. Not implemented yet!
    'sse': 'DISOPTYPE_SSE', ##< SSE,SSE2,SSE3,++ instruction. Not implemented yet!
    'mmx': 'DISOPTYPE_MMX', ##< MMX,MMXExt,3DNow,++ instruction. Not implemented yet!
    'fpu': 'DISOPTYPE_FPU', ##< FPU instruction. Not implemented yet!
    'ignores_oz_pfx': '', ##< Ignores operand size prefix 66h.
    'ignores_rexw': '', ##< Ignores REX.W.
    'ignores_op_sizes': '', ##< Shorthand for "ignores_oz_pfx | ignores_rexw" (presumably; the comment used to name itself - confirm).
    'vex_l_zero': '', ##< VEX.L must be 0.
    'vex_l_ignored': '', ##< VEX.L is ignored.
    'vex_v_zero': '', ##< VEX.V must be 0. (generate sub-table?)
    'lock_allowed': '', ##< Lock prefix allowed.
};
570
## \@opxcpttype values (see SDMv2 2.4, 2.7).
## The keys are the accepted exception type names; every value is an empty
## list in this table.
g_kdXcptTypes = {
    'none':     [],
    '1':        [],
    '2':        [],
    '3':        [],
    '4':        [],
    '4UA':      [],
    '5':        [],
    '5LZ':      [], # LZ = VEX.L must be zero.
    '6':        [],
    '7':        [],
    '7LZ':      [], # LZ = VEX.L must be zero.
    '8':        [],
    '11':       [],
    '12':       [],
    'E1':       [],
    'E1NF':     [],
    'E2':       [],
    'E3':       [],
    'E3NF':     [],
    'E4':       [],
    'E4NF':     [],
    'E5':       [],
    'E5NF':     [],
    'E6':       [],
    'E6NF':     [],
    'E7NF':     [],
    'E9':       [],
    'E9NF':     [],
    'E10':      [],
    'E11':      [],
    'E12':      [],
    'E12NF':    [],
};
606
607
608def _isValidOpcodeByte(sOpcode):
609 """
610 Checks if sOpcode is a valid lower case opcode byte.
611 Returns true/false.
612 """
613 if len(sOpcode) == 4:
614 if sOpcode[:2] == '0x':
615 if sOpcode[2] in '0123456789abcdef':
616 if sOpcode[3] in '0123456789abcdef':
617 return True;
618 return False;
619
620
621class InstructionMap(object):
622 """
623 Instruction map.
624
625 The opcode map provides the lead opcode bytes (empty for the one byte
626 opcode map). An instruction can be member of multiple opcode maps as long
627 as it uses the same opcode value within the map (because of VEX).
628 """
629
630 kdEncodings = {
631 'legacy': [],
632 'vex1': [], ##< VEX or EVEX prefix with vvvvv = 1
633 'vex2': [], ##< VEX or EVEX prefix with vvvvv = 2
634 'vex3': [], ##< VEX or EVEX prefix with vvvvv = 3
635 'xop8': [], ##< XOP prefix with vvvvv = 8
636 'xop9': [], ##< XOP prefix with vvvvv = 9
637 'xop10': [], ##< XOP prefix with vvvvv = 10
638 };
639 ## Selectors.
640 ## 1. The first value is the number of table entries required by a
641 ## decoder or disassembler for this type of selector.
642 ## 2. The second value is how many entries per opcode byte if applicable.
643 kdSelectors = {
644 'byte': [ 256, 1, ], ##< next opcode byte selects the instruction (default).
645 'byte+pfx': [ 1024, 4, ], ##< next opcode byte selects the instruction together with the 0x66, 0xf2 and 0xf3 prefixes.
646 '/r': [ 8, 1, ], ##< modrm.reg selects the instruction.
647 'memreg /r':[ 16, 1, ], ##< modrm.reg and (modrm.mod == 3) selects the instruction.
648 'mod /r': [ 32, 1, ], ##< modrm.reg and modrm.mod selects the instruction.
649 '!11 /r': [ 8, 1, ], ##< modrm.reg selects the instruction with modrm.mod != 0y11.
650 '11 /r': [ 8, 1, ], ##< modrm.reg select the instruction with modrm.mod == 0y11.
651 '11': [ 64, 1, ], ##< modrm.reg and modrm.rm select the instruction with modrm.mod == 0y11.
652 };
653
654 ## Define the subentry number according to the Instruction::sPrefix
655 ## value for 'byte+pfx' selected tables.
656 kiPrefixOrder = {
657 'none': 0,
658 '0x66': 1,
659 '0xf3': 2,
660 '0xf2': 3,
661 };
662
663 def __init__(self, sName, sIemName = None, asLeadOpcodes = None, sSelector = 'byte+pfx',
664 sEncoding = 'legacy', sDisParse = None):
665 assert sSelector in self.kdSelectors;
666 assert sEncoding in self.kdEncodings;
667 if asLeadOpcodes is None:
668 asLeadOpcodes = [];
669 else:
670 for sOpcode in asLeadOpcodes:
671 assert _isValidOpcodeByte(sOpcode);
672 assert sDisParse is None or sDisParse.startswith('IDX_Parse');
673
674 self.sName = sName;
675 self.sIemName = sIemName;
676 self.asLeadOpcodes = asLeadOpcodes; ##< Lead opcode bytes formatted as hex strings like '0x0f'.
677 self.sSelector = sSelector; ##< The member selector, see kdSelectors.
678 self.sEncoding = sEncoding; ##< The encoding, see kdSelectors.
679 self.aoInstructions = [] # type: Instruction
680 self.sDisParse = sDisParse; ##< IDX_ParseXXX.
681
682 def copy(self, sNewName, sPrefixFilter = None):
683 """
684 Copies the table with filtering instruction by sPrefix if not None.
685 """
686 oCopy = InstructionMap(sNewName, sIemName = self.sIemName, asLeadOpcodes = self.asLeadOpcodes,
687 sSelector = 'byte' if sPrefixFilter is not None and self.sSelector == 'byte+pfx'
688 else self.sSelector,
689 sEncoding = self.sEncoding, sDisParse = self.sDisParse);
690 if sPrefixFilter is None:
691 oCopy.aoInstructions = list(self.aoInstructions);
692 else:
693 oCopy.aoInstructions = [oInstr for oInstr in self.aoInstructions if oInstr.sPrefix == sPrefixFilter];
694 return oCopy;
695
696 def getTableSize(self):
697 """
698 Number of table entries. This corresponds directly to the selector.
699 """
700 return self.kdSelectors[self.sSelector][0];
701
702 def getEntriesPerByte(self):
703 """
704 Number of table entries per opcode bytes.
705
706 This only really makes sense for the 'byte' and 'byte+pfx' selectors, for
707 the others it will just return 1.
708 """
709 return self.kdSelectors[self.sSelector][1];
710
711 def getInstructionIndex(self, oInstr):
712 """
713 Returns the table index for the instruction.
714 """
715 bOpcode = oInstr.getOpcodeByte();
716
717 # The byte selectors are simple. We need a full opcode byte and need just return it.
718 if self.sSelector == 'byte':
719 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
720 return bOpcode;
721
722 # The byte + prefix selector is similarly simple, though requires a prefix as well as the full opcode.
723 if self.sSelector == 'byte+pfx':
724 assert oInstr.sOpcode[:2] == '0x' and len(oInstr.sOpcode) == 4, str(oInstr);
725 assert self.kiPrefixOrder.get(oInstr.sPrefix, -16384) >= 0;
726 return bOpcode * 4 + self.kiPrefixOrder.get(oInstr.sPrefix, -16384);
727
728 # The other selectors needs masking and shifting.
729 if self.sSelector == '/r':
730 return (bOpcode >> 3) & 0x7;
731
732 if self.sSelector == 'mod /r':
733 return (bOpcode >> 3) & 0x1f;
734
735 if self.sSelector == 'memreg /r':
736 return ((bOpcode >> 3) & 0x7) | (int((bOpcode >> 6) == 3) << 3);
737
738 if self.sSelector == '!11 /r':
739 assert (bOpcode & 0xc0) != 0xc, str(oInstr);
740 return (bOpcode >> 3) & 0x7;
741
742 if self.sSelector == '11 /r':
743 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
744 return (bOpcode >> 3) & 0x7;
745
746 if self.sSelector == '11':
747 assert (bOpcode & 0xc0) == 0xc, str(oInstr);
748 return bOpcode & 0x3f;
749
750 assert False, self.sSelector;
751 return -1;
752
753 def getInstructionsInTableOrder(self):
754 """
755 Get instructions in table order.
756
757 Returns array of instructions. Normally there is exactly one
758 instruction per entry. However the entry could also be None if
759 not instruction was specified for that opcode value. Or there
760 could be a list of instructions to deal with special encodings
761 where for instance prefix (e.g. REX.W) encodes a different
762 instruction or different CPUs have different instructions or
763 prefixes in the same place.
764 """
765 # Start with empty table.
766 cTable = self.getTableSize();
767 aoTable = [None] * cTable;
768
769 # Insert the instructions.
770 for oInstr in self.aoInstructions:
771 if oInstr.sOpcode:
772 idxOpcode = self.getInstructionIndex(oInstr);
773 assert idxOpcode < cTable, str(idxOpcode);
774
775 oExisting = aoTable[idxOpcode];
776 if oExisting is None:
777 aoTable[idxOpcode] = oInstr;
778 elif not isinstance(oExisting, list):
779 aoTable[idxOpcode] = list([oExisting, oInstr]);
780 else:
781 oExisting.append(oInstr);
782
783 return aoTable;
784
785
def getDisasTableName(self):
    """
    Derives the disassembler table name from the map name.

    The map name is split on underscores.  The 'm' and 'r' markers and
    two digit lower case hex words are kept as '_xx' suffixes, all other
    words are appended capitalised with 'grp' and 'map' turned into 'Grp'
    and 'Map'.
    """
    asPieces = ['g_aDisas',];
    for sPiece in self.sName.split('_'):
        if sPiece in ('m', 'r'):
            # 'm': modrm.mod==mem suffix, 'r': modrm.mod==reg suffix.
            asPieces.append('_' + sPiece);
        elif len(sPiece) == 2 and re.match('^[a-f0-9][a-f0-9]$', sPiece):
            asPieces.append('_' + sPiece);
        else:
            sPiece = sPiece.replace('grp', 'Grp').replace('map', 'Map');
            asPieces.append(sPiece[0].upper() + sPiece[1:]);
    return ''.join(asPieces);
803
def getDisasRangeName(self):
    """
    Derives the disassembler table range name for this map: the table name
    with 'g_aDisas' replaced by 'g_Disas' and 'Range' appended.
    """
    sTableName = self.getDisasTableName();
    sRangeBase = sTableName.replace('g_aDisas', 'g_Disas');
    return sRangeBase + 'Range';
809
def isVexMap(self):
    """ Tells whether this is a VEX map, i.e. whether the encoding name starts with 'vex'. """
    fVex = self.sEncoding.startswith('vex');
    return fVex;
813
814
class TestType(object):
    """
    Test value type.

    This base class deals with integer like values.  The fUnsigned constructor
    parameter indicates the default stance on zero vs sign extending.  It is
    possible to override fUnsigned=True by prefixing the value with '+' or '-'.
    """
    def __init__(self, sName, acbSizes = None, fUnsigned = True):
        self.sName     = sName;
        self.acbSizes  = [1, 2, 4, 8, 16, 32] if acbSizes is None else acbSizes;  # Normal sizes.
        self.fUnsigned = fUnsigned;

    class BadValue(Exception):
        """ Bad value exception. """
        def __init__(self, sMessage):
            Exception.__init__(self, sMessage);
            self.sMessage = sMessage;

    ## For ascii ~ operator (maps each lower case hex digit to its bitwise inverse).
    kdHexInv = {
        '0': 'f',
        '1': 'e',
        '2': 'd',
        '3': 'c',
        '4': 'b',
        '5': 'a',
        '6': '9',
        '7': '8',
        '8': '7',
        '9': '6',
        'a': '5',
        'b': '4',
        'c': '3',
        'd': '2',
        'e': '1',
        'f': '0',
    };

    def get(self, sValue):
        """
        Get the shortest normal sized byte representation of sValue.

        Returns ((fSignExtend, bytearray), ) or ((fSignExtend, bytearray), (fSignExtend, bytearray), ).
        The latter form is for AND+OR pairs where the first entry is what to
        AND with the field and the second the one to OR with.  This base
        implementation always returns the single entry form.  The bytearray
        holds the value with the least significant byte first.

        Raises BadValue if invalid value.
        """
        if not sValue:
            raise TestType.BadValue('empty value');

        # Deal with sign and detect hexadecimal or decimal.
        fSignExtend = not self.fUnsigned;
        if sValue[0] == '-' or sValue[0] == '+':
            fSignExtend = True;
            fHex = len(sValue) > 3 and sValue[1:3].lower() == '0x';
        else:
            fHex = len(sValue) > 2 and sValue[0:2].lower() == '0x';

        # Try convert it to an integer.  int() is used rather than long() as it
        # exists on both python 2 and 3 (python 2 promotes to long on its own),
        # so a missing 'long' name can no longer masquerade as a bad value.
        try:
            iValue = int(sValue, 16 if fHex else 10);
        except (TypeError, ValueError) as oXcpt:
            raise TestType.BadValue('failed to convert "%s" to integer (%s)' % (sValue, oXcpt));

        # Convert the hex string and pad it to a decent value.  Negative values
        # needs to be manually converted to something non-negative (~-n + 1).
        sHex = hex(iValue if iValue >= 0 else -iValue - 1);
        assert sHex[:2] == '0x';
        sHex = sHex[2:].rstrip('L');    # Python 2 appends 'L' when the value is a long.
        if iValue < 0:
            sHex = ''.join([self.kdHexInv[sDigit] for sDigit in sHex]);
            # Make sure the top bit is set so the value stays negative when sign extended.
            if fSignExtend and sHex[0] not in [ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']:
                sHex = 'f' + sHex;

        # Find the smallest normal size that can hold all the digits, or round
        # up to a multiple of the largest size if none of them can.
        cDigits    = len(sHex);
        cMaxDigits = self.acbSizes[-1] * 2;
        if cDigits <= cMaxDigits:
            for cb in self.acbSizes:
                cNaturalDigits = cb * 2;
                if cDigits <= cNaturalDigits:
                    break;
        else:
            # Floor division keeps this exact integer arithmetic on python 3 too.
            cNaturalDigits = ((cDigits + cMaxDigits - 1) // cMaxDigits) * cMaxDigits;
        assert isinstance(cNaturalDigits, int)

        # Pad with zeros or, for negative values, with all-ones digits.
        if cNaturalDigits != cDigits:
            cNeeded = cNaturalDigits - cDigits;
            if iValue >= 0:
                sHex = ('0' * cNeeded) + sHex;
            else:
                sHex = ('f' * cNeeded) + sHex;

        # Invert and convert to bytearray and return it (walks the digit pairs from the end).
        abValue = bytearray([int(sHex[offHex - 2 : offHex], 16) for offHex in range(len(sHex), 0, -2)]);

        return ((fSignExtend, abValue),);

    def validate(self, sValue):
        """
        Returns True if value is okay, error message on failure.
        """
        try:
            self.get(sValue);
        except TestType.BadValue as oXcpt:
            return oXcpt.sMessage;
        return True;

    def isAndOrPair(self, sValue):
        """
        Checks if sValue is a pair.  Never the case for this base class.
        """
        _ = sValue;
        return False;
939
940
class TestTypeEflags(TestType):
    """
    EFLAGS/RFLAGS/FLAGS value parser working on comma separated flag mnemonics.
    """

    ## The mnemonics isAndOrPair() looks for in a value.
    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);

    def get(self, sValue):
        """
        Translates a comma separated list of flag mnemonics into byte values.

        Mnemonics whose constant starts with '!' are collected in a clear
        mask, all others in a set mask.  Returns a (clear entry, set entry)
        pair when there is something to clear, otherwise only the set entry
        in the single entry form of TestType.get().

        Raises BadValue on unknown mnemonics.
        """
        fToSet   = 0;
        fToClear = 0;
        for sMnemonic in sValue.split(','):
            sEflConst = g_kdEFlagsMnemonics.get(sMnemonic, None);
            if sEflConst is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sMnemonic, sValue))
            if sEflConst[0] != '!':
                fToSet |= g_kdX86EFlagsConstants[sEflConst];
            else:
                fToClear |= g_kdX86EFlagsConstants[sEflConst[1:]];

        aoOrEntries = TestType.get(self, '0x%x' % (fToSet,));
        if fToClear == 0:
            assert self.isAndOrPair(sValue) is False;
            return aoOrEntries;

        aoAndEntries = TestType.get(self, '%#x' % (fToClear,))
        assert self.isAndOrPair(sValue) is True;
        return (aoAndEntries[0], aoOrEntries[0]);

    def isAndOrPair(self, sValue):
        """
        Checks whether any of the kdZeroValueFlags mnemonics occurs in sValue
        (plain substring search).
        """
        return any(sValue.find(sZeroFlag) >= 0 for sZeroFlag in self.kdZeroValueFlags);
976
class TestTypeFromDict(TestType):
    """
    Value parser for comma separated flag names that are looked up in a
    dictionary of constants (used for CR0, CR4 and XCR0 below).
    """

    kdZeroValueFlags = { 'nv': 0, 'pl': 0, 'nz': 0, 'na': 0, 'pe': 0, 'nc': 0, 'di': 0, 'up': 0 };

    def __init__(self, sName, kdConstantsAndValues, sConstantPrefix):
        TestType.__init__(self, sName, acbSizes = [1, 2, 4, 8], fUnsigned = True);
        self.sConstantPrefix      = sConstantPrefix;
        self.kdConstantsAndValues = kdConstantsAndValues;

    def get(self, sValue):
        """
        ORs together the values of all the named flags and returns the byte
        representation of the result (see TestType.get).

        Each name is upper cased and prefixed with sConstantPrefix before
        being looked up.  Raises BadValue on unknown names.
        """
        fCombined = 0;
        for sName in sValue.split(','):
            sConstant = self.sConstantPrefix + sName.upper();
            fBits     = self.kdConstantsAndValues.get(sConstant, None);
            if fBits is None:
                raise self.BadValue('Unknown flag "%s" in "%s"' % (sName, sValue))
            fCombined |= fBits;
        return TestType.get(self, '0x%x' % (fCombined,));
997
998
class TestInOut(object):
    """
    One input or output state modifier.

    This should be thought as values to modify BS3REGCTX and extended (needs
    to be structured) state.

    Each instance names a field (kdFields), an assignment operator
    (kasOperators), the value as a string and the name of the value type.
    """
    ## Assigned operators.
    kasOperators = [
        '&|=',  # Special AND(INV)+OR operator for use with EFLAGS.
        '&~=',
        '&=',
        '|=',
        '='
    ];
    ## Types
    ## NOTE(review): 'int' is constructed with the default fUnsigned=True, i.e.
    ##               the same as 'uint' - confirm whether that is intended.
    kdTypes = {
        'uint':  TestType('uint', fUnsigned = True),
        'int':   TestType('int'),
        'efl':   TestTypeEflags('efl'),
        'cr0':   TestTypeFromDict('cr0', g_kdX86Cr0Constants, 'X86_CR0_'),
        'cr4':   TestTypeFromDict('cr4', g_kdX86Cr4Constants, 'X86_CR4_'),
        'xcr0':  TestTypeFromDict('xcr0', g_kdX86XSaveCConstants, 'XSAVE_C_'),
    };
    ## CPU context fields.
    kdFields = {
        # name:         ( default type, [both|input|output], )
        # Operands.
        'op1':          ( 'uint', 'both',   ), ## \@op1
        'op2':          ( 'uint', 'both',   ), ## \@op2
        'op3':          ( 'uint', 'both',   ), ## \@op3
        'op4':          ( 'uint', 'both',   ), ## \@op4
        # Flags.
        'efl':          ( 'efl',  'both',   ),
        'efl_undef':    ( 'uint', 'output', ),
        # 8-bit GPRs.
        'al':           ( 'uint', 'both',   ),
        'cl':           ( 'uint', 'both',   ),
        'dl':           ( 'uint', 'both',   ),
        'bl':           ( 'uint', 'both',   ),
        'ah':           ( 'uint', 'both',   ),
        'ch':           ( 'uint', 'both',   ),
        'dh':           ( 'uint', 'both',   ),
        'bh':           ( 'uint', 'both',   ),
        'r8l':          ( 'uint', 'both',   ),
        'r9l':          ( 'uint', 'both',   ),
        'r10l':         ( 'uint', 'both',   ),
        'r11l':         ( 'uint', 'both',   ),
        'r12l':         ( 'uint', 'both',   ),
        'r13l':         ( 'uint', 'both',   ),
        'r14l':         ( 'uint', 'both',   ),
        'r15l':         ( 'uint', 'both',   ),
        # 16-bit GPRs.
        'ax':           ( 'uint', 'both',   ),
        'dx':           ( 'uint', 'both',   ),
        'cx':           ( 'uint', 'both',   ),
        'bx':           ( 'uint', 'both',   ),
        'sp':           ( 'uint', 'both',   ),
        'bp':           ( 'uint', 'both',   ),
        'si':           ( 'uint', 'both',   ),
        'di':           ( 'uint', 'both',   ),
        'r8w':          ( 'uint', 'both',   ),
        'r9w':          ( 'uint', 'both',   ),
        'r10w':         ( 'uint', 'both',   ),
        'r11w':         ( 'uint', 'both',   ),
        'r12w':         ( 'uint', 'both',   ),
        'r13w':         ( 'uint', 'both',   ),
        'r14w':         ( 'uint', 'both',   ),
        'r15w':         ( 'uint', 'both',   ),
        # 32-bit GPRs.
        'eax':          ( 'uint', 'both',   ),
        'edx':          ( 'uint', 'both',   ),
        'ecx':          ( 'uint', 'both',   ),
        'ebx':          ( 'uint', 'both',   ),
        'esp':          ( 'uint', 'both',   ),
        'ebp':          ( 'uint', 'both',   ),
        'esi':          ( 'uint', 'both',   ),
        'edi':          ( 'uint', 'both',   ),
        'r8d':          ( 'uint', 'both',   ),
        'r9d':          ( 'uint', 'both',   ),
        'r10d':         ( 'uint', 'both',   ),
        'r11d':         ( 'uint', 'both',   ),
        'r12d':         ( 'uint', 'both',   ),
        'r13d':         ( 'uint', 'both',   ),
        'r14d':         ( 'uint', 'both',   ),
        'r15d':         ( 'uint', 'both',   ),
        # 64-bit GPRs.
        'rax':          ( 'uint', 'both',   ),
        'rdx':          ( 'uint', 'both',   ),
        'rcx':          ( 'uint', 'both',   ),
        'rbx':          ( 'uint', 'both',   ),
        'rsp':          ( 'uint', 'both',   ),
        'rbp':          ( 'uint', 'both',   ),
        'rsi':          ( 'uint', 'both',   ),
        'rdi':          ( 'uint', 'both',   ),
        'r8':           ( 'uint', 'both',   ),
        'r9':           ( 'uint', 'both',   ),
        'r10':          ( 'uint', 'both',   ),
        'r11':          ( 'uint', 'both',   ),
        'r12':          ( 'uint', 'both',   ),
        'r13':          ( 'uint', 'both',   ),
        'r14':          ( 'uint', 'both',   ),
        'r15':          ( 'uint', 'both',   ),
        # 16-bit, 32-bit or 64-bit registers according to operand size.
        'oz.rax':       ( 'uint', 'both',   ),
        'oz.rdx':       ( 'uint', 'both',   ),
        'oz.rcx':       ( 'uint', 'both',   ),
        'oz.rbx':       ( 'uint', 'both',   ),
        'oz.rsp':       ( 'uint', 'both',   ),
        'oz.rbp':       ( 'uint', 'both',   ),
        'oz.rsi':       ( 'uint', 'both',   ),
        'oz.rdi':       ( 'uint', 'both',   ),
        'oz.r8':        ( 'uint', 'both',   ),
        'oz.r9':        ( 'uint', 'both',   ),
        'oz.r10':       ( 'uint', 'both',   ),
        'oz.r11':       ( 'uint', 'both',   ),
        'oz.r12':       ( 'uint', 'both',   ),
        'oz.r13':       ( 'uint', 'both',   ),
        'oz.r14':       ( 'uint', 'both',   ),
        'oz.r15':       ( 'uint', 'both',   ),
        # Control registers.
        'cr0':          ( 'cr0',  'both',   ),
        'cr4':          ( 'cr4',  'both',   ),
        'xcr0':         ( 'xcr0', 'both',   ),
        # FPU Registers
        'fcw':          ( 'uint', 'both',   ),
        'fsw':          ( 'uint', 'both',   ),
        'ftw':          ( 'uint', 'both',   ),
        'fop':          ( 'uint', 'both',   ),
        'fpuip':        ( 'uint', 'both',   ),
        'fpucs':        ( 'uint', 'both',   ),
        'fpudp':        ( 'uint', 'both',   ),
        'fpuds':        ( 'uint', 'both',   ),
        'mxcsr':        ( 'uint', 'both',   ),
        'st0':          ( 'uint', 'both',   ),
        'st1':          ( 'uint', 'both',   ),
        'st2':          ( 'uint', 'both',   ),
        'st3':          ( 'uint', 'both',   ),
        'st4':          ( 'uint', 'both',   ),
        'st5':          ( 'uint', 'both',   ),
        'st6':          ( 'uint', 'both',   ),
        'st7':          ( 'uint', 'both',   ),
        # MMX registers.
        'mm0':          ( 'uint', 'both',   ),
        'mm1':          ( 'uint', 'both',   ),
        'mm2':          ( 'uint', 'both',   ),
        'mm3':          ( 'uint', 'both',   ),
        'mm4':          ( 'uint', 'both',   ),
        'mm5':          ( 'uint', 'both',   ),
        'mm6':          ( 'uint', 'both',   ),
        'mm7':          ( 'uint', 'both',   ),
        # SSE registers.
        'xmm0':         ( 'uint', 'both',   ),
        'xmm1':         ( 'uint', 'both',   ),
        'xmm2':         ( 'uint', 'both',   ),
        'xmm3':         ( 'uint', 'both',   ),
        'xmm4':         ( 'uint', 'both',   ),
        'xmm5':         ( 'uint', 'both',   ),
        'xmm6':         ( 'uint', 'both',   ),
        'xmm7':         ( 'uint', 'both',   ),
        'xmm8':         ( 'uint', 'both',   ),
        'xmm9':         ( 'uint', 'both',   ),
        'xmm10':        ( 'uint', 'both',   ),
        'xmm11':        ( 'uint', 'both',   ),
        'xmm12':        ( 'uint', 'both',   ),
        'xmm13':        ( 'uint', 'both',   ),
        'xmm14':        ( 'uint', 'both',   ),
        'xmm15':        ( 'uint', 'both',   ),
        'xmm0.lo':      ( 'uint', 'both',   ),
        'xmm1.lo':      ( 'uint', 'both',   ),
        'xmm2.lo':      ( 'uint', 'both',   ),
        'xmm3.lo':      ( 'uint', 'both',   ),
        'xmm4.lo':      ( 'uint', 'both',   ),
        'xmm5.lo':      ( 'uint', 'both',   ),
        'xmm6.lo':      ( 'uint', 'both',   ),
        'xmm7.lo':      ( 'uint', 'both',   ),
        'xmm8.lo':      ( 'uint', 'both',   ),
        'xmm9.lo':      ( 'uint', 'both',   ),
        'xmm10.lo':     ( 'uint', 'both',   ),
        'xmm11.lo':     ( 'uint', 'both',   ),
        'xmm12.lo':     ( 'uint', 'both',   ),
        'xmm13.lo':     ( 'uint', 'both',   ),
        'xmm14.lo':     ( 'uint', 'both',   ),
        'xmm15.lo':     ( 'uint', 'both',   ),
        'xmm0.hi':      ( 'uint', 'both',   ),
        'xmm1.hi':      ( 'uint', 'both',   ),
        'xmm2.hi':      ( 'uint', 'both',   ),
        'xmm3.hi':      ( 'uint', 'both',   ),
        'xmm4.hi':      ( 'uint', 'both',   ),
        'xmm5.hi':      ( 'uint', 'both',   ),
        'xmm6.hi':      ( 'uint', 'both',   ),
        'xmm7.hi':      ( 'uint', 'both',   ),
        'xmm8.hi':      ( 'uint', 'both',   ),
        'xmm9.hi':      ( 'uint', 'both',   ),
        'xmm10.hi':     ( 'uint', 'both',   ),
        'xmm11.hi':     ( 'uint', 'both',   ),
        'xmm12.hi':     ( 'uint', 'both',   ),
        'xmm13.hi':     ( 'uint', 'both',   ),
        'xmm14.hi':     ( 'uint', 'both',   ),
        'xmm15.hi':     ( 'uint', 'both',   ),
        'xmm0.lo.zx':   ( 'uint', 'both',   ),
        'xmm1.lo.zx':   ( 'uint', 'both',   ),
        'xmm2.lo.zx':   ( 'uint', 'both',   ),
        'xmm3.lo.zx':   ( 'uint', 'both',   ),
        'xmm4.lo.zx':   ( 'uint', 'both',   ),
        'xmm5.lo.zx':   ( 'uint', 'both',   ),
        'xmm6.lo.zx':   ( 'uint', 'both',   ),
        'xmm7.lo.zx':   ( 'uint', 'both',   ),
        'xmm8.lo.zx':   ( 'uint', 'both',   ),
        'xmm9.lo.zx':   ( 'uint', 'both',   ),
        'xmm10.lo.zx':  ( 'uint', 'both',   ),
        'xmm11.lo.zx':  ( 'uint', 'both',   ),
        'xmm12.lo.zx':  ( 'uint', 'both',   ),
        'xmm13.lo.zx':  ( 'uint', 'both',   ),
        'xmm14.lo.zx':  ( 'uint', 'both',   ),
        'xmm15.lo.zx':  ( 'uint', 'both',   ),
        'xmm0.dw0':     ( 'uint', 'both',   ),
        'xmm1.dw0':     ( 'uint', 'both',   ),
        'xmm2.dw0':     ( 'uint', 'both',   ),
        'xmm3.dw0':     ( 'uint', 'both',   ),
        'xmm4.dw0':     ( 'uint', 'both',   ),
        'xmm5.dw0':     ( 'uint', 'both',   ),
        'xmm6.dw0':     ( 'uint', 'both',   ),
        'xmm7.dw0':     ( 'uint', 'both',   ),
        'xmm8.dw0':     ( 'uint', 'both',   ),
        'xmm9.dw0':     ( 'uint', 'both',   ),
        'xmm10.dw0':    ( 'uint', 'both',   ),
        'xmm11.dw0':    ( 'uint', 'both',   ),
        'xmm12.dw0':    ( 'uint', 'both',   ),
        'xmm13.dw0':    ( 'uint', 'both',   ),
        'xmm14.dw0':    ( 'uint', 'both',   ),
        'xmm15.dw0':    ( 'uint', 'both',   ),
        'xmm15_dw0':    ( 'uint', 'both',   ), # Legacy misspelling of 'xmm15.dw0', kept for compatibility.
        # AVX registers.
        'ymm0':         ( 'uint', 'both',   ),
        'ymm1':         ( 'uint', 'both',   ),
        'ymm2':         ( 'uint', 'both',   ),
        'ymm3':         ( 'uint', 'both',   ),
        'ymm4':         ( 'uint', 'both',   ),
        'ymm5':         ( 'uint', 'both',   ),
        'ymm6':         ( 'uint', 'both',   ),
        'ymm7':         ( 'uint', 'both',   ),
        'ymm8':         ( 'uint', 'both',   ),
        'ymm9':         ( 'uint', 'both',   ),
        'ymm10':        ( 'uint', 'both',   ),
        'ymm11':        ( 'uint', 'both',   ),
        'ymm12':        ( 'uint', 'both',   ),
        'ymm13':        ( 'uint', 'both',   ),
        'ymm14':        ( 'uint', 'both',   ),
        'ymm15':        ( 'uint', 'both',   ),

        # Special ones.
        'value.xcpt':   ( 'uint', 'output', ),
    };

    def __init__(self, sField, sOp, sValue, sType):
        """
        sField must be a key of kdFields and sOp one of kasOperators.  All
        four arguments must be strings.  (Checked by assertions only.)
        """
        assert sField in self.kdFields;
        assert sOp in self.kasOperators;
        self.sField = sField;
        self.sOp    = sOp;
        self.sValue = sValue;
        self.sType  = sType;
        assert isinstance(sField, str);
        assert isinstance(sOp, str);
        assert isinstance(sType, str);
        assert isinstance(sValue, str);
1264
1265
class TestSelector(object):
    """
    A single selector of an instruction test: a variable compared against
    one of that variable's valid values.
    """
    ## The compare operators a selector may use.
    kasCompareOps = [ '==', '!=' ];

    ## The selector variables, each with a dictionary of its valid values.
    kdVariables = {
        # Operand size.
        'size':     { 'o16': 'size_o16', 'o32': 'size_o32', 'o64': 'size_o64', },
        # VEX.L value.
        'vex.l':    { '0': 'vexl_0', '1': 'vexl_1', },
        # Execution ring.
        'ring':     { '0': 'ring_0', '1': 'ring_1', '2': 'ring_2', '3': 'ring_3',
                      '0..2': 'ring_0_thru_2', '1..3': 'ring_1_thru_3', },
        # Basic code mode.
        'codebits': { '64': 'code_64bit', '32': 'code_32bit', '16': 'code_16bit', },
        # cpu modes.
        'mode':     { 'real': 'mode_real', 'prot': 'mode_prot', 'long': 'mode_long', 'v86': 'mode_v86',
                      'smm': 'mode_smm', 'vmx': 'mode_vmx', 'svm': 'mode_svm', },
        # paging on/off
        'paging':   { 'on': 'paging_on', 'off': 'paging_off', },
        # CPU vendor
        'vendor':   { 'amd': 'vendor_amd', 'intel': 'vendor_intel', 'via': 'vendor_via', },
    };

    ## Shorthand predicates and the variable expressions they stand for.
    kdPredicates = {
        # Operand size.
        'o16':          'size==o16',
        'o32':          'size==o32',
        'o64':          'size==o64',
        # Execution ring.
        'ring0':        'ring==0',
        '!ring0':       'ring==1..3',
        'ring1':        'ring==1',
        'ring2':        'ring==2',
        'ring3':        'ring==3',
        'user':         'ring==3',
        'supervisor':   'ring==0..2',
        # Code bits.
        '16-bit':       'codebits==16',
        '32-bit':       'codebits==32',
        '64-bit':       'codebits==64',
        # CPU mode.
        'real':         'mode==real',
        'prot':         'mode==prot',
        'long':         'mode==long',
        'v86':          'mode==v86',
        'smm':          'mode==smm',
        'vmx':          'mode==vmx',
        'svm':          'mode==svm',
        # Paging.
        'paging':       'paging==on',
        '!paging':      'paging==off',
        # CPU vendor.
        'amd':          'vendor==amd',
        '!amd':         'vendor!=amd',
        'intel':        'vendor==intel',
        '!intel':       'vendor!=intel',
        'via':          'vendor==via',
        '!via':         'vendor!=via',
    };

    def __init__(self, sVariable, sOp, sValue):
        """
        sVariable must be a key of kdVariables, sOp one of kasCompareOps and
        sValue a valid value for the variable.  (Checked by assertions only.)
        """
        assert sVariable in self.kdVariables;
        assert sOp in self.kasCompareOps;
        assert sValue in self.kdVariables[sVariable];
        self.sVariable, self.sOp, self.sValue = sVariable, sOp, sValue;
1362
1363
1364class InstructionTest(object):
1365 """
1366 Instruction test.
1367 """
1368
1369 def __init__(self, oInstr): # type: (InstructionTest, Instruction)
1370 self.oInstr = oInstr # type: InstructionTest
1371 self.aoInputs = [] # type: list(TestInOut)
1372 self.aoOutputs = [] # type: list(TestInOut)
1373 self.aoSelectors = [] # type: list(TestSelector)
1374
1375 def toString(self, fRepr = False):
1376 """
1377 Converts it to string representation.
1378 """
1379 asWords = [];
1380 if self.aoSelectors:
1381 for oSelector in self.aoSelectors:
1382 asWords.append('%s%s%s' % (oSelector.sVariable, oSelector.sOp, oSelector.sValue,));
1383 asWords.append('/');
1384
1385 for oModifier in self.aoInputs:
1386 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1387
1388 asWords.append('->');
1389
1390 for oModifier in self.aoOutputs:
1391 asWords.append('%s%s%s:%s' % (oModifier.sField, oModifier.sOp, oModifier.sValue, oModifier.sType,));
1392
1393 if fRepr:
1394 return '<' + ' '.join(asWords) + '>';
1395 return ' '.join(asWords);
1396
1397 def __str__(self):
1398 """ Provide string represenation. """
1399 return self.toString(False);
1400
1401 def __repr__(self):
1402 """ Provide unambigious string representation. """
1403 return self.toString(True);
1404
class Operand(object):
    """
    An operand of an instruction: where it is found and what type it has.
    """

    def __init__(self, sWhere, sType):
        assert sWhere in g_kdOpLocations, sWhere;
        assert sType  in g_kdOpTypes, sType;
        self.sWhere = sWhere;   ##< Key in g_kdOpLocations.
        self.sType  = sType;    ##< Key in g_kdOpTypes.

    def usesModRM(self):
        """ Returns True if the type starts with 'E', 'G' or 'M', i.e. some form of ModR/M encoding is used. """
        chKind = self.sType[0];
        return chKind in ('E', 'G', 'M');
1419
1420
1421
class Instruction(object): # pylint: disable=too-many-instance-attributes
    """
    An instruction as collected by the parser, together with the bookkeeping
    needed while decoding the source file.
    """

    def __init__(self, sSrcFile, iLine):
        ## @name Core attributes.
        ## @{
        self.oParent        = None      # type: Instruction
        self.sMnemonic      = None;
        self.sBrief         = None;
        self.asDescSections = []        # type: list(str)
        self.aoMaps         = []        # type: list(InstructionMap)
        self.aoOperands     = []        # type: list(Operand)
        self.sPrefix        = None;     ##< Single prefix: None, 'none', 0x66, 0xf3, 0xf2
        self.sOpcode        = None      # type: str
        self.sSubOpcode     = None      # type: str
        self.sEncoding      = None;
        self.asFlTest       = None;
        self.asFlModify     = None;
        self.asFlUndefined  = None;
        self.asFlSet        = None;
        self.asFlClear      = None;
        self.dHints         = {};       ##< Dictionary of instruction hints, flags, whatnot. (Dictionary for speed; dummy value).
        self.sDisEnum       = None;     ##< OP_XXXX value.  Default is based on the uppercased mnemonic.
        self.asCpuIds       = [];       ##< The CPUID feature bit names for this instruction. If multiple, assume AND.
        self.asReqFeatures  = [];       ##< Which features are required to be enabled to run this instruction.
        self.aoTests        = []        # type: list(InstructionTest)
        self.sMinCpu        = None;     ##< Indicates the minimum CPU required for the instruction. Not set when oCpuExpr is.
        self.oCpuExpr       = None;     ##< Some CPU restriction expression...
        self.sGroup         = None;
        self.fUnused        = False;    ##< Unused instruction.
        self.fInvalid       = False;    ##< Invalid instruction (like UD2).
        self.sInvalidStyle  = None;     ##< Invalid behaviour style (g_kdInvalidStyles),
        self.sXcptType      = None;     ##< Exception type (g_kdXcptTypes).
        ## @}

        ## @name Implementation attributes.
        ## @{
        self.sStats         = None;
        self.sFunction      = None;
        self.fStub          = False;
        self.fUdStub        = False;
        ## @}

        ## @name Decoding info
        ## @{
        self.sSrcFile       = sSrcFile;
        self.iLineCreated   = iLine;
        self.iLineCompleted = None;
        self.cOpTags        = 0;
        self.iLineFnIemOpMacro  = -1;
        self.iLineMnemonicMacro = -1;
        ## @}

        ## @name Intermediate input fields.
        ## @{
        self.sRawDisOpNo    = None;
        self.asRawDisParams = [];
        self.sRawIemOpFlags = None;
        self.sRawOldOpcodes = None;
        self.asCopyTests    = [];
        ## @}

    def toString(self, fRepr = False):
        """
        Formats the instruction as a '; ' separated list of 'name=value'
        fields, wrapped in angle brackets when fRepr is set.

        Fields whose value is None are left out, as are the optional fields
        (prefix, operands, maps, hints, ...) when they are empty or unset.
        """
        # (name, value) in output order; a None value means the field is skipped.
        atFields = [
            ('opcode',      self.sOpcode),
            ('prefix',      self.sPrefix if self.sPrefix else None),
            ('mnemonic',    self.sMnemonic),
        ];
        atFields.extend([('op%u' % (iOperand + 1,), '%s:%s' % (oOperand.sWhere, oOperand.sType,))
                         for iOperand, oOperand in enumerate(self.aoOperands)]);
        atFields.extend([
            ('maps',        ','.join([oMap.sName for oMap in self.aoMaps]) if self.aoMaps else None),
            ('encoding',    self.sEncoding),
            ('hints',       ','.join(self.dHints.keys()) if self.dHints else None),
            ('disenum',     self.sDisEnum),
            ('cpuid',       ','.join(self.asCpuIds) if self.asCpuIds else None),
            ('group',       self.sGroup),
            ('unused',      'True' if self.fUnused else None),
            ('invalid',     'True' if self.fInvalid else None),
            ('invlstyle',   self.sInvalidStyle),
            ('fltest',      self.asFlTest),
            ('flmodify',    self.asFlModify),
            ('flundef',     self.asFlUndefined),
            ('flset',       self.asFlSet),
            ('flclear',     self.asFlClear),
            ('mincpu',      self.sMinCpu),
            ('stats',       self.sStats),
            ('sFunction',   self.sFunction),
            ('fStub',       'True' if self.fStub else None),
            ('fUdStub',     'True' if self.fUdStub else None),
            ('optags',      str(self.cOpTags) if self.cOpTags else None),
            ('FNIEMOP_XXX', str(self.iLineFnIemOpMacro) if self.iLineFnIemOpMacro != -1 else None),
            ('IEMOP_MNEMMONICn', str(self.iLineMnemonicMacro) if self.iLineMnemonicMacro != -1 else None),
        ]);

        sBody = '; '.join(['%s=%s' % (sField, oValue,) for sField, oValue in atFields if oValue is not None]);
        return ('<' + sBody + '>') if fRepr else sBody;

    def __str__(self):
        """ Plain string representation. """
        return self.toString(False);

    def __repr__(self):
        """ Unambiguous string representation (angle bracketed). """
        return self.toString(True);

    def copy(self, oMap = None, sOpcode = None, sSubOpcode = None, sPrefix = None):
        """
        Creates a copy of the instruction with this one as its parent, for
        placing it in a different map or elsewhere in the current map.

        The map, opcode, sub-opcode and prefix of the copy are replaced by
        the arguments that are given (and not empty).
        """
        oCopy = Instruction(self.sSrcFile, self.iLineCreated);
        oCopy.oParent        = self;

        # What the caller may override.
        oCopy.aoMaps         = [oMap,] if oMap else list(self.aoMaps);
        oCopy.sPrefix        = sPrefix or self.sPrefix;
        oCopy.sOpcode        = sOpcode or self.sOpcode;
        oCopy.sSubOpcode     = sSubOpcode or self.sSubOpcode;

        # Containers that get a new (shallow) container of their own.
        oCopy.asDescSections = list(self.asDescSections);
        oCopy.aoOperands     = list(self.aoOperands); ## Deeper copy?
        oCopy.dHints         = dict(self.dHints);
        oCopy.asCpuIds       = list(self.asCpuIds);
        oCopy.asReqFeatures  = list(self.asReqFeatures);
        oCopy.aoTests        = list(self.aoTests);    ## Deeper copy?
        oCopy.asRawDisParams = list(self.asRawDisParams);
        oCopy.asCopyTests    = list(self.asCopyTests);

        # Core attributes taken over as they are.
        oCopy.sMnemonic      = self.sMnemonic;
        oCopy.sBrief         = self.sBrief;
        oCopy.sEncoding      = self.sEncoding;
        oCopy.asFlTest       = self.asFlTest;
        oCopy.asFlModify     = self.asFlModify;
        oCopy.asFlUndefined  = self.asFlUndefined;
        oCopy.asFlSet        = self.asFlSet;
        oCopy.asFlClear      = self.asFlClear;
        oCopy.sDisEnum       = self.sDisEnum;
        oCopy.sMinCpu        = self.sMinCpu;
        oCopy.oCpuExpr       = self.oCpuExpr;
        oCopy.sGroup         = self.sGroup;
        oCopy.fUnused        = self.fUnused;
        oCopy.fInvalid       = self.fInvalid;
        oCopy.sInvalidStyle  = self.sInvalidStyle;
        oCopy.sXcptType      = self.sXcptType;

        # Implementation attributes.
        oCopy.sStats         = self.sStats;
        oCopy.sFunction      = self.sFunction;
        oCopy.fStub          = self.fStub;
        oCopy.fUdStub        = self.fUdStub;

        # Decoding info (source file and creation line went to the constructor).
        oCopy.iLineCompleted = self.iLineCompleted;
        oCopy.cOpTags        = self.cOpTags;
        oCopy.iLineFnIemOpMacro  = self.iLineFnIemOpMacro;
        oCopy.iLineMnemonicMacro = self.iLineMnemonicMacro;

        # Intermediate input fields.
        oCopy.sRawDisOpNo    = self.sRawDisOpNo;
        oCopy.sRawIemOpFlags = self.sRawIemOpFlags;
        oCopy.sRawOldOpcodes = self.sRawOldOpcodes;

        return oCopy;

    def getOpcodeByte(self):
        """
        Decodes sOpcode into a byte range integer value.

        Understands the '0xNN', '/r', '11/r' and '!11/r' forms, with r being
        a single digit.  Raises exception if sOpcode is None or invalid.
        """
        if self.sOpcode is None:
            raise Exception('No opcode byte for %s!' % (self,));
        sSpec = str(self.sOpcode); # pylint type confusion workaround.

        # Full hex byte form.
        if sSpec.startswith('0x'):
            return int(sSpec, 16);

        # The forms with the digit in bits 3 thru 5: (length, lead-in, bits to OR in).
        #   '/r'    - nothing more.
        #   '11/r'  - mod=3.
        #   '!11/r' - returns mod=2.
        ## @todo the !11/r form doesn't really work...
        for cchSpec, sLeadIn, fModBits in ((2, '/', 0x00), (4, '11/', 0xc0), (5, '!11/', 0x80)):
            if len(sSpec) == cchSpec and sSpec.startswith(sLeadIn) and sSpec[-1].isdigit():
                return (int(sSpec[-1:]) << 3) | fModBits;

        raise Exception('unsupported opcode byte spec "%s" for %s' % (sSpec, self,));

    @staticmethod
    def _flagsToIntegerMask(asFlags):
        """
        ORs together the EFLAGS constants for the given flag mnemonics.
        Returns 0 when asFlags is None or empty.
        """
        fMask = 0;
        for sFlag in asFlags or ():
            sConstant = g_kdEFlagsMnemonics[sFlag];
            assert sConstant[0] != '!', sConstant
            fMask |= g_kdX86EFlagsConstants[sConstant];
        return fMask;

    def getTestedFlagsMask(self):
        """ Returns asFlTest as an integer mask value """
        return self._flagsToIntegerMask(self.asFlTest);

    def getModifiedFlagsMask(self):
        """ Returns asFlModify as an integer mask value """
        return self._flagsToIntegerMask(self.asFlModify);

    def getUndefinedFlagsMask(self):
        """ Returns asFlUndefined as an integer mask value """
        return self._flagsToIntegerMask(self.asFlUndefined);

    def getSetFlagsMask(self):
        """ Returns asFlSet as an integer mask value """
        return self._flagsToIntegerMask(self.asFlSet);

    def getClearedFlagsMask(self):
        """ Returns asFlClear as an integer mask value """
        return self._flagsToIntegerMask(self.asFlClear);

    def onlyInVexMaps(self):
        """ Returns True if only in VEX maps, otherwise False.  (No maps -> False) """
        return bool(self.aoMaps) and all(oMap.isVexMap() for oMap in self.aoMaps);
1660
1661
1662
1663## All the instructions.
1664g_aoAllInstructions = [] # type: list(Instruction)
1665
1666## All the instructions indexed by statistics name (opstat).
1667g_dAllInstructionsByStat = {} # type: dict(Instruction)
1668
1669## All the instructions indexed by function name (opfunction).
1670g_dAllInstructionsByFunction = {} # type: dict(list(Instruction))
1671
1672## Instructions tagged by oponlytest
1673g_aoOnlyTestInstructions = [] # type: list(Instruction)
1674
1675## Instruction maps.
1676g_aoInstructionMaps = [
1677 InstructionMap('one', 'g_apfnOneByteMap', sSelector = 'byte'),
1678 InstructionMap('grp1_80', asLeadOpcodes = ['0x80',], sSelector = '/r'),
1679 InstructionMap('grp1_81', asLeadOpcodes = ['0x81',], sSelector = '/r'),
1680 InstructionMap('grp1_82', asLeadOpcodes = ['0x82',], sSelector = '/r'),
1681 InstructionMap('grp1_83', asLeadOpcodes = ['0x83',], sSelector = '/r'),
1682 InstructionMap('grp1a', asLeadOpcodes = ['0x8f',], sSelector = '/r'),
1683 InstructionMap('grp2_c0', asLeadOpcodes = ['0xc0',], sSelector = '/r'),
1684 InstructionMap('grp2_c1', asLeadOpcodes = ['0xc1',], sSelector = '/r'),
1685 InstructionMap('grp2_d0', asLeadOpcodes = ['0xd0',], sSelector = '/r'),
1686 InstructionMap('grp2_d1', asLeadOpcodes = ['0xd1',], sSelector = '/r'),
1687 InstructionMap('grp2_d2', asLeadOpcodes = ['0xd2',], sSelector = '/r'),
1688 InstructionMap('grp2_d3', asLeadOpcodes = ['0xd3',], sSelector = '/r'),
1689 ## @todo g_apfnEscF1_E0toFF
1690 InstructionMap('grp3_f6', asLeadOpcodes = ['0xf6',], sSelector = '/r'),
1691 InstructionMap('grp3_f7', asLeadOpcodes = ['0xf7',], sSelector = '/r'),
1692 InstructionMap('grp4', asLeadOpcodes = ['0xfe',], sSelector = '/r'),
1693 InstructionMap('grp5', asLeadOpcodes = ['0xff',], sSelector = '/r'),
1694 InstructionMap('grp11_c6_m', asLeadOpcodes = ['0xc6',], sSelector = '!11 /r'),
1695 InstructionMap('grp11_c6_r', asLeadOpcodes = ['0xc6',], sSelector = '11'), # xabort
1696 InstructionMap('grp11_c7_m', asLeadOpcodes = ['0xc7',], sSelector = '!11 /r'),
1697 InstructionMap('grp11_c7_r', asLeadOpcodes = ['0xc7',], sSelector = '11'), # xbegin
1698
1699 InstructionMap('two0f', 'g_apfnTwoByteMap', asLeadOpcodes = ['0x0f',], sDisParse = 'IDX_ParseTwoByteEsc'),
1700 InstructionMap('grp6', 'g_apfnGroup6', asLeadOpcodes = ['0x0f', '0x00',], sSelector = '/r'),
1701 InstructionMap('grp7_m', 'g_apfnGroup7Mem', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '!11 /r'),
1702 InstructionMap('grp7_r', asLeadOpcodes = ['0x0f', '0x01',], sSelector = '11'),
1703 InstructionMap('grp8', asLeadOpcodes = ['0x0f', '0xba',], sSelector = '/r'),
1704 InstructionMap('grp9', 'g_apfnGroup9RegReg', asLeadOpcodes = ['0x0f', '0xc7',], sSelector = 'mod /r'),
1705 ## @todo What about g_apfnGroup9MemReg?
1706 InstructionMap('grp10', None, asLeadOpcodes = ['0x0f', '0xb9',], sSelector = '/r'), # UD1 /w modr/m
1707 InstructionMap('grp12', 'g_apfnGroup12RegReg', asLeadOpcodes = ['0x0f', '0x71',], sSelector = 'mod /r'),
1708 InstructionMap('grp13', 'g_apfnGroup13RegReg', asLeadOpcodes = ['0x0f', '0x72',], sSelector = 'mod /r'),
1709 InstructionMap('grp14', 'g_apfnGroup14RegReg', asLeadOpcodes = ['0x0f', '0x73',], sSelector = 'mod /r'),
1710 InstructionMap('grp15', 'g_apfnGroup15MemReg', asLeadOpcodes = ['0x0f', '0xae',], sSelector = 'memreg /r'),
1711 ## @todo What about g_apfnGroup15RegReg?
1712 InstructionMap('grp16', asLeadOpcodes = ['0x0f', '0x18',], sSelector = 'mod /r'),
1713 InstructionMap('grpA17', asLeadOpcodes = ['0x0f', '0x78',], sSelector = '/r'), # AMD: EXTRQ weirdness
1714 InstructionMap('grpP', asLeadOpcodes = ['0x0f', '0x0d',], sSelector = '/r'), # AMD: prefetch
1715
1716 InstructionMap('three0f38', 'g_apfnThreeByte0f38', asLeadOpcodes = ['0x0f', '0x38',]),
1717 InstructionMap('three0f3a', 'g_apfnThreeByte0f3a', asLeadOpcodes = ['0x0f', '0x3a',]),
1718
1719 InstructionMap('vexmap1', 'g_apfnVexMap1', sEncoding = 'vex1'),
1720 InstructionMap('vexgrp12', 'g_apfnVexGroup12RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x71',], sSelector = 'mod /r'),
1721 InstructionMap('vexgrp13', 'g_apfnVexGroup13RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x72',], sSelector = 'mod /r'),
1722 InstructionMap('vexgrp14', 'g_apfnVexGroup14RegReg', sEncoding = 'vex1', asLeadOpcodes = ['0x73',], sSelector = 'mod /r'),
1723 InstructionMap('vexgrp15', 'g_apfnVexGroup15MemReg', sEncoding = 'vex1', asLeadOpcodes = ['0xae',], sSelector = 'memreg /r'),
1724 InstructionMap('vexgrp17', 'g_apfnVexGroup17_f3', sEncoding = 'vex1', asLeadOpcodes = ['0xf3',], sSelector = '/r'),
1725
1726 InstructionMap('vexmap2', 'g_apfnVexMap2', sEncoding = 'vex2'),
1727 InstructionMap('vexmap3', 'g_apfnVexMap3', sEncoding = 'vex3'),
1728
1729 InstructionMap('3dnow', asLeadOpcodes = ['0x0f', '0x0f',]),
1730 InstructionMap('xopmap8', sEncoding = 'xop8'),
1731 InstructionMap('xopmap9', sEncoding = 'xop9'),
1732 InstructionMap('xopgrp1', sEncoding = 'xop9', asLeadOpcodes = ['0x01'], sSelector = '/r'),
1733 InstructionMap('xopgrp2', sEncoding = 'xop9', asLeadOpcodes = ['0x02'], sSelector = '/r'),
1734 InstructionMap('xopgrp3', sEncoding = 'xop9', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1735 InstructionMap('xopmap10', sEncoding = 'xop10'),
1736 InstructionMap('xopgrp4', sEncoding = 'xop10', asLeadOpcodes = ['0x12'], sSelector = '/r'),
1737];
## Lookup table: InstructionMap.sName -> InstructionMap, built from g_aoInstructionMaps.
g_dInstructionMaps          = dict((oCurMap.sName,    oCurMap) for oCurMap in g_aoInstructionMaps);
## Lookup table: InstructionMap.sIemName -> InstructionMap, built from g_aoInstructionMaps.
g_dInstructionMapsByIemName = dict((oCurMap.sIemName, oCurMap) for oCurMap in g_aoInstructionMaps);
1740
1741
1742#
1743# "Microcode" statements and blocks
1744#
1745
class McStmt(object):
    """
    One statement of a microcode block.

    This is the plain base form: a statement name plus its list of parameter
    strings.  Specialised statements derive from it.
    """
    def __init__(self, sName, asParams):
        self.sName    = sName;      ##< 'IEM_MC_XXX' or 'C++'.
        self.asParams = asParams;   ##< The parameter strings, in call order.
        self.oUser    = None;       ##< Not used by this class; initialised to None.

    def renderCode(self, cchIndent = 0):
        """
        Returns the statement as source text: cchIndent spaces, the name, the
        comma separated parameters in parentheses, a semicolon and a newline.
        """
        sPadding = ' ' * cchIndent;
        sArgs    = ', '.join(self.asParams);
        return ''.join((sPadding, self.sName, '(', sArgs, ');\n'));

    @staticmethod
    def renderCodeForList(aoStmts, cchIndent = 0):
        """
        Returns the concatenated renderCode() output of all statements in aoStmts,
        each rendered with the same cchIndent.
        """
        asRendered = [];
        for oCurStmt in aoStmts:
            asRendered.append(oCurStmt.renderCode(cchIndent));
        return ''.join(asRendered);

    def isCppStmt(self):
        """ Returns True if the statement name has the 'C++' prefix. """
        return self.sName[:3] == 'C++';
1771
class McStmtCond(McStmt):
    """
    Conditional statement base class (IEM_MC_IF_XXX).

    Carries two statement lists, one for the IF branch and one for the
    (optional) ELSE branch.
    """
    def __init__(self, sName, asParams):
        McStmt.__init__(self, sName, asParams);
        self.aoIfBranch   = [];     ##< Statements executed when the condition holds.
        self.aoElseBranch = [];     ##< Statements of the else branch; empty if there is none.

    def renderCode(self, cchIndent = 0):
        """
        Renders the conditional including both branches.

        The branch bodies are indented four more columns than the statement
        itself, and the IEM_MC_ELSE() part is only emitted for a non-empty
        else branch.
        """
        sPadding = ' ' * cchIndent;
        asParts  = [
            sPadding + self.sName + '(' + ', '.join(self.asParams) + ') {\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
        ];
        if self.aoElseBranch:
            asParts.append(sPadding + '} IEM_MC_ELSE() {\n');
            asParts.append(self.renderCodeForList(self.aoElseBranch, cchIndent + 4));
        asParts.append(sPadding + '} IEM_MC_ENDIF();\n');
        return ''.join(asParts);
1789
class McStmtVar(McStmt):
    """
    Variable declaration statement (IEM_MC_LOCAL_VAR*).

    Adds the variable type, the variable name and, for constants, the value
    to what the generic statement already records.
    """
    def __init__(self, sName, asParams, sType, sVarName, sConstValue = None):
        McStmt.__init__(self, sName, asParams);
        self.sConstValue = sConstValue;     ##< None if not const.
        self.sVarName    = sVarName;        ##< The name of the variable.
        self.sType       = sType;           ##< The type of the variable.
1797
class McStmtArg(McStmtVar):
    """
    Argument declaration statement (IEM_MC_ARG*).

    On top of the variable details this records the argument number and, for
    arguments that reference something else, what is referenced and how.
    """
    def __init__(self, sName, asParams, sType, sVarName, iArg, sConstValue = None, sRef = None, sRefType = 'none'):
        McStmtVar.__init__(self, sName, asParams, sType, sVarName, sConstValue);
        ## The argument number.
        self.iArg     = iArg;
        ## The reference string (local variable, register).
        self.sRef     = sRef;
        ## The kind of reference: 'local', 'none'.
        self.sRefType = sRefType;
        # Only the two reference kinds listed above are understood.
        assert sRefType in ('none', 'local');
1806
1807
class McStmtCall(McStmt):
    """
    Call statement (IEM_MC_CALL_*).

    iFnParam is the index into asParams of the function being called; the
    parameters following it are taken to be the call arguments.  iRcNameParam
    is the index of the return code variable parameter, negative if there is
    none.
    """
    def __init__(self, sName, asParams, iFnParam, iRcNameParam = -1):
        McStmt.__init__(self, sName, asParams);
        self.idxFn     = iFnParam;              ##< Index of the function name in asParams.
        self.idxParams = iFnParam + 1;          ##< Index of the first parameter after the function name.
        self.sFn       = asParams[iFnParam];    ##< The function name.
        ## The return code variable parameter string, None if the call has none.
        if iRcNameParam >= 0:
            self.iRcName = asParams[iRcNameParam];
        else:
            self.iRcName = None;
1816
class McCppGeneric(McStmt):
    """
    Generic C++/C statement.

    The raw code is kept as the one and only parameter.  fDecode records
    whether the statement was classified as decoder related.
    """
    def __init__(self, sCode, fDecode, sName = 'C++'):
        McStmt.__init__(self, sName, [sCode,]);
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the raw code, tagging the end of every line of it with a
        '// C++ decode' or '// C++ normal' comment depending on fDecode.
        """
        if self.fDecode:
            sLineEnd = ' // C++ decode\n';
        else:
            sLineEnd = ' // C++ normal\n';
        sText = ' ' * cchIndent + self.asParams[0] + '\n';
        return sText.replace('\n', sLineEnd);
1832
class McCppCond(McStmtCond):
    """
    C++/C 'if' statement.

    The condition expression is kept as the one and only parameter and the
    statement is named 'C++/if'.
    """
    def __init__(self, sCode, fDecode):
        McStmtCond.__init__(self, 'C++/if', [sCode,]);
        self.fDecode = fDecode;

    def renderCode(self, cchIndent = 0):
        """
        Renders the if statement with braces around both branches.

        The 'if' line (and the 'else' line, when there is a non-empty else
        branch) gets a '// C++ decode' or '// C++ normal' comment appended.
        """
        sPadding = ' ' * cchIndent;
        if self.fDecode:
            sAnnotation = '// C++ decode';
        else:
            sAnnotation = '// C++ normal';

        asParts = [
            sPadding + 'if (' + self.asParams[0] + ') ' + sAnnotation + '\n',
            sPadding + '{\n',
            self.renderCodeForList(self.aoIfBranch, cchIndent + 4),
            sPadding + '}\n',
        ];
        if self.aoElseBranch:
            asParts.extend([
                sPadding + 'else ' + sAnnotation + '\n',
                sPadding + '{\n',
                self.renderCodeForList(self.aoElseBranch, cchIndent + 4),
                sPadding + '}\n',
            ]);
        return ''.join(asParts);
1853
class McCppPreProc(McCppGeneric):
    """
    C++/C Preprocessor directive.

    Stored like a generic C++ statement named 'C++/preproc' with fDecode set
    to False.
    """
    def __init__(self, sCode):
        McCppGeneric.__init__(self, sCode, False, sName = 'C++/preproc');

    def renderCode(self, cchIndent = 0):
        """
        Renders the directive followed by a newline.
        The cchIndent argument is ignored, so no indentation is added.
        """
        return ''.join((self.asParams[0], '\n'));
1863
1864
class McBlock(object):
    """
    Microcode block (IEM_MC_BEGIN ... IEM_MC_END).

    Records where the block was found and the raw source lines it consists of,
    and decodes those lines into a list of McStmt objects (see decode() and
    decodeCode()).
    """

    def __init__(self, sSrcFile, iBeginLine, offBeginLine, sFunction, iInFunction, cchIndent = None):
        self.sSrcFile     = sSrcFile;       ##< The source file containing the block.
        self.iBeginLine   = iBeginLine;     ##< The line with the IEM_MC_BEGIN statement.
        self.offBeginLine = offBeginLine;   ##< The offset of the IEM_MC_BEGIN statement within the line.
        self.iEndLine     = -1;             ##< The line with the IEM_MC_END statement.
        self.offEndLine   = 0;              ##< The offset of the IEM_MC_END statement within the line.
        self.sFunction    = sFunction;      ##< The function the block resides in.
        self.iInFunction  = iInFunction;    ##< The block number within the function.
        ## The block indentation.
        # Note! A cchIndent of zero is treated like None here, i.e. offBeginLine is used instead.
        self.cchIndent    = cchIndent if cchIndent else offBeginLine;
        self.asLines      = []              # type: list(str) ##< The raw lines the block is made up of.
        ## Decoded statements in the block.
        self.aoStmts      = []              # type: list(McStmt)

    def complete(self, iEndLine, offEndLine, asLines):
        """
        Completes the microcode block.

        Records the end position and the raw lines.  May only be called once
        per block (asserted via the initial iEndLine value of -1).
        """
        assert self.iEndLine == -1;
        self.iEndLine   = iEndLine;
        self.offEndLine = offEndLine;
        self.asLines    = asLines;

    def raiseDecodeError(self, sRawCode, off, sMessage):
        """
        Raises a decoding error.

        The reported line is iBeginLine plus the number of newlines preceding
        off in sRawCode, the reported column is the one-based offset of off
        within its line.  Always raises ParserException.
        """
        offStartOfLine = sRawCode.rfind('\n', 0, off) + 1;
        iLine          = sRawCode.count('\n', 0, off);
        raise ParserException('%s:%d:%d: parsing error: %s'
                              % (self.sSrcFile, self.iBeginLine + iLine, off - offStartOfLine + 1, sMessage,));

    def raiseStmtError(self, sName, sMessage):
        """
        Raises a statement parser error.

        The location reported is the start of the block.  Always raises
        ParserException.
        """
        raise ParserException('%s:%d: %s: parsing error: %s' % (self.sSrcFile, self.iBeginLine, sName, sMessage,));

    def checkStmtParamCount(self, sName, asParams, cParamsExpected):
        """
        Checks the parameter count, raising an error if it doesn't match.

        Returns True when len(asParams) equals cParamsExpected.
        Raises ParserException otherwise.
        """
        if len(asParams) != cParamsExpected:
            raise ParserException('%s:%d: %s: Expected %s parameters, found %s!'
                                  % (self.sSrcFile, self.iBeginLine, sName, cParamsExpected, len(asParams),));
        return True;

    #
    # Statement parsers.
    #
    # These are static methods taking the block as oSelf, which is how
    # decodeCode() invokes whatever it finds in the g_dMcStmtParsers table
    # (fnParser(self, sName, asParams)).  They return a McStmt instance or a
    # list/tuple of McStmt instances.
    #

    @staticmethod
    def parseMcGeneric(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmt object. """
        _ = oSelf;
        return McStmt(sName, asParams);

    @staticmethod
    def parseMcGenericCond(oSelf, sName, asParams):
        """ Generic parser that returns a plain McStmtCond object. """
        _ = oSelf;
        return McStmtCond(sName, asParams);

    @staticmethod
    def parseMcBegin(oSelf, sName, asParams):
        """ IEM_MC_BEGIN - requires exactly two parameters. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McBlock.parseMcGeneric(oSelf, sName, asParams);

    @staticmethod
    def parseMcArg(oSelf, sName, asParams):
        """ IEM_MC_ARG - parameters: type, name, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));

    @staticmethod
    def parseMcArgConst(oSelf, sName, asParams):
        """ IEM_MC_ARG_CONST - parameters: type, name, constant value, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);

    @staticmethod
    def parseMcArgLocalRef(oSelf, sName, asParams):
        """ IEM_MC_ARG_LOCAL_REF - parameters: type, name, referenced local, argument number. """
        oSelf.checkStmtParamCount(sName, asParams, 4);
        return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');

    @staticmethod
    def parseMcArgLocalEFlags(oSelf, sName, asParams):
        """
        IEM_MC_ARG_LOCAL_EFLAGS - parameters: pointer argument name, local variable name, argument number.

        Returns a tuple of two statements: the declaration of the uint32_t local
        and an IEM_MC_ARG_LOCAL_REF style argument referencing it.
        """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
        # NOTE(review): The other local variable parsers in this class are documented as IEM_MC_LOCAL and
        #               IEM_MC_LOCAL_CONST; confirm that 'IEM_MC_LOCAL_VAR' is the intended statement name.
        return (
            McStmtVar('IEM_MC_LOCAL_VAR', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
            # The parameter list uses the IEM_MC_ARG_LOCAL_REF order that parseMcArgLocalRef reads, i.e.
            # type, name, referenced local, argument number, so the statement renders correctly.
            McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
                      'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
        );

    @staticmethod
    def parseMcLocal(oSelf, sName, asParams):
        """ IEM_MC_LOCAL - parameters: type, name. """
        oSelf.checkStmtParamCount(sName, asParams, 2);
        return McStmtVar(sName, asParams, asParams[0], asParams[1]);

    @staticmethod
    def parseMcLocalConst(oSelf, sName, asParams):
        """ IEM_MC_LOCAL_CONST - parameters: type, name, constant value. """
        oSelf.checkStmtParamCount(sName, asParams, 3);
        return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);

    @staticmethod
    def parseMcCallAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_AIMPL_3|4 - parameters: return code variable, function, arguments.

        The argument count is taken from the last character of the statement name.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 2 + cArgs);
        return McStmtCall(sName, asParams, 1, 0);

    @staticmethod
    def parseMcCallVoidAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_VOID_AIMPL_0|1|2|3|4 - parameters: function, arguments.

        The argument count is taken from the last character of the statement name.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallAvxAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_AVX_AIMPL_2|3 - parameters: function, arguments.

        The argument count is taken from the last character of the statement name.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallFpuAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_FPU_AIMPL_1|2|3 - parameters: function, arguments.

        The argument count is taken from the last character of the statement name.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallMmxAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_MMX_AIMPL_2|3 - parameters: function, arguments.

        The argument count is taken from the last character of the statement name.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallSseAImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_SSE_AIMPL_2|3 - parameters: function, arguments.

        The argument count is taken from the last character of the statement name.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    @staticmethod
    def parseMcCallCImpl(oSelf, sName, asParams):
        """
        IEM_MC_CALL_CIMPL_0|1|2|3|4|5 - parameters: function, arguments.

        The argument count is taken from the last character of the statement name.
        """
        cArgs = int(sName[-1]);
        oSelf.checkStmtParamCount(sName, asParams, 1 + cArgs);
        return McStmtCall(sName, asParams, 0);

    #
    # Low level text helpers.
    #

    @staticmethod
    def stripComments(sCode):
        """
        Returns sCode with comments removed.

        A C++ comment is removed up to, but not including, the newline ending
        it.  A C comment is replaced by a single space unless there already is
        whitespace on either side of it.  An unterminated comment drops the
        rest of the code and any whitespace in front of it.

        Note! Does not take string literals into account.
        """
        off = 0;
        while off < len(sCode):
            off = sCode.find('/', off);
            if off < 0 or off + 1 >= len(sCode):
                break;

            if sCode[off + 1] == '/':
                # C++ comment.
                offEnd = sCode.find('\n', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sCode = sCode[ : off] + sCode[offEnd : ];
                off += 1;

            elif sCode[off + 1] == '*':
                # C comment
                offEnd = sCode.find('*/', off + 2);
                if offEnd < 0:
                    return sCode[:off].rstrip();
                sSep = ' ';
                if (off > 0 and sCode[off - 1].isspace()) or (offEnd + 2 < len(sCode) and sCode[offEnd + 2].isspace()):
                    sSep = '';
                sCode = sCode[ : off] + sSep + sCode[offEnd + 2 : ];
                off += len(sSep);

            else:
                # Not a comment.
                off += 1;
        return sCode;

    @staticmethod
    def extractParam(sCode, offParam):
        """
        Extracts the parameter value at offParam in sCode.

        Returns stripped value and the end offset of the terminating ',' or ')'.
        If neither terminator is found, the returned offset is len(sCode).

        Note! Only parentheses are tracked for nesting purposes.
        """
        # Extract it.
        cNesting = 0;
        offStart = offParam;
        while offParam < len(sCode):
            ch = sCode[offParam];
            if ch == '(':
                cNesting += 1;
            elif ch == ')':
                if cNesting == 0:
                    break;
                cNesting -= 1;
            elif ch == ',' and cNesting == 0:
                break;
            offParam += 1;
        return (sCode[offStart : offParam].strip(), offParam);

    @staticmethod
    def extractParams(sCode, offOpenParen):
        """
        Parses a parameter list.

        Returns the list of parameter values and the offset of the closing parenthesis.
        Returns (None, len(sCode)) if no closing parenthesis was found.
        """
        assert sCode[offOpenParen] == '(';
        asParams = [];
        off = offOpenParen + 1;
        while off < len(sCode):
            ch = sCode[off];
            if ch.isspace():
                off += 1;
            elif ch != ')':
                (sParam, off) = McBlock.extractParam(sCode, off);
                asParams.append(sParam);
                # Running off the end of the code means the list is unterminated.  Take the documented
                # failure return so the caller can report a proper parsing error.
                if off >= len(sCode):
                    break;
                if sCode[off] == ',':
                    off += 1;
            else:
                return (asParams, off);
        return (None, off);

    @staticmethod
    def findClosingBraces(sCode, off, offStop):
        """
        Finds the matching '}' for the '{' at off in sCode.
        Returns offset of the matching '}' on success, otherwise -1.

        Note! Does not take comments into account.
        """
        cDepth = 1;
        off   += 1;
        while off < offStop:
            offClose = sCode.find('}', off, offStop);
            if offClose < 0:
                break;
            # Any '{' seen before this '}' opens a nested block which needs closing too.
            cDepth += sCode.count('{', off, offClose);
            cDepth -= 1;
            if cDepth == 0:
                return offClose;
            off = offClose + 1;
        return -1;

    @staticmethod
    def countSpacesAt(sCode, off, offStop):
        """ Returns the number of whitespace characters at off in sCode, not going beyond offStop. """
        offStart = off;
        while off < offStop and sCode[off].isspace():
            off += 1;
        return off - offStart;

    @staticmethod
    def skipSpacesAt(sCode, off, offStop):
        """ Returns first offset at or after off for a non-space character (at most offStop). """
        return off + McBlock.countSpacesAt(sCode, off, offStop);

    @staticmethod
    def isSubstrAt(sStr, off, sSubStr):
        """ Returns true if sSubStr is found at off in sStr. """
        return sStr[off : off + len(sSubStr)] == sSubStr;

    ## Matches the start of a C/C++ control statement.
    koReCppCtrlStmts   = re.compile(r'\b(if\s*[(]|else\b|while\s*[(]|for\s*[(]|do\b)');
    ## Matches references to a fixed list of iem.s.* members.
    koReIemDecoderVars = re.compile(  r'iem\.s\.(fPrefixes|uRexReg|uRexB|uRexIndex|iEffSeg|offModRm|cbOpcode|offOpcode'
                                    + r'|enmEffOpSize|enmDefOpSize|enmDefAddrMode|enmEffAddrMode|idxPrefix'
                                    + r'|uVex3rdReg|uVexLength|fEvxStuff|uFpuOpcode|abOpcode'
                                    + r')');

    def decodeCode(self, sRawCode, off = 0, offStop = -1, iLevel = 0): # pylint: disable=too-many-statements,too-many-branches
        """
        Decodes sRawCode[off : offStop].

        A negative offStop means the end of sRawCode.  The iLevel parameter is
        the recursion depth, it is incremented for each nested branch and used
        to limit the nesting of IEM_MC_IF_XXX statements.

        Returns list of McStmt instances.
        Raises ParserException on failure.

        Note! The searches for the ';' terminating a statement are deliberately
              not limited by offStop: The recursive calls for one-line if/else
              bodies pass the offset of the terminating ';' as offStop.
        """
        if offStop < 0:
            offStop = len(sRawCode);
        aoStmts = [];
        while off < offStop:
            ch = sRawCode[off];

            #
            # Skip spaces and comments.
            #
            if ch.isspace():
                off += 1;

            elif ch == '/':
                # Slicing rather than indexing so that a '/' at the very end of the code
                # results in a parsing error below rather than an IndexError.
                chNext = sRawCode[off + 1 : off + 2];
                if chNext == '/': # C++ comment.
                    off = sRawCode.find('\n', off + 2);
                    if off < 0:
                        break;
                    off += 1;
                elif chNext == '*': # C comment.
                    off = sRawCode.find('*/', off + 2);
                    if off < 0:
                        break;
                    off += 2;
                else:
                    self.raiseDecodeError(sRawCode, off, 'Unexpected "/"');

            #
            # Is it a MC statement.
            #
            elif ch == 'I' and sRawCode[off : off + len('IEM_MC_')] == 'IEM_MC_':
                # All MC statements ends with a semicolon, except for conditionals which ends with a '{'.
                # Extract it and strip comments from it.
                if not self.isSubstrAt(sRawCode, off, 'IEM_MC_IF_'):
                    offEnd = sRawCode.find(';', off + len('IEM_MC_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC statement without a ";"');
                else:
                    offEnd = sRawCode.find('{', off + len('IEM_MC_IF_'));
                    if offEnd <= off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without a "{"');
                    if sRawCode.find(';', off + len('IEM_MC_IF_'), offEnd) > off:
                        self.raiseDecodeError(sRawCode, off, 'MC conditional statement without an immediate "{"');
                    # Back up over the whitespace preceding the '{' so offEnd ends up right after the
                    # closing parenthesis.  This works whether or not there is any such whitespace.
                    while offEnd > off and sRawCode[offEnd - 1].isspace():
                        offEnd -= 1;

                sRawStmt = self.stripComments(sRawCode[off : offEnd]);

                # Isolate the statement name.
                offOpenParen = sRawStmt.find('(');
                if offOpenParen < 0:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a "("');
                sName = sRawStmt[: offOpenParen].strip();

                # Extract the parameters.
                (asParams, offCloseParen) = self.extractParams(sRawStmt, offOpenParen);
                if asParams is None:
                    self.raiseDecodeError(sRawCode, off, 'MC statement without a closing parenthesis');
                if offCloseParen + 1 != len(sRawStmt):
                    self.raiseDecodeError(sRawCode, off,
                                          'Unexpected code following MC statement: %s' % (sRawStmt[offCloseParen + 1:]));

                # Hand it to the handler.
                fnParser = g_dMcStmtParsers.get(sName);
                if not fnParser:
                    self.raiseDecodeError(sRawCode, off, 'Unknown MC statement: %s' % (sName,));
                oStmt = fnParser(self, sName, asParams);
                if not isinstance(oStmt, (list, tuple)):
                    aoStmts.append(oStmt);
                else:
                    aoStmts.extend(oStmt);

                #
                # If conditional, we need to parse the whole statement.
                #
                # For reasons of simplicity, we assume the following structure
                # and parse each branch in a recursive call:
                #   IEM_MC_IF_XXX() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ELSE() {
                #        IEM_MC_WHATEVER();
                #   } IEM_MC_ENDIF();
                #
                if sName.startswith('IEM_MC_IF_'):
                    if iLevel > 1:
                        self.raiseDecodeError(sRawCode, off, 'Too deep nesting of conditionals.');

                    # Find start of the IF block:
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd, offStop);
                    if sRawCode[offBlock1] != '{':
                        self.raiseDecodeError(sRawCode, offBlock1, 'Expected "{" following %s' % (sName,));

                    # Find the end of it.
                    offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                    if offBlock1End < 0:
                        self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing IF block of %s' % (sName,));

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1 + 1, offBlock1End, iLevel + 1);

                    # Is there an else section?
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'IEM_MC_ELSE'):
                        off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ELSE'), offStop);
                        if sRawCode[off] != '(':
                            self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ELSE"');
                        off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[off] != ')':
                            self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ELSE("');

                        # Find start of the ELSE block.
                        offBlock2 = self.skipSpacesAt(sRawCode, off + 1, offStop);
                        if sRawCode[offBlock2] != '{':
                            self.raiseDecodeError(sRawCode, offBlock2, 'Expected "{" following IEM_MC_ELSE()"');

                        # Find the end of it.
                        offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                        if offBlock2End < 0:
                            self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing ELSE block of %s' % (sName,));

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2 + 1, offBlock2End, iLevel + 1);
                        off = self.skipSpacesAt(sRawCode, offBlock2End + 1, offStop);

                    # Parse past the endif statement.
                    if not self.isSubstrAt(sRawCode, off, 'IEM_MC_ENDIF'):
                        self.raiseDecodeError(sRawCode, off, 'Expected IEM_MC_ENDIF for closing %s' % (sName,));
                    off = self.skipSpacesAt(sRawCode, off + len('IEM_MC_ENDIF'), offStop);
                    if sRawCode[off] != '(':
                        self.raiseDecodeError(sRawCode, off, 'Expected "(" following IEM_MC_ENDIF"');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ')':
                        self.raiseDecodeError(sRawCode, off, 'Expected ")" following IEM_MC_ENDIF("');
                    off = self.skipSpacesAt(sRawCode, off + 1, offStop);
                    if sRawCode[off] != ';':
                        self.raiseDecodeError(sRawCode, off, 'Expected ";" following IEM_MC_ENDIF()"');
                    off += 1;

                else:
                    # Advance.
                    off = offEnd + 1;

            #
            # Otherwise it must be a C/C++ statement of sorts.
            #
            else:
                # Find the end of the statement.  if and else requires special handling.
                # Note! In this branch offEnd is the offset of the last character of the statement/
                #       condition/directive, i.e. it is inclusive.
                sCondExpr = None;
                oMatch = self.koReCppCtrlStmts.match(sRawCode, off);
                if oMatch:
                    if oMatch.group(1)[-1] == '(':
                        (sCondExpr, offEnd) = self.extractParam(sRawCode, oMatch.end());
                    else:
                        offEnd = oMatch.end();
                    if not oMatch.group(1).startswith('if') and oMatch.group(1) != 'else':
                        self.raiseDecodeError(sRawCode, off, 'Only if/else control statements allowed: %s' % (oMatch.group(1),));
                elif ch == '#':
                    offEnd = sRawCode.find('\n', off, offStop);
                    if offEnd < 0:
                        offEnd = offStop;
                    offEnd -= 1;
                    # Trim trailing whitespace.  Since offEnd is inclusive here, it is the character
                    # at offEnd itself that must be checked, or a final one character token is lost.
                    while offEnd > off and sRawCode[offEnd].isspace():
                        offEnd -= 1;
                else:
                    offEnd = sRawCode.find(';', off);
                    if offEnd < 0:
                        self.raiseDecodeError(sRawCode, off, 'C++ statement without a ";"');

                # Check this and the following statement whether it might have
                # something to do with decoding.  This is a statement filter
                # criteria when generating the threaded functions blocks.
                offNextEnd = sRawCode.find(';', offEnd + 1);
                fDecode    = (   sRawCode.find('IEM_OPCODE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DONE_', off, max(offEnd, offNextEnd)) >= 0
                              or sRawCode.find('IEMOP_HLP_DECODED_', off, offEnd) >= 0
                              or sRawCode.find('IEMOP_HLP_RAISE_UD_IF_MISSING_GUEST_FEATURE', off, offEnd) >= 0
                              );

                if not oMatch:
                    if ch != '#':
                        aoStmts.append(McCppGeneric(sRawCode[off : offEnd + 1], fDecode));
                    else:
                        aoStmts.append(McCppPreProc(sRawCode[off : offEnd + 1]));
                    off = offEnd + 1;
                elif oMatch.group(1).startswith('if'):
                    #
                    # if () xxx [else yyy] statement.
                    #
                    oStmt = McCppCond(sCondExpr, fDecode);
                    aoStmts.append(oStmt);
                    off = offEnd + 1;

                    # Following the if () we can either have a {} containing zero or more statements
                    # or we have a single statement.
                    offBlock1 = self.skipSpacesAt(sRawCode, offEnd + 1, offStop);
                    if sRawCode[offBlock1] == '{':
                        offBlock1End = self.findClosingBraces(sRawCode, offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, offBlock1, 'No matching "}" closing if block');
                        offBlock1 += 1;
                    else:
                        offBlock1End = sRawCode.find(';', offBlock1, offStop);
                        if offBlock1End < 0:
                            self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line if block"');

                    oStmt.aoIfBranch = self.decodeCode(sRawCode, offBlock1, offBlock1End, iLevel + 1);

                    # The else is optional and can likewise be followed by {} or a single statement.
                    off = self.skipSpacesAt(sRawCode, offBlock1End + 1, offStop);
                    if self.isSubstrAt(sRawCode, off, 'else') and sRawCode[off + len('else')].isspace():
                        offBlock2 = self.skipSpacesAt(sRawCode, off + len('else'), offStop);
                        if sRawCode[offBlock2] == '{':
                            offBlock2End = self.findClosingBraces(sRawCode, offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, offBlock2, 'No matching "}" closing else block');
                            offBlock2 += 1;
                        else:
                            offBlock2End = sRawCode.find(';', offBlock2, offStop);
                            if offBlock2End < 0:
                                self.raiseDecodeError(sRawCode, off, 'Expected ";" terminating one-line else block"');

                        oStmt.aoElseBranch = self.decodeCode(sRawCode, offBlock2, offBlock2End, iLevel + 1);
                        off = offBlock2End + 1;

                elif oMatch.group(1) == 'else':
                    # Problematic 'else' branch, typically involving #ifdefs.
                    self.raiseDecodeError(sRawCode, off, 'Mixed up else/#ifdef or something confusing us.');


        return aoStmts;


    def decode(self):
        """
        Decodes the block, populating self.aoStmts.

        The raw lines are joined as-is (no separator added) before decoding.

        Returns the statement list.
        Raises ParserException on failure.
        """
        self.aoStmts = self.decodeCode(''.join(self.asLines));
        return self.aoStmts;
2388
2389
2390## IEM_MC_XXX -> parser dictionary.
2391# The raw table was generated via the following command
2392# sed -n -e "s/^# *define *\(IEM_MC_[A-Z_0-9]*\)[ (].*$/ '\1': McBlock.parseMcGeneric,/p" include/IEMMc.h \
2393# | sort | uniq | gawk "{printf """ %%-60s %%s\n""", $1, $2}"
2394g_dMcStmtParsers = {
2395 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2396 'IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ': McBlock.parseMcGeneric,
2397 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2398 'IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ': McBlock.parseMcGeneric,
2399 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE': McBlock.parseMcGeneric,
2400 'IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ': McBlock.parseMcGeneric,
2401 'IEM_MC_ADD_GREG_U16': McBlock.parseMcGeneric,
2402 'IEM_MC_ADD_GREG_U16_TO_LOCAL': McBlock.parseMcGeneric,
2403 'IEM_MC_ADD_GREG_U32': McBlock.parseMcGeneric,
2404 'IEM_MC_ADD_GREG_U32_TO_LOCAL': McBlock.parseMcGeneric,
2405 'IEM_MC_ADD_GREG_U64': McBlock.parseMcGeneric,
2406 'IEM_MC_ADD_GREG_U64_TO_LOCAL': McBlock.parseMcGeneric,
2407 'IEM_MC_ADD_GREG_U8': McBlock.parseMcGeneric,
2408 'IEM_MC_ADD_GREG_U8_TO_LOCAL': McBlock.parseMcGeneric,
2409 'IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR': McBlock.parseMcGeneric,
2410 'IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR': McBlock.parseMcGeneric,
2411 'IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR': McBlock.parseMcGeneric,
2412 'IEM_MC_ADVANCE_RIP_AND_FINISH': McBlock.parseMcGeneric,
2413 'IEM_MC_AND_2LOCS_U32': McBlock.parseMcGeneric,
2414 'IEM_MC_AND_ARG_U16': McBlock.parseMcGeneric,
2415 'IEM_MC_AND_ARG_U32': McBlock.parseMcGeneric,
2416 'IEM_MC_AND_ARG_U64': McBlock.parseMcGeneric,
2417 'IEM_MC_AND_GREG_U16': McBlock.parseMcGeneric,
2418 'IEM_MC_AND_GREG_U32': McBlock.parseMcGeneric,
2419 'IEM_MC_AND_GREG_U64': McBlock.parseMcGeneric,
2420 'IEM_MC_AND_GREG_U8': McBlock.parseMcGeneric,
2421 'IEM_MC_AND_LOCAL_U16': McBlock.parseMcGeneric,
2422 'IEM_MC_AND_LOCAL_U32': McBlock.parseMcGeneric,
2423 'IEM_MC_AND_LOCAL_U64': McBlock.parseMcGeneric,
2424 'IEM_MC_AND_LOCAL_U8': McBlock.parseMcGeneric,
2425 'IEM_MC_ARG': McBlock.parseMcArg,
2426 'IEM_MC_ARG_CONST': McBlock.parseMcArgConst,
2427 'IEM_MC_ARG_LOCAL_EFLAGS': McBlock.parseMcArgLocalEFlags,
2428 'IEM_MC_ARG_LOCAL_REF': McBlock.parseMcArgLocalRef,
2429 'IEM_MC_ASSIGN': McBlock.parseMcGeneric,
2430 'IEM_MC_ASSIGN_TO_SMALLER': McBlock.parseMcGeneric,
2431 'IEM_MC_BEGIN': McBlock.parseMcGeneric,
2432 'IEM_MC_BSWAP_LOCAL_U16': McBlock.parseMcGeneric,
2433 'IEM_MC_BSWAP_LOCAL_U32': McBlock.parseMcGeneric,
2434 'IEM_MC_BSWAP_LOCAL_U64': McBlock.parseMcGeneric,
2435 'IEM_MC_CALC_RM_EFF_ADDR': McBlock.parseMcGeneric,
2436 'IEM_MC_CALL_AIMPL_3': McBlock.parseMcCallAImpl,
2437 'IEM_MC_CALL_AIMPL_4': McBlock.parseMcCallAImpl,
2438 'IEM_MC_CALL_AVX_AIMPL_2': McBlock.parseMcCallAvxAImpl,
2439 'IEM_MC_CALL_AVX_AIMPL_3': McBlock.parseMcCallAvxAImpl,
2440 'IEM_MC_CALL_CIMPL_0': McBlock.parseMcCallCImpl,
2441 'IEM_MC_CALL_CIMPL_1': McBlock.parseMcCallCImpl,
2442 'IEM_MC_CALL_CIMPL_2': McBlock.parseMcCallCImpl,
2443 'IEM_MC_CALL_CIMPL_3': McBlock.parseMcCallCImpl,
2444 'IEM_MC_CALL_CIMPL_4': McBlock.parseMcCallCImpl,
2445 'IEM_MC_CALL_CIMPL_5': McBlock.parseMcCallCImpl,
2446 'IEM_MC_CALL_FPU_AIMPL_1': McBlock.parseMcCallFpuAImpl,
2447 'IEM_MC_CALL_FPU_AIMPL_2': McBlock.parseMcCallFpuAImpl,
2448 'IEM_MC_CALL_FPU_AIMPL_3': McBlock.parseMcCallFpuAImpl,
2449 'IEM_MC_CALL_MMX_AIMPL_2': McBlock.parseMcCallMmxAImpl,
2450 'IEM_MC_CALL_MMX_AIMPL_3': McBlock.parseMcCallMmxAImpl,
2451 'IEM_MC_CALL_SSE_AIMPL_2': McBlock.parseMcCallSseAImpl,
2452 'IEM_MC_CALL_SSE_AIMPL_3': McBlock.parseMcCallSseAImpl,
2453 'IEM_MC_CALL_VOID_AIMPL_0': McBlock.parseMcCallVoidAImpl,
2454 'IEM_MC_CALL_VOID_AIMPL_1': McBlock.parseMcCallVoidAImpl,
2455 'IEM_MC_CALL_VOID_AIMPL_2': McBlock.parseMcCallVoidAImpl,
2456 'IEM_MC_CALL_VOID_AIMPL_3': McBlock.parseMcCallVoidAImpl,
2457 'IEM_MC_CALL_VOID_AIMPL_4': McBlock.parseMcCallVoidAImpl,
2458 'IEM_MC_CLEAR_EFL_BIT': McBlock.parseMcGeneric,
2459 'IEM_MC_CLEAR_FSW_EX': McBlock.parseMcGeneric,
2460 'IEM_MC_CLEAR_HIGH_GREG_U64': McBlock.parseMcGeneric,
2461 'IEM_MC_CLEAR_HIGH_GREG_U64_BY_REF': McBlock.parseMcGeneric,
2462 'IEM_MC_CLEAR_XREG_U32_MASK': McBlock.parseMcGeneric,
2463 'IEM_MC_CLEAR_YREG_128_UP': McBlock.parseMcGeneric,
2464 'IEM_MC_COMMIT_EFLAGS': McBlock.parseMcGeneric,
2465 'IEM_MC_COPY_XREG_U128': McBlock.parseMcGeneric,
2466 'IEM_MC_COPY_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2467 'IEM_MC_COPY_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2468 'IEM_MC_COPY_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2469 'IEM_MC_DEFER_TO_CIMPL_0': McBlock.parseMcGeneric,
2470 'IEM_MC_DEFER_TO_CIMPL_1': McBlock.parseMcGeneric,
2471 'IEM_MC_DEFER_TO_CIMPL_2': McBlock.parseMcGeneric,
2472 'IEM_MC_DEFER_TO_CIMPL_3': McBlock.parseMcGeneric,
2473 'IEM_MC_END': McBlock.parseMcGeneric,
2474 'IEM_MC_FETCH_EFLAGS': McBlock.parseMcGeneric,
2475 'IEM_MC_FETCH_EFLAGS_U8': McBlock.parseMcGeneric,
2476 'IEM_MC_FETCH_FCW': McBlock.parseMcGeneric,
2477 'IEM_MC_FETCH_FSW': McBlock.parseMcGeneric,
2478 'IEM_MC_FETCH_GREG_U16': McBlock.parseMcGeneric,
2479 'IEM_MC_FETCH_GREG_U16_SX_U32': McBlock.parseMcGeneric,
2480 'IEM_MC_FETCH_GREG_U16_SX_U64': McBlock.parseMcGeneric,
2481 'IEM_MC_FETCH_GREG_U16_ZX_U32': McBlock.parseMcGeneric,
2482 'IEM_MC_FETCH_GREG_U16_ZX_U64': McBlock.parseMcGeneric,
2483 'IEM_MC_FETCH_GREG_U32': McBlock.parseMcGeneric,
2484 'IEM_MC_FETCH_GREG_U32_SX_U64': McBlock.parseMcGeneric,
2485 'IEM_MC_FETCH_GREG_U32_ZX_U64': McBlock.parseMcGeneric,
2486 'IEM_MC_FETCH_GREG_U64': McBlock.parseMcGeneric,
2487 'IEM_MC_FETCH_GREG_U64_ZX_U64': McBlock.parseMcGeneric,
2488 'IEM_MC_FETCH_GREG_U8': McBlock.parseMcGeneric,
2489 'IEM_MC_FETCH_GREG_U8_SX_U16': McBlock.parseMcGeneric,
2490 'IEM_MC_FETCH_GREG_U8_SX_U32': McBlock.parseMcGeneric,
2491 'IEM_MC_FETCH_GREG_U8_SX_U64': McBlock.parseMcGeneric,
2492 'IEM_MC_FETCH_GREG_U8_ZX_U16': McBlock.parseMcGeneric,
2493 'IEM_MC_FETCH_GREG_U8_ZX_U32': McBlock.parseMcGeneric,
2494 'IEM_MC_FETCH_GREG_U8_ZX_U64': McBlock.parseMcGeneric,
2495 'IEM_MC_FETCH_MEM_D80': McBlock.parseMcGeneric,
2496 'IEM_MC_FETCH_MEM_I16': McBlock.parseMcGeneric,
2497 'IEM_MC_FETCH_MEM_I32': McBlock.parseMcGeneric,
2498 'IEM_MC_FETCH_MEM_I64': McBlock.parseMcGeneric,
2499 'IEM_MC_FETCH_MEM_R32': McBlock.parseMcGeneric,
2500 'IEM_MC_FETCH_MEM_R64': McBlock.parseMcGeneric,
2501 'IEM_MC_FETCH_MEM_R80': McBlock.parseMcGeneric,
2502 'IEM_MC_FETCH_MEM_S32_SX_U64': McBlock.parseMcGeneric,
2503 'IEM_MC_FETCH_MEM_U128': McBlock.parseMcGeneric,
2504 'IEM_MC_FETCH_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2505 'IEM_MC_FETCH_MEM_U128_NO_AC': McBlock.parseMcGeneric,
2506 'IEM_MC_FETCH_MEM_U16': McBlock.parseMcGeneric,
2507 'IEM_MC_FETCH_MEM_U16_DISP': McBlock.parseMcGeneric,
2508 'IEM_MC_FETCH_MEM_U16_SX_U32': McBlock.parseMcGeneric,
2509 'IEM_MC_FETCH_MEM_U16_SX_U64': McBlock.parseMcGeneric,
2510 'IEM_MC_FETCH_MEM_U16_ZX_U32': McBlock.parseMcGeneric,
2511 'IEM_MC_FETCH_MEM_U16_ZX_U64': McBlock.parseMcGeneric,
2512 'IEM_MC_FETCH_MEM_U256': McBlock.parseMcGeneric,
2513 'IEM_MC_FETCH_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2514 'IEM_MC_FETCH_MEM_U256_NO_AC': McBlock.parseMcGeneric,
2515 'IEM_MC_FETCH_MEM_U32': McBlock.parseMcGeneric,
2516 'IEM_MC_FETCH_MEM_U32_DISP': McBlock.parseMcGeneric,
2517 'IEM_MC_FETCH_MEM_U32_SX_U64': McBlock.parseMcGeneric,
2518 'IEM_MC_FETCH_MEM_U32_ZX_U64': McBlock.parseMcGeneric,
2519 'IEM_MC_FETCH_MEM_U64': McBlock.parseMcGeneric,
2520 'IEM_MC_FETCH_MEM_U64_ALIGN_U128': McBlock.parseMcGeneric,
2521 'IEM_MC_FETCH_MEM_U64_DISP': McBlock.parseMcGeneric,
2522 'IEM_MC_FETCH_MEM_U8': McBlock.parseMcGeneric,
2523 'IEM_MC_FETCH_MEM_U8_SX_U16': McBlock.parseMcGeneric,
2524 'IEM_MC_FETCH_MEM_U8_SX_U32': McBlock.parseMcGeneric,
2525 'IEM_MC_FETCH_MEM_U8_SX_U64': McBlock.parseMcGeneric,
2526 'IEM_MC_FETCH_MEM_U8_ZX_U16': McBlock.parseMcGeneric,
2527 'IEM_MC_FETCH_MEM_U8_ZX_U32': McBlock.parseMcGeneric,
2528 'IEM_MC_FETCH_MEM_U8_ZX_U64': McBlock.parseMcGeneric,
2529 'IEM_MC_FETCH_MEM_XMM': McBlock.parseMcGeneric,
2530 'IEM_MC_FETCH_MEM_XMM_ALIGN_SSE': McBlock.parseMcGeneric,
2531 'IEM_MC_FETCH_MEM_XMM_NO_AC': McBlock.parseMcGeneric,
2532 'IEM_MC_FETCH_MEM_XMM_U32': McBlock.parseMcGeneric,
2533 'IEM_MC_FETCH_MEM_XMM_U64': McBlock.parseMcGeneric,
2534 'IEM_MC_FETCH_MEM_YMM': McBlock.parseMcGeneric,
2535 'IEM_MC_FETCH_MEM_YMM_ALIGN_AVX': McBlock.parseMcGeneric,
2536 'IEM_MC_FETCH_MEM_YMM_NO_AC': McBlock.parseMcGeneric,
2537 'IEM_MC_FETCH_MEM16_U8': McBlock.parseMcGeneric,
2538 'IEM_MC_FETCH_MEM32_U8': McBlock.parseMcGeneric,
2539 'IEM_MC_FETCH_MREG_U32': McBlock.parseMcGeneric,
2540 'IEM_MC_FETCH_MREG_U64': McBlock.parseMcGeneric,
2541 'IEM_MC_FETCH_SREG_BASE_U32': McBlock.parseMcGeneric,
2542 'IEM_MC_FETCH_SREG_BASE_U64': McBlock.parseMcGeneric,
2543 'IEM_MC_FETCH_SREG_U16': McBlock.parseMcGeneric,
2544 'IEM_MC_FETCH_SREG_ZX_U32': McBlock.parseMcGeneric,
2545 'IEM_MC_FETCH_SREG_ZX_U64': McBlock.parseMcGeneric,
2546 'IEM_MC_FETCH_XREG_U128': McBlock.parseMcGeneric,
2547 'IEM_MC_FETCH_XREG_U16': McBlock.parseMcGeneric,
2548 'IEM_MC_FETCH_XREG_U32': McBlock.parseMcGeneric,
2549 'IEM_MC_FETCH_XREG_U64': McBlock.parseMcGeneric,
2550 'IEM_MC_FETCH_XREG_U8': McBlock.parseMcGeneric,
2551 'IEM_MC_FETCH_XREG_XMM': McBlock.parseMcGeneric,
2552 'IEM_MC_FETCH_YREG_2ND_U64': McBlock.parseMcGeneric,
2553 'IEM_MC_FETCH_YREG_U128': McBlock.parseMcGeneric,
2554 'IEM_MC_FETCH_YREG_U256': McBlock.parseMcGeneric,
2555 'IEM_MC_FETCH_YREG_U32': McBlock.parseMcGeneric,
2556 'IEM_MC_FETCH_YREG_U64': McBlock.parseMcGeneric,
2557 'IEM_MC_FLIP_EFL_BIT': McBlock.parseMcGeneric,
2558 'IEM_MC_FPU_FROM_MMX_MODE': McBlock.parseMcGeneric,
2559 'IEM_MC_FPU_STACK_DEC_TOP': McBlock.parseMcGeneric,
2560 'IEM_MC_FPU_STACK_FREE': McBlock.parseMcGeneric,
2561 'IEM_MC_FPU_STACK_INC_TOP': McBlock.parseMcGeneric,
2562 'IEM_MC_FPU_STACK_PUSH_OVERFLOW': McBlock.parseMcGeneric,
2563 'IEM_MC_FPU_STACK_PUSH_OVERFLOW_MEM_OP': McBlock.parseMcGeneric,
2564 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW': McBlock.parseMcGeneric,
2565 'IEM_MC_FPU_STACK_PUSH_UNDERFLOW_TWO': McBlock.parseMcGeneric,
2566 'IEM_MC_FPU_STACK_UNDERFLOW': McBlock.parseMcGeneric,
2567 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP': McBlock.parseMcGeneric,
2568 'IEM_MC_FPU_STACK_UNDERFLOW_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2569 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP': McBlock.parseMcGeneric,
2570 'IEM_MC_FPU_STACK_UNDERFLOW_THEN_POP_POP': McBlock.parseMcGeneric,
2571 'IEM_MC_FPU_TO_MMX_MODE': McBlock.parseMcGeneric,
2572 'IEM_MC_IF_CX_IS_NZ': McBlock.parseMcGenericCond,
2573 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2574 'IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2575 'IEM_MC_IF_ECX_IS_NZ': McBlock.parseMcGenericCond,
2576 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2577 'IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2578 'IEM_MC_IF_EFL_ANY_BITS_SET': McBlock.parseMcGenericCond,
2579 'IEM_MC_IF_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2580 'IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ': McBlock.parseMcGenericCond,
2581 'IEM_MC_IF_EFL_BIT_SET': McBlock.parseMcGenericCond,
2582 'IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE': McBlock.parseMcGenericCond,
2583 'IEM_MC_IF_EFL_BITS_EQ': McBlock.parseMcGenericCond,
2584 'IEM_MC_IF_EFL_BITS_NE': McBlock.parseMcGenericCond,
2585 'IEM_MC_IF_EFL_NO_BITS_SET': McBlock.parseMcGenericCond,
2586 'IEM_MC_IF_FCW_IM': McBlock.parseMcGenericCond,
2587 'IEM_MC_IF_FPUREG_IS_EMPTY': McBlock.parseMcGenericCond,
2588 'IEM_MC_IF_FPUREG_NOT_EMPTY': McBlock.parseMcGenericCond,
2589 'IEM_MC_IF_FPUREG_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2590 'IEM_MC_IF_GREG_BIT_SET': McBlock.parseMcGenericCond,
2591 'IEM_MC_IF_LOCAL_IS_Z': McBlock.parseMcGenericCond,
2592 'IEM_MC_IF_MXCSR_XCPT_PENDING': McBlock.parseMcGenericCond,
2593 'IEM_MC_IF_RCX_IS_NZ': McBlock.parseMcGenericCond,
2594 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET': McBlock.parseMcGenericCond,
2595 'IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET': McBlock.parseMcGenericCond,
2596 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80': McBlock.parseMcGenericCond,
2597 'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST': McBlock.parseMcGenericCond,
2598 'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS': McBlock.parseMcGeneric,
2599 'IEM_MC_INT_CLEAR_ZMM_256_UP': McBlock.parseMcGeneric,
2600 'IEM_MC_LOCAL': McBlock.parseMcLocal,
2601 'IEM_MC_LOCAL_CONST': McBlock.parseMcLocalConst,
2602 'IEM_MC_MAYBE_RAISE_AESNI_RELATED_XCPT': McBlock.parseMcGeneric,
2603 'IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT': McBlock.parseMcGeneric,
2604 'IEM_MC_MAYBE_RAISE_AVX2_RELATED_XCPT': McBlock.parseMcGeneric,
2605 'IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2606 'IEM_MC_MAYBE_RAISE_FPU_XCPT': McBlock.parseMcGeneric,
2607 'IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT': McBlock.parseMcGeneric,
2608 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT': McBlock.parseMcGeneric,
2609 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_CHECK_SSE_OR_MMXEXT': McBlock.parseMcGeneric,
2610 'IEM_MC_MAYBE_RAISE_MMX_RELATED_XCPT_EX': McBlock.parseMcGeneric,
2611 'IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0': McBlock.parseMcGeneric,
2612 'IEM_MC_MAYBE_RAISE_PCLMUL_RELATED_XCPT': McBlock.parseMcGeneric,
2613 'IEM_MC_MAYBE_RAISE_SHA_RELATED_XCPT': McBlock.parseMcGeneric,
2614 'IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2615 'IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT': McBlock.parseMcGeneric,
2616 'IEM_MC_MAYBE_RAISE_SSE2_RELATED_XCPT': McBlock.parseMcGeneric,
2617 'IEM_MC_MAYBE_RAISE_SSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2618 'IEM_MC_MAYBE_RAISE_SSE41_RELATED_XCPT': McBlock.parseMcGeneric,
2619 'IEM_MC_MAYBE_RAISE_SSE42_RELATED_XCPT': McBlock.parseMcGeneric,
2620 'IEM_MC_MAYBE_RAISE_SSSE3_RELATED_XCPT': McBlock.parseMcGeneric,
2621 'IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE': McBlock.parseMcGeneric,
2622 'IEM_MC_MEM_COMMIT_AND_UNMAP': McBlock.parseMcGeneric,
2623 'IEM_MC_MEM_COMMIT_AND_UNMAP_FOR_FPU_STORE': McBlock.parseMcGeneric,
2624 'IEM_MC_MEM_MAP': McBlock.parseMcGeneric,
2625 'IEM_MC_MEM_MAP_EX': McBlock.parseMcGeneric,
2626 'IEM_MC_MERGE_YREG_U32_U96_ZX_VLMAX': McBlock.parseMcGeneric,
2627 'IEM_MC_MERGE_YREG_U64_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2628 'IEM_MC_MERGE_YREG_U64HI_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2629 'IEM_MC_MERGE_YREG_U64LO_U64LO_ZX_VLMAX': McBlock.parseMcGeneric,
2630 'IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX': McBlock.parseMcGeneric,
2631 'IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX': McBlock.parseMcGeneric,
2632 'IEM_MC_MODIFIED_MREG': McBlock.parseMcGeneric,
2633 'IEM_MC_MODIFIED_MREG_BY_REF': McBlock.parseMcGeneric,
2634 'IEM_MC_OR_2LOCS_U32': McBlock.parseMcGeneric,
2635 'IEM_MC_OR_GREG_U16': McBlock.parseMcGeneric,
2636 'IEM_MC_OR_GREG_U32': McBlock.parseMcGeneric,
2637 'IEM_MC_OR_GREG_U64': McBlock.parseMcGeneric,
2638 'IEM_MC_OR_GREG_U8': McBlock.parseMcGeneric,
2639 'IEM_MC_OR_LOCAL_U16': McBlock.parseMcGeneric,
2640 'IEM_MC_OR_LOCAL_U32': McBlock.parseMcGeneric,
2641 'IEM_MC_OR_LOCAL_U8': McBlock.parseMcGeneric,
2642 'IEM_MC_POP_U16': McBlock.parseMcGeneric,
2643 'IEM_MC_POP_U32': McBlock.parseMcGeneric,
2644 'IEM_MC_POP_U64': McBlock.parseMcGeneric,
2645 'IEM_MC_PREPARE_AVX_USAGE': McBlock.parseMcGeneric,
2646 'IEM_MC_PREPARE_FPU_USAGE': McBlock.parseMcGeneric,
2647 'IEM_MC_PREPARE_SSE_USAGE': McBlock.parseMcGeneric,
2648 'IEM_MC_PUSH_FPU_RESULT': McBlock.parseMcGeneric,
2649 'IEM_MC_PUSH_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2650 'IEM_MC_PUSH_FPU_RESULT_TWO': McBlock.parseMcGeneric,
2651 'IEM_MC_PUSH_U16': McBlock.parseMcGeneric,
2652 'IEM_MC_PUSH_U32': McBlock.parseMcGeneric,
2653 'IEM_MC_PUSH_U32_SREG': McBlock.parseMcGeneric,
2654 'IEM_MC_PUSH_U64': McBlock.parseMcGeneric,
2655 'IEM_MC_RAISE_DIVIDE_ERROR': McBlock.parseMcGeneric,
2656 'IEM_MC_RAISE_GP0_IF_CPL_NOT_ZERO': McBlock.parseMcGeneric,
2657 'IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED': McBlock.parseMcGeneric,
2658 'IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT': McBlock.parseMcGeneric,
2659 'IEM_MC_REF_EFLAGS': McBlock.parseMcGeneric,
2660 'IEM_MC_REF_FPUREG': McBlock.parseMcGeneric,
2661 'IEM_MC_REF_GREG_I32': McBlock.parseMcGeneric,
2662 'IEM_MC_REF_GREG_I32_CONST': McBlock.parseMcGeneric,
2663 'IEM_MC_REF_GREG_I64': McBlock.parseMcGeneric,
2664 'IEM_MC_REF_GREG_I64_CONST': McBlock.parseMcGeneric,
2665 'IEM_MC_REF_GREG_U16': McBlock.parseMcGeneric,
2666 'IEM_MC_REF_GREG_U32': McBlock.parseMcGeneric,
2667 'IEM_MC_REF_GREG_U64': McBlock.parseMcGeneric,
2668 'IEM_MC_REF_GREG_U8': McBlock.parseMcGeneric,
2669 'IEM_MC_REF_LOCAL': McBlock.parseMcGeneric,
2670 'IEM_MC_REF_MREG_U32_CONST': McBlock.parseMcGeneric,
2671 'IEM_MC_REF_MREG_U64': McBlock.parseMcGeneric,
2672 'IEM_MC_REF_MREG_U64_CONST': McBlock.parseMcGeneric,
2673 'IEM_MC_REF_MXCSR': McBlock.parseMcGeneric,
2674 'IEM_MC_REF_XREG_R32_CONST': McBlock.parseMcGeneric,
2675 'IEM_MC_REF_XREG_R64_CONST': McBlock.parseMcGeneric,
2676 'IEM_MC_REF_XREG_U128': McBlock.parseMcGeneric,
2677 'IEM_MC_REF_XREG_U128_CONST': McBlock.parseMcGeneric,
2678 'IEM_MC_REF_XREG_U32_CONST': McBlock.parseMcGeneric,
2679 'IEM_MC_REF_XREG_U64_CONST': McBlock.parseMcGeneric,
2680 'IEM_MC_REF_XREG_XMM_CONST': McBlock.parseMcGeneric,
2681 'IEM_MC_REF_YREG_U128': McBlock.parseMcGeneric,
2682 'IEM_MC_REF_YREG_U128_CONST': McBlock.parseMcGeneric,
2683 'IEM_MC_REF_YREG_U64_CONST': McBlock.parseMcGeneric,
2684 'IEM_MC_REL_JMP_S16_AND_FINISH': McBlock.parseMcGeneric,
2685 'IEM_MC_REL_JMP_S32_AND_FINISH': McBlock.parseMcGeneric,
2686 'IEM_MC_REL_JMP_S8_AND_FINISH': McBlock.parseMcGeneric,
2687 'IEM_MC_RETURN_ON_FAILURE': McBlock.parseMcGeneric,
2688 'IEM_MC_SAR_LOCAL_S16': McBlock.parseMcGeneric,
2689 'IEM_MC_SAR_LOCAL_S32': McBlock.parseMcGeneric,
2690 'IEM_MC_SAR_LOCAL_S64': McBlock.parseMcGeneric,
2691 'IEM_MC_SET_EFL_BIT': McBlock.parseMcGeneric,
2692 'IEM_MC_SET_FPU_RESULT': McBlock.parseMcGeneric,
2693 'IEM_MC_SET_RIP_U16_AND_FINISH': McBlock.parseMcGeneric,
2694 'IEM_MC_SET_RIP_U32_AND_FINISH': McBlock.parseMcGeneric,
2695 'IEM_MC_SET_RIP_U64_AND_FINISH': McBlock.parseMcGeneric,
2696 'IEM_MC_SHL_LOCAL_S16': McBlock.parseMcGeneric,
2697 'IEM_MC_SHL_LOCAL_S32': McBlock.parseMcGeneric,
2698 'IEM_MC_SHL_LOCAL_S64': McBlock.parseMcGeneric,
2699 'IEM_MC_SHR_LOCAL_U8': McBlock.parseMcGeneric,
2700 'IEM_MC_SSE_UPDATE_MXCSR': McBlock.parseMcGeneric,
2701 'IEM_MC_STORE_FPU_RESULT': McBlock.parseMcGeneric,
2702 'IEM_MC_STORE_FPU_RESULT_MEM_OP': McBlock.parseMcGeneric,
2703 'IEM_MC_STORE_FPU_RESULT_THEN_POP': McBlock.parseMcGeneric,
2704 'IEM_MC_STORE_FPU_RESULT_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2705 'IEM_MC_STORE_FPUREG_R80_SRC_REF': McBlock.parseMcGeneric,
2706 'IEM_MC_STORE_GREG_I64': McBlock.parseMcGeneric,
2707 'IEM_MC_STORE_GREG_U16': McBlock.parseMcGeneric,
2708 'IEM_MC_STORE_GREG_U16_CONST': McBlock.parseMcGeneric,
2709 'IEM_MC_STORE_GREG_U32': McBlock.parseMcGeneric,
2710 'IEM_MC_STORE_GREG_U32_CONST': McBlock.parseMcGeneric,
2711 'IEM_MC_STORE_GREG_U64': McBlock.parseMcGeneric,
2712 'IEM_MC_STORE_GREG_U64_CONST': McBlock.parseMcGeneric,
2713 'IEM_MC_STORE_GREG_U8': McBlock.parseMcGeneric,
2714 'IEM_MC_STORE_GREG_U8_CONST': McBlock.parseMcGeneric,
2715 'IEM_MC_STORE_MEM_I16_CONST_BY_REF': McBlock.parseMcGeneric,
2716 'IEM_MC_STORE_MEM_I32_CONST_BY_REF': McBlock.parseMcGeneric,
2717 'IEM_MC_STORE_MEM_I64_CONST_BY_REF': McBlock.parseMcGeneric,
2718 'IEM_MC_STORE_MEM_I8_CONST_BY_REF': McBlock.parseMcGeneric,
2719 'IEM_MC_STORE_MEM_INDEF_D80_BY_REF': McBlock.parseMcGeneric,
2720 'IEM_MC_STORE_MEM_NEG_QNAN_R32_BY_REF': McBlock.parseMcGeneric,
2721 'IEM_MC_STORE_MEM_NEG_QNAN_R64_BY_REF': McBlock.parseMcGeneric,
2722 'IEM_MC_STORE_MEM_NEG_QNAN_R80_BY_REF': McBlock.parseMcGeneric,
2723 'IEM_MC_STORE_MEM_U128': McBlock.parseMcGeneric,
2724 'IEM_MC_STORE_MEM_U128_ALIGN_SSE': McBlock.parseMcGeneric,
2725 'IEM_MC_STORE_MEM_U16': McBlock.parseMcGeneric,
2726 'IEM_MC_STORE_MEM_U16_CONST': McBlock.parseMcGeneric,
2727 'IEM_MC_STORE_MEM_U256': McBlock.parseMcGeneric,
2728 'IEM_MC_STORE_MEM_U256_ALIGN_AVX': McBlock.parseMcGeneric,
2729 'IEM_MC_STORE_MEM_U32': McBlock.parseMcGeneric,
2730 'IEM_MC_STORE_MEM_U32_CONST': McBlock.parseMcGeneric,
2731 'IEM_MC_STORE_MEM_U64': McBlock.parseMcGeneric,
2732 'IEM_MC_STORE_MEM_U64_CONST': McBlock.parseMcGeneric,
2733 'IEM_MC_STORE_MEM_U8': McBlock.parseMcGeneric,
2734 'IEM_MC_STORE_MEM_U8_CONST': McBlock.parseMcGeneric,
2735 'IEM_MC_STORE_MREG_U32_ZX_U64': McBlock.parseMcGeneric,
2736 'IEM_MC_STORE_MREG_U64': McBlock.parseMcGeneric,
2737 'IEM_MC_STORE_SREG_BASE_U32': McBlock.parseMcGeneric,
2738 'IEM_MC_STORE_SREG_BASE_U64': McBlock.parseMcGeneric,
2739 'IEM_MC_STORE_SSE_RESULT': McBlock.parseMcGeneric,
2740 'IEM_MC_STORE_XREG_HI_U64': McBlock.parseMcGeneric,
2741 'IEM_MC_STORE_XREG_R32': McBlock.parseMcGeneric,
2742 'IEM_MC_STORE_XREG_R64': McBlock.parseMcGeneric,
2743 'IEM_MC_STORE_XREG_U128': McBlock.parseMcGeneric,
2744 'IEM_MC_STORE_XREG_U16': McBlock.parseMcGeneric,
2745 'IEM_MC_STORE_XREG_U32': McBlock.parseMcGeneric,
2746 'IEM_MC_STORE_XREG_U32_U128': McBlock.parseMcGeneric,
2747 'IEM_MC_STORE_XREG_U32_ZX_U128': McBlock.parseMcGeneric,
2748 'IEM_MC_STORE_XREG_U64': McBlock.parseMcGeneric,
2749 'IEM_MC_STORE_XREG_U64_ZX_U128': McBlock.parseMcGeneric,
2750 'IEM_MC_STORE_XREG_U8': McBlock.parseMcGeneric,
2751 'IEM_MC_STORE_XREG_XMM': McBlock.parseMcGeneric,
2752 'IEM_MC_STORE_XREG_XMM_U32': McBlock.parseMcGeneric,
2753 'IEM_MC_STORE_XREG_XMM_U64': McBlock.parseMcGeneric,
2754 'IEM_MC_STORE_YREG_U128_ZX_VLMAX': McBlock.parseMcGeneric,
2755 'IEM_MC_STORE_YREG_U256_ZX_VLMAX': McBlock.parseMcGeneric,
2756 'IEM_MC_STORE_YREG_U32_ZX_VLMAX': McBlock.parseMcGeneric,
2757 'IEM_MC_STORE_YREG_U64_ZX_VLMAX': McBlock.parseMcGeneric,
2758 'IEM_MC_SUB_GREG_U16': McBlock.parseMcGeneric,
2759 'IEM_MC_SUB_GREG_U32': McBlock.parseMcGeneric,
2760 'IEM_MC_SUB_GREG_U64': McBlock.parseMcGeneric,
2761 'IEM_MC_SUB_GREG_U8': McBlock.parseMcGeneric,
2762 'IEM_MC_SUB_LOCAL_U16': McBlock.parseMcGeneric,
2763 'IEM_MC_UPDATE_FPU_OPCODE_IP': McBlock.parseMcGeneric,
2764 'IEM_MC_UPDATE_FSW': McBlock.parseMcGeneric,
2765 'IEM_MC_UPDATE_FSW_CONST': McBlock.parseMcGeneric,
2766 'IEM_MC_UPDATE_FSW_THEN_POP': McBlock.parseMcGeneric,
2767 'IEM_MC_UPDATE_FSW_THEN_POP_POP': McBlock.parseMcGeneric,
2768 'IEM_MC_UPDATE_FSW_WITH_MEM_OP': McBlock.parseMcGeneric,
2769 'IEM_MC_UPDATE_FSW_WITH_MEM_OP_THEN_POP': McBlock.parseMcGeneric,
2770};
2771
## List of microcode blocks (McBlock instances).  Starts out empty here.
g_aoMcBlocks = [] # type: list(McBlock)
2774
2775
2776
class ParserException(Exception):
    """
    Exception type used by the parser for fatal errors.

    The single constructor argument is the complete, already formatted
    error message.
    """
    def __init__(self, sMessage):
        super(ParserException, self).__init__(sMessage);
2781
2782
2783class SimpleParser(object): # pylint: disable=too-many-instance-attributes
2784 """
2785 Parser of IEMAllInstruction*.cpp.h instruction specifications.
2786 """
2787
2788 ## @name Parser state.
2789 ## @{
2790 kiCode = 0;
2791 kiCommentMulti = 1;
2792 ## @}
2793
2794 class Macro(object):
2795 """ Macro """
2796 def __init__(self, sName, asArgs, sBody, iLine):
2797 self.sName = sName; ##< The macro name.
2798 self.asArgs = asArgs; ##< None if simple macro, list of parameters otherwise.
2799 self.sBody = sBody;
2800 self.iLine = iLine;
2801 self.oReArgMatch = re.compile(r'(\s*##\s*|\b)(' + '|'.join(asArgs) + r')(\s*##\s*|\b)') if asArgs else None;
2802
2803 @staticmethod
2804 def _needSpace(ch):
2805 """ This is just to make the expanded output a bit prettier. """
2806 return ch.isspace() and ch != '(';
2807
2808 def expandMacro(self, oParent, asArgs = None):
2809 """ Expands the macro body with the given arguments. """
2810 _ = oParent;
2811 sBody = self.sBody;
2812
2813 if self.oReArgMatch:
2814 assert len(asArgs) == len(self.asArgs);
2815 #oParent.debug('%s: %s' % (self.sName, self.oReArgMatch.pattern,));
2816
2817 dArgs = { self.asArgs[iArg]: sValue for iArg, sValue in enumerate(asArgs) };
2818 oMatch = self.oReArgMatch.search(sBody);
2819 while oMatch:
2820 sName = oMatch.group(2);
2821 #oParent.debug('%s %s..%s (%s)' % (sName, oMatch.start(), oMatch.end(),oMatch.group()));
2822 sValue = dArgs[sName];
2823 sPre = '';
2824 if not oMatch.group(1) and oMatch.start() > 0 and self._needSpace(sBody[oMatch.start()]):
2825 sPre = ' ';
2826 sPost = '';
2827 if not oMatch.group(3) and oMatch.end() < len(sBody) and self._needSpace(sBody[oMatch.end()]):
2828 sPost = ' ';
2829 sBody = sBody[ : oMatch.start()] + sPre + sValue + sPost + sBody[oMatch.end() : ];
2830 oMatch = self.oReArgMatch.search(sBody, oMatch.start() + len(sValue));
2831 else:
2832 assert not asArgs;
2833
2834 return sBody;
2835
2836
2837 def __init__(self, sSrcFile, asLines, sDefaultMap, oInheritMacrosFrom = None):
2838 self.sSrcFile = sSrcFile;
2839 self.asLines = asLines;
2840 self.iLine = 0;
2841 self.iState = self.kiCode;
2842 self.sComment = '';
2843 self.iCommentLine = 0;
2844 self.aoCurInstrs = [] # type: list(Instruction)
2845 self.sCurFunction = None # type: str
2846 self.iMcBlockInFunc = 0;
2847 self.oCurMcBlock = None # type: McBlock
2848 self.dMacros = {} # type: Dict[str,SimpleParser.Macro]
2849 self.oReMacros = None # type: re ##< Regular expression matching invocations of anything in self.dMacros.
2850 if oInheritMacrosFrom:
2851 self.dMacros = dict(oInheritMacrosFrom.dMacros);
2852 self.oReMacros = oInheritMacrosFrom.oReMacros;
2853
2854 assert sDefaultMap in g_dInstructionMaps;
2855 self.oDefaultMap = g_dInstructionMaps[sDefaultMap];
2856
2857 self.cTotalInstr = 0;
2858 self.cTotalStubs = 0;
2859 self.cTotalTagged = 0;
2860 self.cTotalMcBlocks = 0;
2861
2862 self.oReMacroName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
2863 self.oReMnemonic = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
2864 self.oReStatsName = re.compile('^[A-Za-z_][A-Za-z0-9_]*$');
2865 self.oReFunctionName= re.compile('^iemOp_[A-Za-z_][A-Za-z0-9_]*$');
2866 self.oReGroupName = re.compile('^og_[a-z0-9]+(|_[a-z0-9]+|_[a-z0-9]+_[a-z0-9]+)$');
2867 self.oReDisEnum = re.compile('^OP_[A-Z0-9_]+$');
2868 self.oReFunTable = re.compile('^(IEM_STATIC|static) +const +PFNIEMOP +g_apfn[A-Za-z0-9_]+ *\[ *\d* *\] *= *$');
2869 self.oReComment = re.compile('//.*?$|/\*.*?\*/'); ## Full comments.
2870 self.oReHashDefine = re.compile('^\s*#\s*define\s+(.*)$');
2871 self.oReHashDefine2 = re.compile('(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)\(([^)]*)\)\s*(.*)\Z'); ##< With arguments.
2872 self.oReHashDefine3 = re.compile('(?s)\A\s*([A-Za-z_][A-Za-z0-9_]*)[^(]\s*(.*)\Z'); ##< Simple, no arguments.
2873 self.oReHashUndef = re.compile('^\s*#\s*undef\s+(.*)$');
2874 self.oReMcBeginEnd = re.compile(r'\bIEM_MC_(BEGIN|END)\s*\(');
2875
2876 self.fDebug = True;
2877 self.fDebugMc = False;
2878 self.fDebugPreProc = False;
2879
2880 self.dTagHandlers = {
2881 '@opbrief': self.parseTagOpBrief,
2882 '@opdesc': self.parseTagOpDesc,
2883 '@opmnemonic': self.parseTagOpMnemonic,
2884 '@op1': self.parseTagOpOperandN,
2885 '@op2': self.parseTagOpOperandN,
2886 '@op3': self.parseTagOpOperandN,
2887 '@op4': self.parseTagOpOperandN,
2888 '@oppfx': self.parseTagOpPfx,
2889 '@opmaps': self.parseTagOpMaps,
2890 '@opcode': self.parseTagOpcode,
2891 '@opcodesub': self.parseTagOpcodeSub,
2892 '@openc': self.parseTagOpEnc,
2893 '@opfltest': self.parseTagOpEFlags,
2894 '@opflmodify': self.parseTagOpEFlags,
2895 '@opflundef': self.parseTagOpEFlags,
2896 '@opflset': self.parseTagOpEFlags,
2897 '@opflclear': self.parseTagOpEFlags,
2898 '@ophints': self.parseTagOpHints,
2899 '@opdisenum': self.parseTagOpDisEnum,
2900 '@opmincpu': self.parseTagOpMinCpu,
2901 '@opcpuid': self.parseTagOpCpuId,
2902 '@opgroup': self.parseTagOpGroup,
2903 '@opunused': self.parseTagOpUnusedInvalid,
2904 '@opinvalid': self.parseTagOpUnusedInvalid,
2905 '@opinvlstyle': self.parseTagOpUnusedInvalid,
2906 '@optest': self.parseTagOpTest,
2907 '@optestign': self.parseTagOpTestIgnore,
2908 '@optestignore': self.parseTagOpTestIgnore,
2909 '@opcopytests': self.parseTagOpCopyTests,
2910 '@oponly': self.parseTagOpOnlyTest,
2911 '@oponlytest': self.parseTagOpOnlyTest,
2912 '@opxcpttype': self.parseTagOpXcptType,
2913 '@opstats': self.parseTagOpStats,
2914 '@opfunction': self.parseTagOpFunction,
2915 '@opdone': self.parseTagOpDone,
2916 };
2917 for i in range(48):
2918 self.dTagHandlers['@optest%u' % (i,)] = self.parseTagOpTestNum;
2919 self.dTagHandlers['@optest[%u]' % (i,)] = self.parseTagOpTestNum;
2920
2921 self.asErrors = [];
2922
2923 def raiseError(self, sMessage):
2924 """
2925 Raise error prefixed with the source and line number.
2926 """
2927 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iLine, sMessage,));
2928
2929 def raiseCommentError(self, iLineInComment, sMessage):
2930 """
2931 Similar to raiseError, but the line number is iLineInComment + self.iCommentLine.
2932 """
2933 raise ParserException("%s:%d: error: %s" % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
2934
2935 def error(self, sMessage):
2936 """
2937 Adds an error.
2938 returns False;
2939 """
2940 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iLine, sMessage,));
2941 return False;
2942
2943 def errorOnLine(self, iLine, sMessage):
2944 """
2945 Adds an error.
2946 returns False;
2947 """
2948 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, iLine, sMessage,));
2949 return False;
2950
2951 def errorComment(self, iLineInComment, sMessage):
2952 """
2953 Adds a comment error.
2954 returns False;
2955 """
2956 self.asErrors.append(u'%s:%d: error: %s\n' % (self.sSrcFile, self.iCommentLine + iLineInComment, sMessage,));
2957 return False;
2958
2959 def printErrors(self):
2960 """
2961 Print the errors to stderr.
2962 Returns number of errors.
2963 """
2964 if self.asErrors:
2965 sys.stderr.write(u''.join(self.asErrors));
2966 return len(self.asErrors);
2967
2968 def debug(self, sMessage):
2969 """
2970 For debugging.
2971 """
2972 if self.fDebug:
2973 print('debug: %s' % (sMessage,), file = sys.stderr);
2974
2975 def stripComments(self, sLine):
2976 """
2977 Returns sLine with comments stripped.
2978
2979 Complains if traces of incomplete multi-line comments are encountered.
2980 """
2981 sLine = self.oReComment.sub(" ", sLine);
2982 if sLine.find('/*') >= 0 or sLine.find('*/') >= 0:
2983 self.error('Unexpected multi-line comment will not be handled correctly. Please simplify.');
2984 return sLine;
2985
2986 def parseFunctionTable(self, sLine):
2987 """
2988 Parses a PFNIEMOP table, updating/checking the @oppfx value.
2989
2990 Note! Updates iLine as it consumes the whole table.
2991 """
2992
2993 #
2994 # Extract the table name.
2995 #
2996 sName = re.search(' *([a-zA-Z_0-9]+) *\[', sLine).group(1);
2997 oMap = g_dInstructionMapsByIemName.get(sName);
2998 if not oMap:
2999 self.debug('No map for PFNIEMOP table: %s' % (sName,));
3000 oMap = self.oDefaultMap; # This is wrong wrong wrong.
3001
3002 #
3003 # All but the g_apfnOneByteMap & g_apfnEscF1_E0toFF tables uses four
3004 # entries per byte:
3005 # no prefix, 066h prefix, f3h prefix, f2h prefix
3006 # Those tables has 256 & 32 entries respectively.
3007 #
3008 cEntriesPerByte = 4;
3009 cValidTableLength = 1024;
3010 asPrefixes = ('none', '0x66', '0xf3', '0xf2');
3011
3012 oEntriesMatch = re.search('\[ *(256|32) *\]', sLine);
3013 if oEntriesMatch:
3014 cEntriesPerByte = 1;
3015 cValidTableLength = int(oEntriesMatch.group(1));
3016 asPrefixes = (None,);
3017
3018 #
3019 # The next line should be '{' and nothing else.
3020 #
3021 if self.iLine >= len(self.asLines) or not re.match('^ *{ *$', self.asLines[self.iLine]):
3022 return self.errorOnLine(self.iLine + 1, 'Expected lone "{" on line following PFNIEMOP table %s start' % (sName, ));
3023 self.iLine += 1;
3024
3025 #
3026 # Parse till we find the end of the table.
3027 #
3028 iEntry = 0;
3029 while self.iLine < len(self.asLines):
3030 # Get the next line and strip comments and spaces (assumes no
3031 # multi-line comments).
3032 sLine = self.asLines[self.iLine];
3033 self.iLine += 1;
3034 sLine = self.stripComments(sLine).strip();
3035
3036 # Split the line up into entries, expanding IEMOP_X4 usage.
3037 asEntries = sLine.split(',');
3038 for i in range(len(asEntries) - 1, -1, -1):
3039 sEntry = asEntries[i].strip();
3040 if sEntry.startswith('IEMOP_X4(') and sEntry[-1] == ')':
3041 sEntry = (sEntry[len('IEMOP_X4('):-1]).strip();
3042 asEntries.insert(i + 1, sEntry);
3043 asEntries.insert(i + 1, sEntry);
3044 asEntries.insert(i + 1, sEntry);
3045 if sEntry:
3046 asEntries[i] = sEntry;
3047 else:
3048 del asEntries[i];
3049
3050 # Process the entries.
3051 for sEntry in asEntries:
3052 if sEntry in ('};', '}'):
3053 if iEntry != cValidTableLength:
3054 return self.error('Wrong table length for %s: %#x, expected %#x' % (sName, iEntry, cValidTableLength, ));
3055 return True;
3056 if sEntry.startswith('iemOp_Invalid'):
3057 pass; # skip
3058 else:
3059 # Look up matching instruction by function.
3060 sPrefix = asPrefixes[iEntry % cEntriesPerByte];
3061 sOpcode = '%#04x' % (iEntry // cEntriesPerByte);
3062 aoInstr = g_dAllInstructionsByFunction.get(sEntry);
3063 if aoInstr:
3064 if not isinstance(aoInstr, list):
3065 aoInstr = [aoInstr,];
3066 oInstr = None;
3067 for oCurInstr in aoInstr:
3068 if oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix == sPrefix:
3069 pass;
3070 elif oCurInstr.sOpcode == sOpcode and oCurInstr.sPrefix is None:
3071 oCurInstr.sPrefix = sPrefix;
3072 elif oCurInstr.sOpcode is None and oCurInstr.sPrefix is None:
3073 oCurInstr.sOpcode = sOpcode;
3074 oCurInstr.sPrefix = sPrefix;
3075 else:
3076 continue;
3077 oInstr = oCurInstr;
3078 break;
3079 if not oInstr:
3080 oInstr = aoInstr[0].copy(oMap = oMap, sOpcode = sOpcode, sPrefix = sPrefix);
3081 aoInstr.append(oInstr);
3082 g_dAllInstructionsByFunction[sEntry] = aoInstr;
3083 g_aoAllInstructions.append(oInstr);
3084 oMap.aoInstructions.append(oInstr);
3085 else:
3086 self.debug('Function "%s", entry %#04x / byte %#04x in %s, is not associated with an instruction.'
3087 % (sEntry, iEntry, iEntry // cEntriesPerByte, sName,));
3088 iEntry += 1;
3089
3090 return self.error('Unexpected end of file in PFNIEMOP table');
3091
3092 def addInstruction(self, iLine = None):
3093 """
3094 Adds an instruction.
3095 """
3096 oInstr = Instruction(self.sSrcFile, self.iLine if iLine is None else iLine);
3097 g_aoAllInstructions.append(oInstr);
3098 self.aoCurInstrs.append(oInstr);
3099 return oInstr;
3100
3101 def deriveMnemonicAndOperandsFromStats(self, oInstr, sStats):
3102 """
3103 Derives the mnemonic and operands from a IEM stats base name like string.
3104 """
3105 if oInstr.sMnemonic is None:
3106 asWords = sStats.split('_');
3107 oInstr.sMnemonic = asWords[0].lower();
3108 if len(asWords) > 1 and not oInstr.aoOperands:
3109 for sType in asWords[1:]:
3110 if sType in g_kdOpTypes:
3111 oInstr.aoOperands.append(Operand(g_kdOpTypes[sType][1], sType));
3112 else:
3113 #return self.error('unknown operand type: %s (instruction: %s)' % (sType, oInstr))
3114 return False;
3115 return True;
3116
    def doneInstructionOne(self, oInstr, iLine):
        """
        Complete the parsing by processing, validating and expanding raw inputs.

        oInstr  The instruction to complete.  Asserted not to have been
                completed before (iLineCompleted is None).
        iLine   Stored as oInstr.iLineCompleted.

        Fills in missing mnemonic, disassembler enum, stats name, function
        name, maps and encoding, and registers the instruction in the maps
        and in the g_dAllInstructionsByStat / g_dAllInstructionsByFunction
        dictionaries.  A duplicate stats name is recorded as an error.

        Always returns True.
        """
        assert oInstr.iLineCompleted is None;
        oInstr.iLineCompleted = iLine;

        #
        # Specified instructions.  (Currently a no-op placeholder.)
        #
        if oInstr.cOpTags > 0:
            if oInstr.sStats is None:
                pass;

        #
        # Unspecified legacy stuff.  We generally only got a few things to go on here.
        #   /** Opcode 0x0f 0x00 /0. */
        #   FNIEMOPRM_DEF(iemOp_Grp6_sldt)
        #
        else:
            #if oInstr.sRawOldOpcodes:
            #
            #if oInstr.sMnemonic:
            pass;

        #
        # Common defaults.
        #

        # Guess mnemonic and operands from stats if the former is missing.
        # Falls back on the function name with the 'iemOp_' part removed.
        if oInstr.sMnemonic is None:
            if oInstr.sStats is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sStats);
            elif oInstr.sFunction is not None:
                self.deriveMnemonicAndOperandsFromStats(oInstr, oInstr.sFunction.replace('iemOp_', ''));

        # Derive the disassembler op enum constant from the mnemonic.
        if oInstr.sDisEnum is None and oInstr.sMnemonic is not None:
            oInstr.sDisEnum = 'OP_' + oInstr.sMnemonic.upper();

        # Derive the IEM statistics base name, preferably from the function
        # name, otherwise from mnemonic and operand types.
        if oInstr.sStats is None:
            if oInstr.sFunction is not None:
                oInstr.sStats = oInstr.sFunction.replace('iemOp_', '');
            elif oInstr.sMnemonic is not None:
                oInstr.sStats = oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sStats += '_' + oOperand.sType;

        # Derive the IEM function name from mnemonic and operand types, or
        # failing that from the stats name.
        if oInstr.sFunction is None:
            if oInstr.sMnemonic is not None:
                oInstr.sFunction = 'iemOp_' + oInstr.sMnemonic;
                for oOperand in oInstr.aoOperands:
                    if oOperand.sType:
                        oInstr.sFunction += '_' + oOperand.sType;
            elif oInstr.sStats:
                oInstr.sFunction = 'iemOp_' + oInstr.sStats;

        #
        # Apply default map and then add the instruction to all its maps.
        #
        if not oInstr.aoMaps:
            oInstr.aoMaps = [ self.oDefaultMap, ];
        for oMap in oInstr.aoMaps:
            oMap.aoInstructions.append(oInstr);

        #
        # Derive encoding from operands and maps.
        #
        # Note: sEncoding stays None when there are operands and the first
        #       one does not use ModR/M.
        #
        if oInstr.sEncoding is None:
            if not oInstr.aoOperands:
                if oInstr.fUnused and oInstr.sSubOpcode:
                    oInstr.sEncoding = 'VEX.ModR/M' if oInstr.onlyInVexMaps() else 'ModR/M';
                else:
                    oInstr.sEncoding = 'VEX.fixed' if oInstr.onlyInVexMaps() else 'fixed';
            elif oInstr.aoOperands[0].usesModRM():
                if     (len(oInstr.aoOperands) >= 2 and oInstr.aoOperands[1].sWhere == 'vvvv') \
                    or oInstr.onlyInVexMaps():
                    oInstr.sEncoding = 'VEX.ModR/M';
                else:
                    oInstr.sEncoding = 'ModR/M';

        #
        # Check the opstat value and add it to the opstat indexed dictionary.
        # The first instruction using a stats name keeps the dictionary entry.
        #
        if oInstr.sStats:
            if oInstr.sStats not in g_dAllInstructionsByStat:
                g_dAllInstructionsByStat[oInstr.sStats] = oInstr;
            else:
                self.error('Duplicate opstat value "%s"\nnew: %s\nold: %s'
                           % (oInstr.sStats, oInstr, g_dAllInstructionsByStat[oInstr.sStats],));

        #
        # Add to function indexed dictionary.  We allow multiple instructions per function.
        #
        if oInstr.sFunction:
            if oInstr.sFunction not in g_dAllInstructionsByFunction:
                g_dAllInstructionsByFunction[oInstr.sFunction] = [oInstr,];
            else:
                g_dAllInstructionsByFunction[oInstr.sFunction].append(oInstr);

        #self.debug('%d..%d: %s; %d @op tags' % (oInstr.iLineCreated, oInstr.iLineCompleted, oInstr.sFunction, oInstr.cOpTags));
        return True;
3222
def doneInstructions(self, iLineInComment = None, fEndOfFunction = False):
    """
    Completes all the current instructions and resets the comment state.

    iLineInComment is an offset into the current comment (added to
    self.iCommentLine); when None the current input line (self.iLine) is
    used as the completion line instead.

    When fEndOfFunction is set the current function name and the MC block
    counter are reset as well.

    Returns True.
    """
    if iLineInComment is None:
        iLineCompleted = self.iLine;
    else:
        iLineCompleted = self.iCommentLine + iLineInComment;

    # Complete each pending instruction, counting the stubs among them.
    aoPending = self.aoCurInstrs;
    for oPending in aoPending:
        self.doneInstructionOne(oPending, iLineCompleted);
        if oPending.fStub:
            self.cTotalStubs += 1;
    self.cTotalInstr += len(aoPending);

    # Start over with a clean slate for the next comment / instruction.
    self.sComment    = '';
    self.aoCurInstrs = [];
    if not fEndOfFunction:
        return True;

    self.sCurFunction   = None;
    self.iMcBlockInFunc = 0;
    return True;
3241
def setInstrunctionAttrib(self, sAttrib, oValue, fOverwrite = False):
    """
    Assigns oValue to the attribute sAttrib of every current instruction.

    Unless fOverwrite is True (the boolean, not merely a truthy value), an
    instruction keeps its existing value whenever that value is anything
    other than None.
    """
    fForce = fOverwrite is True;
    for oCurInstr in self.aoCurInstrs:
        if fForce or getattr(oCurInstr, sAttrib) is None:
            setattr(oCurInstr, sAttrib, oValue);
3253
def setInstrunctionArrayAttrib(self, sAttrib, iEntry, oValue, fOverwrite = False):
    """
    Assigns oValue to entry iEntry of the list attribute sAttrib of every
    current instruction, padding the list with None entries as needed.

    Unless fOverwrite is True, an entry is only written when it currently
    holds None.
    """
    for oCurInstr in self.aoCurInstrs:
        aoEntries = getattr(oCurInstr, sAttrib);

        # Grow the list so that iEntry becomes a valid index.
        cMissing = iEntry + 1 - len(aoEntries);
        if cMissing > 0:
            aoEntries.extend([None] * cMissing);

        if fOverwrite is True or aoEntries[iEntry] is None:
            aoEntries[iEntry] = oValue;
3265
def parseCommentOldOpcode(self, asLines):
    """
    Deals with 'Opcode 0xff /4' like comments.

    When the first comment line starts with the word 'Opcode' followed by
    a hex byte, the remainder of that line (with any '0X' prefix
    normalized to '0x') is recorded as the 'sRawOldOpcodes' attribute of
    the current instructions, unless they already have one.

    Always returns False.
    """
    asWords = asLines[0].split() if asLines else [];
    if     len(asWords) >= 2 \
       and asWords[0] == 'Opcode' \
       and asWords[1].startswith(('0x', '0X')):
        # Drop the leading 'Opcode' word and normalize the hex prefixes.
        asOpcodes = [];
        for sWord in asWords[1:]:
            if sWord.startswith('0X'):
                sWord = '0x' + sWord[2:];
            asOpcodes.append(sWord);
        self.setInstrunctionAttrib('sRawOldOpcodes', ' '.join(asOpcodes));

    return False;
3281
def ensureInstructionForOpTag(self, iTagLine):
    """
    Makes sure there is a current instruction for the op-tag being parsed,
    adding one at the tag line if there is none.

    Bumps the op-tag count of every current instruction, and the total
    tagged count for each instruction receiving its first tag.

    Returns the last of the current instructions.
    """
    if not self.aoCurInstrs:
        self.addInstruction(self.iCommentLine + iTagLine);

    for oCurInstr in self.aoCurInstrs:
        fFirstTag = oCurInstr.cOpTags == 0;
        oCurInstr.cOpTags += 1;
        if fFirstTag:
            self.cTotalTagged += 1;

    return self.aoCurInstrs[-1];
3291
@staticmethod
def flattenSections(aasSections):
    """
    Turns each non-empty section (list of lines) into one string made up
    of its stripped lines joined by single spaces.

    Returns list of strings, one per non-empty section.
    """
    return [' '.join(sLine.strip() for sLine in asSection)
            for asSection in aasSections
            if asSection];
3303
@staticmethod
def flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = '\n'):
    """
    Flattens the sections into a single string.

    The stripped lines of a section are joined using sLineSep.  Empty
    sections are skipped, and every non-empty section other than the very
    first one in aasSections is preceded by sSectionSep.  There is no
    trailing separator.
    """
    # Fast path for the typical input: one section holding one line.
    if len(aasSections) == 1:
        asOnly = aasSections[0];
        if len(asOnly) == 1:
            return asOnly[0].strip();

    asParts = [];
    for idxSection, asSection in enumerate(aasSections):
        if not asSection:
            continue;
        if idxSection > 0:
            asParts.append(sSectionSep);
        asParts.append(sLineSep.join([sLine.strip() for sLine in asSection]));
    return ''.join(asParts);
3321
3322
3323
3324 ## @name Tag parsers
3325 ## @{
3326
def parseTagOpBrief(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opbrief
    Value:  Text description, multiple sections, appended.

    Brief description.  If not given, it's the first sentence from @opdesc.

    The value must be a single sentence of at most 180 characters; a
    terminating dot is appended if missing.  Returns True on success,
    otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sBrief = self.flattenAllSections(aasSections);
    if not sBrief:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if sBrief[-1] != '.':
        sBrief = sBrief + '.';
    if len(sBrief) > 180:
        return self.errorComment(iTagLine, '%s: value too long (max 180 chars): %s' % (sTag, sBrief));

    # Look for a sentence ending dot, i.e. one followed by a space (dots
    # inside words like 'e.g' are skipped).  Only the final dot may be it.
    offDot = sBrief.find('.');
    while 0 <= offDot < len(sBrief) - 1 and sBrief[offDot + 1] != ' ':
        offDot = sBrief.find('.', offDot + 1);
    if offDot >= 0 and offDot != len(sBrief) - 1:
        return self.errorComment(iTagLine, '%s: only one sentence: %s' % (sTag, sBrief));

    # Update the instruction, refusing to overwrite an existing brief.
    if oInstr.sBrief is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite brief "%s" with "%s"'
                                           % (sTag, oInstr.sBrief, sBrief,));
    oInstr.sBrief = sBrief;

    _ = iEndLine;
    return True;
3356
def parseTagOpDesc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdesc
    Value:  Text description, multiple sections, appended.

    Describes the instruction.  Each flattened section is appended to the
    description sections of the instruction.  Returns True.
    """
    _ = sTag; _ = iEndLine;

    oInstr = self.ensureInstructionForOpTag(iTagLine);
    if aasSections:
        asNewSections = self.flattenSections(aasSections);
        oInstr.asDescSections.extend(asNewSections);
    return True;
3371
def parseTagOpMnemonic(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmnemonic
    Value:  mnemonic

    The 'mnemonic' value must match self.oReMnemonic.  Because of
    prefixes, groups and whatnot, there are times when the mnemonic isn't
    that of an actual assembler mnemonic.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, validate it and set it unless already set.
    sNewMnemonic = self.flattenAllSections(aasSections);
    if not self.oReMnemonic.match(sNewMnemonic):
        fRc = self.errorComment(iTagLine, '%s: invalid menmonic name: "%s"' % (sTag, sNewMnemonic,));
    elif oInstr.sMnemonic is not None:
        fRc = self.errorComment(iTagLine, '%s: attempting to overwrite menmonic "%s" with "%s"'
                                          % (sTag, oInstr.sMnemonic, sNewMnemonic,));
    else:
        oInstr.sMnemonic = sNewMnemonic;
        fRc = True;
    return fRc;
3394
def parseTagOpOperandN(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:  \@op1, \@op2, \@op3, \@op4
    Value: [where:]type

    The 'where' value indicates where the operand is found, like the 'reg'
    part of the ModR/M encoding.  It must be a key of g_kdOpLocations.
    When omitted, it is taken from the g_kdOpTypes entry of the type.

    The 'type' value indicates the operand type.  These follow the types
    given in the opcode tables in the CPU reference manuals.  It must be
    a key of g_kdOpTypes.

    The operand index is taken from the last character of the tag.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);
    idxOp = int(sTag[-1]) - 1;
    assert 0 <= idxOp < 4;

    # flatten, split up, and validate the "where:type" value.
    sFlattened = self.flattenAllSections(aasSections);
    asSplit = sFlattened.split(':');
    if len(asSplit) == 1:
        sType  = asSplit[0];
        sWhere = None;
    elif len(asSplit) == 2:
        (sWhere, sType) = asSplit;
    else:
        return self.errorComment(iTagLine, 'expected %s value on format "[<where>:]<type>" not "%s"' % (sTag, sFlattened,));

    if sType not in g_kdOpTypes:
        return self.errorComment(iTagLine, '%s: invalid type value "%s", valid: %s'
                                           % (sTag, sType, ', '.join(g_kdOpTypes.keys()),));
    if sWhere is None:
        sWhere = g_kdOpTypes[sType][1]; # The type's default location.
    elif sWhere not in g_kdOpLocations:
        return self.errorComment(iTagLine, '%s: invalid where value "%s", valid: %s'
                                           % (sTag, sWhere, ', '.join(g_kdOpLocations.keys()),));

    # Insert the operand, refusing to overwrite an existing one.
    while idxOp >= len(oInstr.aoOperands):
        oInstr.aoOperands.append(None);
    if oInstr.aoOperands[idxOp] is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s:%s" with "%s:%s"'
                                           % ( sTag, oInstr.aoOperands[idxOp].sWhere, oInstr.aoOperands[idxOp].sType,
                                               sWhere, sType,));
    oInstr.aoOperands[idxOp] = Operand(sWhere, sType);

    _ = iEndLine;
    return True;
3444
def parseTagOpMaps(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmaps
    Value:  map[,map2]

    Indicates which maps the instruction is in.  There is a default map
    associated with each input file.

    Each map name must be a key of g_dInstructionMaps.  Whitespace around
    the names and empty list entries (such as those produced when joining
    lines ending with a comma) are ignored.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the value.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asMaps = [sMap.strip() for sMap in sFlattened.split(',')];
    asMaps = [sMap for sMap in asMaps if sMap];
    if not asMaps:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    for sMap in asMaps:
        if sMap not in g_dInstructionMaps:
            return self.errorComment(iTagLine, '%s: invalid map value: %s  (valid values: %s)'
                                               % (sTag, sMap, ', '.join(g_dInstructionMaps.keys()),));

    # Add the maps to the current list.  Throw errors on duplicates.
    for oMap in oInstr.aoMaps:
        if oMap.sName in asMaps:
            return self.errorComment(iTagLine, '%s: duplicate map assignment: %s' % (sTag, oMap.sName));

    for sMap in asMaps:
        oMap = g_dInstructionMaps[sMap];
        if oMap not in oInstr.aoMaps:
            oInstr.aoMaps.append(oMap);
        else:
            # The same map was listed more than once in this tag value.
            self.errorComment(iTagLine, '%s: duplicate map assignment (input): %s' % (sTag, sMap));

    _ = iEndLine;
    return True;
3479
def parseTagOpPfx(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@oppfx
    Value:  n/a|none|0x66|0xf3|0xf2

    Required prefix for the instruction.  (In a (E)VEX context this is the
    value of the 'pp' field rather than an actual prefix.)

    The value is case insensitive.  'n/a' is stored as None; a two
    character value gets '0x' prepended before it is checked.  Values
    other than 'n/a' must be keys of g_kdPrefixes.

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sFlattened = self.flattenAllSections(aasSections);
    asPrefixes = sFlattened.split();
    if not asPrefixes:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asPrefixes) > 1:
        return self.errorComment(iTagLine, '%s: max one prefix: %s' % (sTag, asPrefixes,));

    sPrefix = asPrefixes[0].lower();
    if sPrefix == 'n/a':
        sPrefix = None;
    elif sPrefix != 'none':
        # A prefix byte; accept it without the '0x' as well.
        if len(sPrefix) == 2:
            sPrefix = '0x' + sPrefix;
        if not _isValidOpcodeByte(sPrefix):
            return self.errorComment(iTagLine, '%s: invalid prefix: %s' % (sTag, sPrefix,));

    if sPrefix is not None and sPrefix not in g_kdPrefixes:
        return self.errorComment(iTagLine, '%s: invalid prefix: %s (valid %s)' % (sTag, sPrefix, g_kdPrefixes,));

    # Set it.
    if oInstr.sPrefix is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sPrefix, sPrefix,));
    oInstr.sPrefix = sPrefix;

    _ = iEndLine;
    return True;
3517
def parseTagOpcode(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcode
    Value:  0x?? | /reg (TODO: | mr/reg | 11 /reg | !11 /reg | 11 mr/reg | !11 mr/reg)

    The opcode byte or sub-byte for the instruction in the context of a map.

    Accepted are values passing _isValidOpcodeByte() as well as '/r',
    '11/r' and '!11/r', where r is a single digit in the range 0 thru 7
    (the ModR/M reg field is three bits wide).

    Returns True on success, otherwise the result of self.errorComment().
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sOpcode = self.flattenAllSections(aasSections);
    fValid = _isValidOpcodeByte(sOpcode);
    if not fValid:
        for sLead in ('/', '11/', '!11/'):
            if     len(sOpcode) == len(sLead) + 1 \
               and sOpcode.startswith(sLead) \
               and sOpcode[-1] in '01234567':
                fValid = True;
                break;
    if not fValid:
        return self.errorComment(iTagLine, '%s: invalid opcode: %s' % (sTag, sOpcode,));

    # Set it.
    if oInstr.sOpcode is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sOpcode, sOpcode,));
    oInstr.sOpcode = sOpcode;

    _ = iEndLine;
    return True;
3547
def parseTagOpcodeSub(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcodesub
    Value:  none | 11 mr/reg | !11 mr/reg | rex.w=0 | rex.w=1 | vex.l=0 | vex.l=1
            | 11 mr/reg vex.l=0 | 11 mr/reg vex.l=1 | !11 mr/reg vex.l=0 | !11 mr/reg vex.l=1

    This is a simple way of dealing with encodings where the mod=3 and mod!=3
    represents exactly two different instructions.  The more proper way would
    be to go via maps with two members, but this is faster.

    The value must be a key of g_kdSubOpcodes; what gets stored is the
    first element of the corresponding entry.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value and look it up.
    sRawSubOpcode = self.flattenAllSections(aasSections);
    if sRawSubOpcode not in g_kdSubOpcodes:
        return self.errorComment(iTagLine, '%s: invalid sub opcode: %s  (valid: 11, !11, none)' % (sTag, sRawSubOpcode,));
    sNewSubOpcode = g_kdSubOpcodes[sRawSubOpcode][0];

    # Set it unless that means overwriting an earlier value.
    if oInstr.sSubOpcode is None:
        oInstr.sSubOpcode = sNewSubOpcode;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sSubOpcode, sNewSubOpcode,));
3574
def parseTagOpEnc(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@openc
    Value:  ModR/M|fixed|prefix|<map name>

    The instruction operand encoding style.

    The value must be a key of g_kdEncodings or g_dInstructionMaps, or
    pass _isValidOpcodeByte().

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and validate the value.
    sNewEncoding = self.flattenAllSections(aasSections);
    if     sNewEncoding not in g_kdEncodings \
       and sNewEncoding not in g_dInstructionMaps \
       and not _isValidOpcodeByte(sNewEncoding):
        return self.errorComment(iTagLine, '%s: invalid encoding: %s' % (sTag, sNewEncoding,));

    # Set it unless that means overwriting an earlier value.
    if oInstr.sEncoding is None:
        oInstr.sEncoding = sNewEncoding;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"'
                                       % ( sTag, oInstr.sEncoding, sNewEncoding,));
3601
## Maps each EFLAGS op-tag to the name of the Instruction attribute
## holding the flags given with that tag.
kdOpFlagToAttr = {
    '@opfltest':   'asFlTest',
    '@opflmodify': 'asFlModify',
    '@opflundef':  'asFlUndefined',
    '@opflset':    'asFlSet',
    '@opflclear':  'asFlClear',
};
3610
def parseTagOpEFlags(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tags:   \@opfltest, \@opflmodify, \@opflundef, \@opflset, \@opflclear
    Value:  <eflags specifier>

    The value is a comma separated list of g_kdEFlagsMnemonics keys, or
    the single word 'none' (any case) for an empty list.  The list is
    stored in the instruction attribute self.kdOpFlagToAttr gives for
    the tag.

    Returns True on success, False if any flag is invalid, and otherwise
    the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, split up and validate the values.
    sFlattened = self.flattenAllSections(aasSections, sLineSep = ',', sSectionSep = ',');
    asFlags    = sFlattened.split(',');
    if len(asFlags) == 1 and asFlags[0].lower() == 'none':
        asFlags = [];
    else:
        fRc = True;
        for iFlag, sFlag in enumerate(asFlags):
            if sFlag in g_kdEFlagsMnemonics:
                continue;
            sStripped = sFlag.strip();
            if sStripped in g_kdEFlagsMnemonics:
                asFlags[iFlag] = sStripped;
            else:
                # Keep going so that all the bad flags get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid EFLAGS value: %s' % (sTag, sFlag,));
        if not fRc:
            return False;

    # Set them unless that means overwriting an earlier value.
    sAttrib = self.kdOpFlagToAttr[sTag];
    asOld   = getattr(oInstr, sAttrib);
    if asOld is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, asOld, asFlags,));
    setattr(oInstr, sAttrib, asFlags);
    return True;
3642
def parseTagOpHints(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@ophints
    Value:  Comma or space separated list of flags and hints.

    This covers the disassembler flags table and more.

    Each hint must be a key of g_kdHints.  The single word 'none' (any
    case) stands for an empty list.  The hints are added to the dHints
    dictionary of the instruction; duplicates are reported but otherwise
    ignored.

    Returns True on success and False if any of the hints are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! split() w/o arguments never yields entries with surrounding
    #       whitespace, so no stripping is necessary.
    asHints = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asHints) == 1 and asHints[0].lower() == 'none':
        asHints = [];
    else:
        fRc = True;
        for sHint in asHints:
            if sHint not in g_kdHints:
                # Keep going so that all the bad hints get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid hint value: %s' % (sTag, sHint,));
        if not fRc:
            return False;

    # Append them.
    for sHint in asHints:
        if sHint not in oInstr.dHints:
            oInstr.dHints[sHint] = True; # (dummy value, using dictionary for speed)
        else:
            self.errorComment(iTagLine, '%s: duplicate hint: %s' % ( sTag, sHint,));

    _ = iEndLine;
    return True;
3676
def parseTagOpDisEnum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opdisenum
    Value:  OP_XXXX

    This is for selecting a specific (legacy) disassembler enum value for
    the instruction.  The value must match self.oReDisEnum.

    When more than one word is given, that is reported and the first word
    is used.  Returns True on success, False if there is no value, and
    otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten and split.
    asWords = self.flattenAllSections(aasSections).split();
    cWords  = len(asWords);
    if cWords != 1:
        self.errorComment(iTagLine, '%s: expected exactly one value: %s' % (sTag, asWords,));
        if cWords == 0:
            return False;

    # Validate the first word.
    sNewDisEnum = asWords[0];
    if not self.oReDisEnum.match(sNewDisEnum):
        return self.errorComment(iTagLine, '%s: invalid disassembler OP_XXXX enum: %s (pattern: %s)'
                                           % (sTag, sNewDisEnum, self.oReDisEnum.pattern));

    # Set it unless that means overwriting an earlier value.
    if oInstr.sDisEnum is None:
        oInstr.sDisEnum = sNewDisEnum;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % (sTag, oInstr.sDisEnum, sNewDisEnum,));
3705
def parseTagOpMinCpu(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opmincpu
    Value:  <simple CPU name>

    Indicates when this instruction was introduced.

    The name must be a key of g_kdCpuNames.  When more than one word is
    given, that is reported and the first word is used.  Trying to change
    an already set value is reported and the old value is kept.

    Returns True, or the result of self.errorComment() if the value is
    missing or invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split into words, make sure there's just one, valid it.
    asCpus = self.flattenAllSections(aasSections).split();
    if not asCpus:
        return self.errorComment(iTagLine, '%s: value required' % (sTag,));
    if len(asCpus) > 1:
        self.errorComment(iTagLine, '%s: exactly one CPU name, please: %s' % (sTag, ' '.join(asCpus),));

    sMinCpu = asCpus[0];
    if sMinCpu not in g_kdCpuNames:
        return self.errorComment(iTagLine, '%s: invalid CPU name: %s  (names: %s)'
                                           % (sTag, sMinCpu, ','.join(sorted(g_kdCpuNames)),));

    # Set it.  Must not be done before this point, or the overwrite
    # check cannot detect anything.
    if oInstr.sMinCpu is None:
        oInstr.sMinCpu = sMinCpu;
    elif oInstr.sMinCpu != sMinCpu:
        self.errorComment(iTagLine, '%s: attemting to overwrite "%s" with "%s"' % (sTag, oInstr.sMinCpu, sMinCpu,));

    _ = iEndLine;
    return True;
3735
def parseTagOpCpuId(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opcpuid
    Value:  none | <CPUID flag specifier>

    CPUID feature bit which is required for the instruction to be present.

    The value is a comma or space separated list of g_kdCpuIdFlags keys,
    or the single word 'none' (any case) for an empty list.  The flags
    are appended to the asCpuIds list of the instruction; duplicates are
    reported but otherwise ignored.

    Returns True on success and False if any of the values are invalid.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten as a space separated list, split it up and validate the values.
    # Note! split() w/o arguments never yields entries with surrounding
    #       whitespace, so no stripping is necessary.
    asCpuIds = self.flattenAllSections(aasSections, sLineSep = ' ', sSectionSep = ' ').replace(',', ' ').split();
    if len(asCpuIds) == 1 and asCpuIds[0].lower() == 'none':
        asCpuIds = [];
    else:
        fRc = True;
        for sCpuId in asCpuIds:
            if sCpuId not in g_kdCpuIdFlags:
                # Keep going so that all the bad values get reported.
                fRc = self.errorComment(iTagLine, '%s: invalid CPUID value: %s' % (sTag, sCpuId,));
        if not fRc:
            return False;

    # Append them.
    for sCpuId in asCpuIds:
        if sCpuId not in oInstr.asCpuIds:
            oInstr.asCpuIds.append(sCpuId);
        else:
            self.errorComment(iTagLine, '%s: duplicate CPUID: %s' % ( sTag, sCpuId,));

    _ = iEndLine;
    return True;
3769
def parseTagOpGroup(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opgroup
    Value:  op_grp1[_subgrp2[_subsubgrp3]]

    Instruction grouping.  Exactly one group name matching
    self.oReGroupName must be given.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate the lot.
    asGroups = self.flattenAllSections(aasSections).split();
    if len(asGroups) != 1:
        return self.errorComment(iTagLine, '%s: exactly one group, please: %s' % (sTag, asGroups,));
    (sNewGroup,) = asGroups;
    if not self.oReGroupName.match(sNewGroup):
        return self.errorComment(iTagLine, '%s: invalid group name: %s (valid: %s)'
                                           % (sTag, sNewGroup, self.oReGroupName.pattern));

    # Set it unless that means overwriting an earlier value.
    if oInstr.sGroup is None:
        oInstr.sGroup = sNewGroup;
        return True;
    return self.errorComment(iTagLine, '%s: attempting to overwrite "%s" with "%s"' % ( sTag, oInstr.sGroup, sNewGroup,));
3795
def parseTagOpUnusedInvalid(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:    \@opunused, \@opinvalid, \@opinvlstyle
    Value:  <invalid opcode behaviour style>

    The \@opunused indicates the specification is for a currently unused
    instruction encoding.

    The \@opinvalid indicates the specification is for an invalid currently
    instruction encoding (like UD2).

    The \@opinvlstyle just indicates how CPUs decode the instruction when
    not supported (\@opcpuid, \@opmincpu) or disabled.

    The style must be a key of g_kdInvalidStyles, and only one of these
    tags may be given per instruction.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate the lot.
    asStyles = self.flattenAllSections(aasSections).split();
    if len(asStyles) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid behviour style, please: %s' % (sTag, asStyles,));
    (sNewStyle,) = asStyles;
    if sNewStyle not in g_kdInvalidStyles:
        return self.errorComment(iTagLine, '%s: invalid invalid behaviour style: %s (valid: %s)'
                                           % (sTag, sNewStyle, g_kdInvalidStyles.keys(),));

    # Refuse to overwrite an earlier value.
    if oInstr.sInvalidStyle is not None:
        return self.errorComment(iTagLine,
                                 '%s: attempting to overwrite "%s" with "%s" (only one @opunused, @opinvalid, @opinvlstyle)'
                                 % ( sTag, oInstr.sInvalidStyle, sNewStyle,));

    # Set it, flagging the instruction according to the tag used.
    oInstr.sInvalidStyle = sNewStyle;
    if sTag == '@opunused':
        oInstr.fUnused = True;
    elif sTag == '@opinvalid':
        oInstr.fInvalid = True;
    return True;
3833
def parseTagOpTest(self, sTag, aasSections, iTagLine, iEndLine): # pylint: disable=too-many-locals
    r"""
    Tag:        \@optest
    Value:      [<selectors>[ ]?] <inputs> -> <outputs>
    Example:    mode==64bit / in1=0xfffffffe:dw in2=1:dw -> out1=0xffffffff:dw outfl=a?,p?

    The main idea here is to generate basic instruction tests.

    The probably simplest way of handling the diverse input, would be to use
    it to produce size optimized byte code for a simple interpreter that
    modifies the register input and output states.

    An alternative to the interpreter would be creating multiple tables,
    but that becomes rather complicated wrt what goes where and then to use
    them in an efficient manner.

    Each section of the value is one test.  Tests that parse cleanly are
    appended to the aoTests list of the instruction, the others are
    reported using self.errorComment().  Always returns True.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    #
    # Do it section by section.
    #
    for asSectionLines in aasSections:
        #
        # Sort the input into outputs, inputs and selector conditions.
        #
        sFlatSection = self.flattenAllSections([asSectionLines,]);
        if not sFlatSection:
            self.errorComment(iTagLine, '%s: missing value (dbg: aasSections=%s)' % ( sTag, aasSections));
            continue;
        oTest = InstructionTest(oInstr);

        asSelectors = [];
        asInputs    = [];
        asOutputs   = [];
        asCur       = asOutputs;
        fRc         = True;
        asWords     = sFlatSection.split();
        # The words are processed back to front: outputs, '->', inputs,
        # '/', selectors.
        for sWord in reversed(asWords):
            # Check for array switchers.  These must compare identity: the
            # lists are distinct objects, but may well have equal content
            # (e.g. when both are still empty).
            if sWord == '->':
                if asCur is not asOutputs:
                    fRc = self.errorComment(iTagLine, '%s: "->" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asInputs;
            elif sWord == '/':
                if asCur is not asInputs:
                    fRc = self.errorComment(iTagLine, '%s: "/" shall only occure once: %s' % (sTag, sFlatSection,));
                    break;
                asCur = asSelectors;
            else:
                asCur.insert(0, sWord);

        #
        # Validate and add selectors.
        #
        for sCond in asSelectors:
            sCondExp = TestSelector.kdPredicates.get(sCond, sCond);
            oSelector = None;
            for sOp in TestSelector.kasCompareOps:
                off = sCondExp.find(sOp);
                if off >= 0:
                    sVariable = sCondExp[:off];
                    sValue    = sCondExp[off + len(sOp):];
                    if sVariable in TestSelector.kdVariables:
                        if sValue in TestSelector.kdVariables[sVariable]:
                            oSelector = TestSelector(sVariable, sOp, sValue);
                        else:
                            self.errorComment(iTagLine, '%s: invalid condition value "%s" in "%s" (valid: %s)'
                                                         % ( sTag, sValue, sCond,
                                                             TestSelector.kdVariables[sVariable].keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid condition variable "%s" in "%s" (valid: %s)'
                                                     % ( sTag, sVariable, sCond, TestSelector.kdVariables.keys(),));
                    break; # Only the first operator found is considered.
            if oSelector is not None:
                for oExisting in oTest.aoSelectors:
                    if oExisting.sVariable == oSelector.sVariable:
                        self.errorComment(iTagLine, '%s: already have a selector for variable "%s" (existing: %s, new: %s)'
                                                     % ( sTag, oSelector.sVariable, oExisting, oSelector,));
                oTest.aoSelectors.append(oSelector);
            else:
                fRc = self.errorComment(iTagLine, '%s: failed to parse selector: %s' % ( sTag, sCond,));

        #
        # Validate outputs and inputs, adding them to the test as we go along.
        #
        for asItems, sDesc, aoDst in [ (asInputs, 'input', oTest.aoInputs), (asOutputs, 'output', oTest.aoOutputs)]:
            asValidFieldKinds = [ 'both', sDesc, ];
            for sItem in asItems:
                oItem = None;
                for sOp in TestInOut.kasOperators:
                    off = sItem.find(sOp);
                    if off < 0:
                        continue;
                    sField     = sItem[:off];
                    sValueType = sItem[off + len(sOp):];
                    if     sField in TestInOut.kdFields \
                       and TestInOut.kdFields[sField][1] in asValidFieldKinds:
                        # The value may carry an explicit ':type' suffix,
                        # otherwise the field's default type applies.
                        asSplit = sValueType.split(':', 1);
                        sValue  = asSplit[0];
                        sType   = asSplit[1] if len(asSplit) > 1 else TestInOut.kdFields[sField][0];
                        if sType in TestInOut.kdTypes:
                            oValid = TestInOut.kdTypes[sType].validate(sValue);
                            if oValid is True:
                                if not TestInOut.kdTypes[sType].isAndOrPair(sValue) or sOp == '&|=':
                                    oItem = TestInOut(sField, sOp, sValue, sType);
                                else:
                                    self.errorComment(iTagLine, '%s: and-or %s value "%s" can only be used with "&|="'
                                                                % ( sTag, sDesc, sItem, ));
                            else:
                                self.errorComment(iTagLine, '%s: invalid %s value "%s" in "%s" (type: %s): %s'
                                                            % ( sTag, sDesc, sValue, sItem, sType, oValid, ));
                        else:
                            self.errorComment(iTagLine, '%s: invalid %s type "%s" in "%s" (valid types: %s)'
                                                         % ( sTag, sDesc, sType, sItem, TestInOut.kdTypes.keys(),));
                    else:
                        self.errorComment(iTagLine, '%s: invalid %s field "%s" in "%s"\nvalid fields: %s'
                                                     % ( sTag, sDesc, sField, sItem,
                                                         ', '.join([sKey for sKey, asVal in TestInOut.kdFields.items()
                                                                    if asVal[1] in asValidFieldKinds]),));
                    break; # Only the first operator found is considered.
                if oItem is not None:
                    for oExisting in aoDst:
                        if oExisting.sField == oItem.sField and oExisting.sOp == oItem.sOp:
                            self.errorComment(iTagLine,
                                              '%s: already have a "%s" assignment for field "%s" (existing: %s, new: %s)'
                                              % ( sTag, oItem.sOp, oItem.sField, oExisting, oItem,));
                    aoDst.append(oItem);
                else:
                    fRc = self.errorComment(iTagLine, '%s: failed to parse assignment: %s' % ( sTag, sItem,));

        #
        # Add the test if everything parsed fine, otherwise dump what we got.
        #
        if fRc:
            oInstr.aoTests.append(oTest);
        else:
            self.errorComment(iTagLine, '%s: failed to parse test: %s' % (sTag, ' '.join(asWords),));
            self.errorComment(iTagLine, '%s: asSelectors=%s / asInputs=%s -> asOutputs=%s'
                                        % (sTag, asSelectors, asInputs, asOutputs,));

    _ = iEndLine;
    return True;
3978
def parseTagOpTestNum(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Numbered \@optest tag.  Either \@optest42 or \@optest[42].

    Reports it if the number does not equal the number of tests the
    instruction already has, then hands the tag on to parseTagOpTest
    and returns whatever that returns.
    """
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The number follows '@optest' (7 chars), or is enclosed in brackets.
    sNumber = sTag[8:-1] if sTag[-1] == ']' else sTag[7:];
    iTest   = int(sNumber);

    cTests = len(oInstr.aoTests);
    if iTest != cTests:
        self.errorComment(iTagLine, '%s: incorrect test number: %u, actual %u' % (sTag, iTest, cTests,));
    return self.parseTagOpTest(sTag, aasSections, iTagLine, iEndLine);
3994
def parseTagOpTestIgnore(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@optestign | \@optestignore
    Value:      <value is ignored>

    This is a simple trick to ignore a test while debugging another: the
    tag and its value are accepted and dropped.  Returns True.

    See also \@oponlytest.
    """
    _ = (sTag, aasSections, iTagLine, iEndLine);
    return True;
4006
def parseTagOpCopyTests(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opcopytests
    Value:      <opstat | function> [..]
    Example:    \@opcopytests add_Eb_Gb

    Trick to avoid duplicating tests for different encodings of the same
    operation.

    Each reference must match self.oReStatsName or self.oReFunctionName.
    The valid ones are appended to the asCopyTests list of the
    instruction, while invalid and duplicate ones are reported.

    Returns True, or the result of self.errorComment() if there is no
    value at all.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten, validate and append the copy job to the instruction.  We execute
    # them after parsing all the input so we can handle forward references.
    asToCopy = self.flattenAllSections(aasSections).split();
    if not asToCopy:
        return self.errorComment(iTagLine, '%s: requires at least on reference value' % (sTag,));

    for sToCopy in asToCopy:
        if sToCopy in oInstr.asCopyTests:
            self.errorComment(iTagLine, '%s: ignoring duplicate "%s"' % (sTag, sToCopy,));
        elif self.oReStatsName.match(sToCopy) or self.oReFunctionName.match(sToCopy):
            oInstr.asCopyTests.append(sToCopy);
        else:
            self.errorComment(iTagLine, '%s: invalid instruction reference (opstat or function) "%s" (valid: %s or %s)'
                                        % (sTag, sToCopy, self.oReStatsName.pattern, self.oReFunctionName.pattern));
    return True;
4035
def parseTagOpOnlyTest(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@oponlytest | \@oponly
    Value:      none

    Only test instructions with this tag.  This is a trick that is handy
    for singling out one or two new instructions or tests.

    The instruction is added to g_aoOnlyTestInstructions unless already
    present.  Returns True on success, otherwise the result of
    self.errorComment().

    See also \@optestignore.
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # The tag does not take any value.
    sValue = self.flattenAllSections(aasSections).strip();
    if sValue:
        return self.errorComment(iTagLine, '%s: does not take any value: %s' % (sTag, sValue));

    # Add the instruction to the only-test list, avoiding duplicates.
    if oInstr in g_aoOnlyTestInstructions:
        return True;
    g_aoOnlyTestInstructions.append(oInstr);
    return True;
4058
def parseTagOpXcptType(self, sTag, aasSections, iTagLine, iEndLine):
    r"""
    Tag:        \@opxcpttype
    Value:      [none|1|2|3|4|4UA|5|6|7|8|11|12|E1|E1NF|E2|E3|E3NF|E4|E4NF|E5|E5NF|E6|E6NF|E7NF|E9|E9NF|E10|E11|E12|E12NF]

    Sets the SSE or AVX exception type (see SDMv2 2.4, 2.7).

    Exactly one type must be given and it must be a key of g_kdXcptTypes.

    Returns True on success, otherwise the result of self.errorComment().
    """
    _ = iEndLine;
    oInstr = self.ensureInstructionForOpTag(iTagLine);

    # Flatten the value, split it into words and validate the lot.
    asTypes = self.flattenAllSections(aasSections).split();
    if len(asTypes) != 1:
        return self.errorComment(iTagLine, '%s: exactly one invalid exception type, please: %s' % (sTag, asTypes,));
    (sNewType,) = asTypes;
    if sNewType not in g_kdXcptTypes:
        return self.errorComment(iTagLine, '%s: invalid invalid exception type: %s (valid: %s)'
                                           % (sTag, sNewType, sorted(g_kdXcptTypes.keys()),));

    # Set it unless that means overwriting an earlier value.
    if oInstr.sXcptType is None:
        oInstr.sXcptType = sNewType;
        return True;
    return self.errorComment(iTagLine,
                             '%s: attempting to overwrite "%s" with "%s" (only one @opxcpttype)'
                             % ( sTag, oInstr.sXcptType, sNewType,));
4085
def parseTagOpFunction(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opfunction
    Value:      <VMM function name>

    Explicitly sets the IEM function name.  Normally the name is picked up
    from the FNIEMOP_XXX macro invocation after the description, or it is
    generated from the mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or isn't implemented yet.

    Returns True on success, otherwise whatever errorComment returns.
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a function name.
    sName = self.flattenAllSections(aasSections);
    if self.oReFunctionName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM function name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReFunctionName.pattern));

    # Only one function name per instruction.
    sPrevious = oCurInstr.sFunction;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM function name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,));

    oCurInstr.sFunction = sName;
    return True;
4113
def parseTagOpStats(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opstats
    Value:      <VMM statistics base name>

    Explicitly sets the statistics name.  Normally the name is picked up
    from the IEMOP_MNEMONIC macro invocation, or it is generated from the
    mnemonic and operands.

    It is thought it may be necessary to set it when specifying instructions
    whose implementation isn't following immediately or isn't implemented yet.

    Returns True on success, otherwise whatever errorComment returns.
    """
    _ = iEndLine;
    oCurInstr = self.ensureInstructionForOpTag(iTagLine);

    # The flattened value must look like a statistics name.
    sName = self.flattenAllSections(aasSections);
    if self.oReStatsName.match(sName) is None:
        return self.errorComment(iTagLine, '%s: invalid VMM statistics name: "%s" (valid: %s)'
                                 % (sTag, sName, self.oReStatsName.pattern));

    # Only one statistics name per instruction.
    sPrevious = oCurInstr.sStats;
    if sPrevious is not None:
        return self.errorComment(iTagLine, '%s: attempting to overwrite VMM statistics base name "%s" with "%s"'
                                 % (sTag, sPrevious, sName,));

    oCurInstr.sStats = sName;
    return True;
4141
def parseTagOpDone(self, sTag, aasSections, iTagLine, iEndLine):
    """
    Tag:        @opdone
    Value:      none

    Used to explicitly flush the instructions that have been specified.

    Returns the doneInstructions result, or the errorComment result if the
    tag was followed by any text.
    """
    _ = iEndLine;
    sValue = self.flattenAllSections(aasSections);
    if sValue == '':
        return self.doneInstructions();
    return self.errorComment(iTagLine, '%s: takes no value, found: "%s"' % (sTag, sValue,));
4154
4155 ## @}
4156
4157
def parseComment(self):
    """
    Parses the current comment (self.sComment).

    The comment is split into lines, old style 'Opcode ...' comments are
    handed to parseCommentOldOpcode, and each '@tag' found is dispatched via
    self.dTagHandlers together with the text sections belonging to it.

    Returns False if the comment holds neither 'Opcode' nor '@', otherwise
    True.
    """
    #
    # Quick rejection of comments without anything interesting in them.
    #
    sText = self.sComment;
    if sText.find('Opcode') < 0 and sText.find('@') < 0:
        return False;

    #
    # Split into lines, dropping the leading asterisks and spaces, then trim
    # the empty lines at both ends.  Each empty line dropped at the start
    # moves the comment start line forward.
    #
    asLines = [sRaw.lstrip().lstrip('*').lstrip() for sRaw in sText.split('\n')];

    while asLines and not asLines[0]:
        del asLines[0];
        self.iCommentLine += 1;

    while asLines and not asLines[-1]:
        asLines.pop();

    #
    # Old style comment: Opcode 0x0f 0x12
    #
    if asLines[0].startswith('Opcode '):
        self.parseCommentOldOpcode(asLines);

    #
    # Walk the lines collecting @op* tagged data.  Text preceding the first
    # tag is gathered under the pseudo tag '@default'.
    #
    cHandledTags = 0;
    sUntagged    = None;
    sTagName     = '@default';
    iTagLineNo   = 0;
    asSection    = [];
    aasTagData   = [ asSection, ];
    for iLine, sLine in enumerate(asLines):
        if not sLine.startswith('@'):
            # Plain text: extend the current section; an empty line after
            # some text opens a new section.
            if sLine:
                asSection.append(sLine);
            elif asSection:
                asSection = [];
                aasTagData.append(asSection);
            continue;

        #
        # A new tag starts here, so process the one collected so far.
        #
        if not asSection and len(aasTagData) > 1:
            aasTagData.pop(-1);
        if sTagName in self.dTagHandlers:
            self.dTagHandlers[sTagName](sTagName, aasTagData, iTagLineNo, iLine);
            cHandledTags += 1;
        elif sTagName.startswith('@op'):
            self.errorComment(iTagLineNo, 'Unknown tag: %s' % (sTagName));
        elif sTagName == '@default':
            sUntagged = self.flattenAllSections(aasTagData);
        elif '@op' + sTagName[1:] in self.dTagHandlers:
            self.errorComment(iTagLineNo, 'Did you mean "@op%s" rather than "%s"?' % (sTagName[1:], sTagName));
        elif sTagName in ['@encoding', '@opencoding']:
            self.errorComment(iTagLineNo, 'Did you mean "@openc" rather than "%s"?' % (sTagName,));

        #
        # Set up the new tag; text on the tag line starts its first section.
        #
        asWords    = sLine.split(None, 1);
        sTagName   = asWords[0].lower();
        asSection  = [asWords[1],] if len(asWords) > 1 else [];
        aasTagData = [asSection, ];
        iTagLineNo = iLine;

    #
    # Process the final tag (no "did you mean" hints are given here).
    #
    if not asSection and len(aasTagData) > 1:
        aasTagData.pop(-1);
    if sTagName in self.dTagHandlers:
        self.dTagHandlers[sTagName](sTagName, aasTagData, iTagLineNo, iLine);
        cHandledTags += 1;
    elif sTagName.startswith('@op'):
        self.errorComment(iTagLineNo, 'Unknown tag: %s' % (sTagName));
    elif sTagName == '@default':
        sUntagged = self.flattenAllSections(aasTagData);

    #
    # Don't allow default text in blocks containing @op*.
    #
    if cHandledTags > 0 and sUntagged:
        self.errorComment(0, 'Untagged comment text is not allowed with @op*: %s' % (sUntagged,));

    return True;
4258
def parseMacroInvocation(self, sInvocation):
    """
    Parses a macro invocation.

    The text must start with the macro name.  If the closing parenthesis is
    not found in sInvocation, the following lines of self.asLines (starting
    at self.iLine) are appended until it is; self.iLine is not modified.

    Returns a tuple, first element is the offset following the macro
    invocation.  The second element is a list of macro arguments, where the
    zero'th is the macro name.
    """
    # The macro name is whatever precedes the first opening parenthesis.
    offParen = sInvocation.find('(');
    if offParen <= 0:
        self.raiseError("macro invocation open parenthesis not found");
    sMacroName = sInvocation[:offParen].strip();
    if not self.oReMacroName.match(sMacroName):
        return self.error("invalid macro name '%s'" % (sMacroName,));
    asResult = [sMacroName, ];

    # Scan the arguments one character at a time, tracking parenthesis depth
    # and whether we are inside a quoted string or character literal.
    iNextLine   = self.iLine;
    cNesting    = 1;
    offCur      = offParen + 1;
    offArgStart = offCur;
    chInQuote   = None;
    while cNesting > 0:
        if offCur >= len(sInvocation):
            # Ran out of text, pull in the next source line if there is one.
            if iNextLine >= len(self.asLines):
                self.error('macro invocation beyond end of file');
                return (offCur, asResult);
            sInvocation += self.asLines[iNextLine];
            iNextLine += 1;
        ch = sInvocation[offCur];

        if chInQuote:
            if ch == '\\' and offCur + 1 < len(sInvocation):
                offCur += 1;                # skip the escaped character
            elif ch == chInQuote:
                chInQuote = None;
        elif ch == '"' or ch == '\'':
            chInQuote = ch;
        elif ch == '(':
            cNesting += 1;
        elif ch == ',' or ch == ')':
            # Only top level separators terminate an argument.
            if cNesting == 1:
                asResult.append(sInvocation[offArgStart:offCur].strip());
                offArgStart = offCur + 1;
            if ch == ')':
                cNesting -= 1;
        offCur += 1;

    return (offCur, asResult);
4309
def findAndParseMacroInvocationEx(self, sCode, sMacro):
    """
    Looks for the first occurrence of sMacro in sCode and parses it as a
    macro invocation if it is followed (ignoring whitespace) by an opening
    parenthesis.

    Only the first occurrence is considered, so a longer macro name merely
    starting with sMacro (e.g. FOO_1 when looking for FOO) counts as not
    found.

    Returns (len(sCode), None) if not found, parseMacroInvocation result if
    found (with the offset adjusted to be relative to sCode).
    """
    offHit = sCode.find(sMacro);
    # Use startswith rather than indexing [0]: nothing at all may follow the
    # macro name (e.g. it is the last word of the snippet), and indexing an
    # empty string would raise IndexError.
    if offHit >= 0 and sCode[offHit + len(sMacro):].lstrip().startswith('('):
        offAfter, asRet = self.parseMacroInvocation(sCode[offHit:]);
        return (offHit + offAfter, asRet);
    return (len(sCode), None);
4319
def findAndParseMacroInvocation(self, sCode, sMacro):
    """
    Convenience wrapper around findAndParseMacroInvocationEx which drops the
    offset.

    Returns None if not found, arguments as per parseMacroInvocation if found.
    """
    tResult = self.findAndParseMacroInvocationEx(sCode, sMacro);
    return tResult[1];
4325
def findAndParseFirstMacroInvocation(self, sCode, asMacro):
    """
    Tries the macro names in asMacro in order, stopping at the first one for
    which findAndParseMacroInvocation finds an invocation in sCode.

    Returns same as findAndParseMacroInvocation.
    """
    # The generator is lazy, so no further macro names are probed once a hit
    # has been produced.
    oHits = (self.findAndParseMacroInvocation(sCode, sName) for sName in asMacro);
    return next((asHit for asHit in oHits if asHit is not None), None);
4335
def workerIemOpMnemonicEx(self, sMacro, sStats, sAsm, sForm, sUpper, sLower,  # pylint: disable=too-many-arguments
                          sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0EX, IEMOP_MNEMONIC1EX, IEMOP_MNEMONIC2EX,
    IEMOP_MNEMONIC3EX, and IEMOP_MNEMONIC4EX macros.

    The macro arguments are cross checked against what the @op* tags have
    already established for the last instruction in self.aoCurInstrs, and
    whatever the tags left unspecified (mnemonic, operands, encoding, stats
    name, hints) is filled in from the macro.  Problems are reported via
    self.error and do not stop the processing.

    Parameters:
        sMacro      The macro name, only used in messages.
        sStats      The a_Stats argument (statistics name).
        sAsm        The a_szMnemonic argument (unused).
        sForm       The a_Form argument, expected to be a g_kdIemForms key.
        sUpper      The a_Upper argument (upper case mnemonic).
        sLower      The a_Lower argument (lower case mnemonic).
        sDisHints   The a_fDisHints argument, '|' separated DISOPTYPE_XXX or 0.
        sIemHints   The a_fIemHints argument, '|' separated IEMOPHINT_XXX or 0.
        asOperands  List of operand type names (g_kdOpTypes keys).

    Returns True.
    """
    #
    # Some invocation checks.
    #
    if sUpper != sUpper.upper():
        self.error('%s: bad a_Upper parameter: %s' % (sMacro, sUpper,));
    if sLower != sLower.lower():
        self.error('%s: bad a_Lower parameter: %s' % (sMacro, sLower,));
    if sUpper.lower() != sLower:
        self.error('%s: a_Upper and a_Lower parameters does not match: %s vs %s' % (sMacro, sUpper, sLower,));
    if not self.oReMnemonic.match(sLower):
        self.error('%s: invalid a_Lower: %s (valid: %s)' % (sMacro, sLower, self.oReMnemonic.pattern,));

    #
    # Check if sIemHints tells us to not consider this macro invocation.
    #
    if sIemHints.find('IEMOPHINT_SKIP_PYTHON') >= 0:
        return True;

    # Apply to the last instruction only for now.
    if not self.aoCurInstrs:
        self.addInstruction();
    oInstr = self.aoCurInstrs[-1];
    if oInstr.iLineMnemonicMacro == -1:
        oInstr.iLineMnemonicMacro = self.iLine;
    else:
        self.error('%s: already saw a IEMOP_MNEMONIC* macro on line %u for this instruction'
                   % (sMacro, oInstr.iLineMnemonicMacro,));

    # Mnemonic
    if oInstr.sMnemonic is None:
        oInstr.sMnemonic = sLower;
    elif oInstr.sMnemonic != sLower:
        self.error('%s: current instruction and a_Lower does not match: %s vs %s' % (sMacro, oInstr.sMnemonic, sLower,));

    # Process operands.
    if len(oInstr.aoOperands) not in [0, len(asOperands)]:
        self.error('%s: number of operands given by @opN does not match macro: %s vs %s'
                   % (sMacro, len(oInstr.aoOperands), len(asOperands),));
    for iOperand, sType in enumerate(asOperands):
        sWhere = g_kdOpTypes.get(sType, [None, None])[1];
        if sWhere is None:
            self.error('%s: unknown a_Op%u value: %s' % (sMacro, iOperand + 1, sType));
            if iOperand < len(oInstr.aoOperands): # error recovery.
                sWhere = oInstr.aoOperands[iOperand].sWhere;
                sType  = oInstr.aoOperands[iOperand].sType;
            else:
                sWhere = 'reg';
                sType  = 'Gb';
        if iOperand == len(oInstr.aoOperands):
            oInstr.aoOperands.append(Operand(sWhere, sType))
        elif oInstr.aoOperands[iOperand].sWhere != sWhere or oInstr.aoOperands[iOperand].sType != sType:
            self.error('%s: @op%u and a_Op%u mismatch: %s:%s vs %s:%s'
                       % (sMacro, iOperand + 1, iOperand + 1, oInstr.aoOperands[iOperand].sWhere,
                          oInstr.aoOperands[iOperand].sType, sWhere, sType,));

    # Encoding.
    if sForm not in g_kdIemForms:
        self.error('%s: unknown a_Form value: %s' % (sMacro, sForm,));
    else:
        if oInstr.sEncoding is None:
            oInstr.sEncoding = g_kdIemForms[sForm][0];
        elif g_kdIemForms[sForm][0] != oInstr.sEncoding:
            self.error('%s: current instruction @openc and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sEncoding, g_kdIemForms[sForm], sForm));

        # Check the parameter locations for the encoding.
        if g_kdIemForms[sForm][1] is not None:
            if len(g_kdIemForms[sForm][1]) > len(oInstr.aoOperands):
                self.error('%s: The a_Form=%s has a different operand count: %s (form) vs %s'
                           % (sMacro, sForm, len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands) ));
            else:
                # A 'V' operand requires a VEX or XOP form.  This used to be
                # written as "sForm.find('VEX') > 0 or sForm.find('XOP')",
                # where the bare find() result is truthy for "not found" (-1)
                # and falsy for a hit at offset 0, so the check never fired
                # for non-VEX/XOP forms and misfired for forms starting with
                # 'XOP'.
                fVexOrXopForm = 'VEX' in sForm or 'XOP' in sForm;
                for iOperand, sWhere in enumerate(g_kdIemForms[sForm][1]):
                    if oInstr.aoOperands[iOperand].sWhere != sWhere:
                        self.error('%s: current instruction @op%u and a_Form location does not match: %s vs %s (%s)'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sWhere, sWhere, sForm,));
                    sOpFormMatch = g_kdOpTypes[oInstr.aoOperands[iOperand].sType][4];
                    if    (sOpFormMatch in [ 'REG', 'MEM', ] and sForm.find('_' + sOpFormMatch) < 0) \
                       or (sOpFormMatch in [ 'FIXED', ]      and sForm.find(sOpFormMatch) < 0) \
                       or (sOpFormMatch == 'RM' and (sForm.find('_MEM') > 0 or sForm.find('_REG') > 0) ) \
                       or (sOpFormMatch == 'V'  and (   not fVexOrXopForm \
                                                     or sForm.replace('VEX','').find('V') < 0) ):
                        self.error('%s: current instruction @op%u and a_Form type does not match: %s/%s vs %s'
                                   % (sMacro, iOperand + 1, oInstr.aoOperands[iOperand].sType, sOpFormMatch, sForm, ));
                # Operands beyond those given by the form must be fixed ones.
                if len(g_kdIemForms[sForm][1]) < len(oInstr.aoOperands):
                    for iOperand in range(len(g_kdIemForms[sForm][1]), len(oInstr.aoOperands)):
                        if    oInstr.aoOperands[iOperand].sType != 'FIXED' \
                          and g_kdOpTypes[oInstr.aoOperands[iOperand].sType][0] != 'IDX_ParseFixedReg':
                            self.error('%s: Expected FIXED type operand #%u following operands given by a_Form=%s: %s (%s)'
                                       % (sMacro, iOperand, sForm, oInstr.aoOperands[iOperand].sType,
                                          oInstr.aoOperands[iOperand].sWhere));

        # Check @opcodesub
        if    oInstr.sSubOpcode \
          and g_kdIemForms[sForm][2] \
          and oInstr.sSubOpcode.find(g_kdIemForms[sForm][2]) < 0:
            self.error('%s: current instruction @opcodesub and a_Form does not match: %s vs %s (%s)'
                       % (sMacro, oInstr.sSubOpcode, g_kdIemForms[sForm][2], sForm,));

    # Stats.
    if not self.oReStatsName.match(sStats):
        self.error('%s: invalid a_Stats value: %s' % (sMacro, sStats,));
    elif oInstr.sStats is None:
        oInstr.sStats = sStats;
    elif oInstr.sStats != sStats:
        self.error('%s: mismatching @opstats and a_Stats value: %s vs %s'
                   % (sMacro, oInstr.sStats, sStats,));

    # Process the hints (simply merge with @ophints w/o checking anything).
    for sHint in sDisHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('DISOPTYPE_'):
            sShortHint = sHint[len('DISOPTYPE_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fDisHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fDisHints value: %s' % (sMacro, sHint,));

    for sHint in sIemHints.split('|'):
        sHint = sHint.strip();
        if sHint.startswith('IEMOPHINT_'):
            sShortHint = sHint[len('IEMOPHINT_'):].lower();
            if sShortHint in g_kdHints:
                oInstr.dHints[sShortHint] = True; # (dummy value, using dictionary for speed)
            else:
                self.error('%s: unknown a_fIemHints value: %s' % (sMacro, sHint,));
        elif sHint != '0':
            self.error('%s: expected a_fIemHints value: %s' % (sMacro, sHint,));

    _ = sAsm;
    return True;
4475
def workerIemOpMnemonic(self, sMacro, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands):
    """
    Processes one of the IEMOP_MNEMONIC0, IEMOP_MNEMONIC1, IEMOP_MNEMONIC2,
    IEMOP_MNEMONIC3, and IEMOP_MNEMONIC4 macros.

    These macros carry neither a stats name nor an assembly string, so both
    are derived from the lower case mnemonic and the operands before handing
    the job over to workerIemOpMnemonicEx.

    Returns what workerIemOpMnemonicEx returns.
    """
    if asOperands:
        sStats = sLower + '_' + '_'.join(asOperands);
        sAsm   = sLower + ' ' + ','.join(asOperands);
    else:
        sStats = sLower;
        sAsm   = sLower;
    return self.workerIemOpMnemonicEx(sMacro, sStats, sAsm, sForm, sUpper, sLower, sDisHints, sIemHints, asOperands);
4485
def workerIemMcBegin(self, sCode, offBeginStatementInCodeStr, offBeginStatementInLine):
    """
    Processes an IEM_MC_BEGIN macro invocation by opening a new MC block.

    Parameters:
        sCode                       The code snippet holding the statement.
        offBeginStatementInCodeStr  Offset of the statement within sCode.
        offBeginStatementInLine     Offset of the statement within the line.

    Returns True.  Problems are reported via self.raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_BEGIN on %s off %s' % (self.iLine, offBeginStatementInLine,));
        #self.debug('%s<eos>' % (sCode,));

    # Preconditions: we must be inside a function and not inside a block.
    if not self.sCurFunction:
        self.raiseError('IEM_MC_BEGIN w/o current function (%s)' % (sCode,));
    if self.oCurMcBlock:
        self.raiseError('IEM_MC_BEGIN before IEM_MC_END.  Previous IEM_MC_BEGIN at line %u' % (self.oCurMcBlock.iBeginLine,));

    # The indent is the distance from the start of the statement's own line;
    # sCode may hold several lines when it stems from an expanded multiline
    # macro.  (rfind gives -1 when there is no newline, making the start 0.)
    offLineStart = sCode.rfind('\n', 0, offBeginStatementInCodeStr) + 1;
    cchIndent    = offBeginStatementInCodeStr - offLineStart;
    #self.debug('cchIndent=%s offLineStart=%s sFunc=%s' % (cchIndent, offLineStart, self.sCurFunction));

    # Open the new block and do the bookkeeping.
    oNewBlock = McBlock(self.sSrcFile, self.iLine, offBeginStatementInLine,
                        self.sCurFunction, self.iMcBlockInFunc, cchIndent);
    self.oCurMcBlock = oNewBlock;
    g_aoMcBlocks.append(oNewBlock);
    self.cTotalMcBlocks += 1;
    self.iMcBlockInFunc += 1;
    return True;
4514
def workerIemMcEnd(self, offEndStatementInLine):
    """
    Processes an IEM_MC_END macro invocation by completing the current MC
    block with its source lines and then forgetting about it.

    Parameters:
        offEndStatementInLine   Offset of the statement within the line.

    Returns True.  Problems are reported via self.raiseError.
    """
    if self.fDebugMc:
        self.debug('IEM_MC_END on %s off %s' % (self.iLine, offEndStatementInLine,));

    # Precondition: there must be an open block.
    oBlock = self.oCurMcBlock;
    if not oBlock:
        self.raiseError('IEM_MC_END w/o IEM_MC_BEGIN.');

    #
    # Gather the lines making up the block.
    #
    # HACK ALERT! For blocks originating from macro expansion the start and
    #             end line will be the same, but the line has multiple
    #             newlines inside it.  So, we have to do some extra tricks
    #             to get the lines out of there. We ASSUME macros aren't
    #             messy, but keep IEM_MC_BEGIN/END on separate lines.
    #
    if self.iLine > oBlock.iBeginLine:
        asBlockLines = self.asLines[oBlock.iBeginLine - 1 : self.iLine];
        if not asBlockLines[0].strip().startswith('IEM_MC_BEGIN'):
            self.raiseError('IEM_MC_BEGIN is not the first word on the line');
    else:
        sExpanded = self.asLines[self.iLine - 1];

        # Cut off whatever follows the line holding the end statement.
        offNewline = sExpanded.find('\n', offEndStatementInLine);
        if offNewline > 0:
            sExpanded = sExpanded[:offNewline + 1];

        # Cut off whatever precedes the line holding the begin statement, and
        # if something precedes the statement on that line, drop that too.
        offFirst  = sExpanded.rfind('\n', 0, oBlock.offBeginLine) + 1;
        sExpanded = sExpanded[offFirst:];
        if not sExpanded.strip().startswith('IEM_MC_BEGIN'):
            sExpanded = sExpanded[oBlock.offBeginLine - offFirst:];

        asBlockLines = [sPiece + '\n' for sPiece in sExpanded.split('\n')];

    oBlock.complete(self.iLine, offEndStatementInLine, asBlockLines);
    self.oCurMcBlock = None;
    return True;
4556
def checkCodeForMacro(self, sCode, offLine):
    """
    Checks a code snippet for the macro invocations we care about: the
    FNIEMOP_XXX function definitions, IEMOP_HLP_DONE_VEX_DECODING_XXX,
    IEMOP_MNEMONIC* and IEM_MC_BEGIN / IEM_MC_END.

    Parameters:
        sCode       The code snippet to check.
        offLine     Offset of sCode within the current source line.

    Returns True if a function definition or an IEM_MC_XXX statement was
    found, otherwise False.
    """
    # No parenthesis (past the first character), no macro invocation.
    if sCode.find('(') <= 0:
        return False;

    #
    # Instruction decoder function definitions.  ASSUME single line.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF',
                                                     'FNIEMOPRM_DEF',
                                                     'FNIEMOP_STUB',
                                                     'FNIEMOP_STUB_1',
                                                     'FNIEMOP_UD_STUB',
                                                     'FNIEMOP_UD_STUB_1' ]);
    if asArgs is not None:
        sDefMacro = asArgs[0];
        self.sCurFunction = asArgs[1];
        #self.debug('%s: sCurFunction=%s' % (self.iLine, self.sCurFunction,));

        if not self.aoCurInstrs:
            self.addInstruction();
        for oInstr in self.aoCurInstrs:
            if oInstr.iLineFnIemOpMacro == -1:
                oInstr.iLineFnIemOpMacro = self.iLine;
            else:
                self.error('%s: already seen a FNIEMOP_XXX macro for %s' % (sDefMacro, oInstr,) );

        fIsStub = sDefMacro.find('STUB') > 0;
        self.setInstrunctionAttrib('sFunction', asArgs[1]);
        self.setInstrunctionAttrib('fStub', fIsStub, fOverwrite = True);
        self.setInstrunctionAttrib('fUdStub', sDefMacro.find('UD_STUB') > 0, fOverwrite = True);
        if fIsStub:
            # A stub has no body, so the instruction(s) are complete now.
            self.doneInstructions(fEndOfFunction = True);
        return True;

    #
    # Worker function definitions, so we can get a context for MC blocks.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'FNIEMOP_DEF_1',
                                                     'FNIEMOP_DEF_2', ]);
    if asArgs is not None:
        self.sCurFunction = asArgs[1];
        #self.debug('%s: sCurFunction=%s (%s)' % (self.iLine, self.sCurFunction, asArgs[0]));
        return True;

    #
    # IEMOP_HLP_DONE_VEX_DECODING_*: The L0 variants imply the vex_l_zero
    # hint, so complain if the mnemonic macro didn't give it and add it.
    #
    asArgs = self.findAndParseFirstMacroInvocation(sCode,
                                                   [ 'IEMOP_HLP_DONE_VEX_DECODING',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_NO_VVVV',
                                                     'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV',
                                                     ]);
    if asArgs is not None:
        sMacro = asArgs[0];
        if sMacro in ('IEMOP_HLP_DONE_VEX_DECODING_L0', 'IEMOP_HLP_DONE_VEX_DECODING_L0_AND_NO_VVVV', ):
            for oInstr in self.aoCurInstrs:
                if 'vex_l_zero' in oInstr.dHints:
                    continue;
                if oInstr.iLineMnemonicMacro >= 0:
                    self.errorOnLine(oInstr.iLineMnemonicMacro,
                                     'Missing IEMOPHINT_VEX_L_ZERO! (%s on line %d)' % (sMacro, self.iLine,));
                oInstr.dHints['vex_l_zero'] = True;

    #
    # IEMOP_MNEMONIC*
    #
    if sCode.find('IEMOP_MNEMONIC') >= 0:
        # IEMOP_MNEMONIC(a_Stats, a_szMnemonic) IEMOP_INC_STATS(a_Stats)
        asArgs = self.findAndParseMacroInvocation(sCode, 'IEMOP_MNEMONIC');
        if asArgs is not None and len(self.aoCurInstrs) == 1:
            oInstr = self.aoCurInstrs[0];
            if oInstr.sStats is None:
                oInstr.sStats = asArgs[1];
            self.deriveMnemonicAndOperandsFromStats(oInstr, asArgs[1]);

        # IEMOP_MNEMONIC<N>EX(a_Stats, a_szMnemonic, a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,]
        #                     a_fDisHints, a_fIemHints)
        # The N operands sit between a_Lower and the two hint arguments.
        for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0EX', 'IEMOP_MNEMONIC1EX', 'IEMOP_MNEMONIC2EX',
                                            'IEMOP_MNEMONIC3EX', 'IEMOP_MNEMONIC4EX',)):
            asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
            if asArgs is not None:
                iHints = 6 + cOperands;
                self.workerIemOpMnemonicEx(asArgs[0], asArgs[1], asArgs[2], asArgs[3], asArgs[4], asArgs[5],
                                           asArgs[iHints], asArgs[iHints + 1], asArgs[6:iHints]);

        # IEMOP_MNEMONIC<N>(a_Form, a_Upper, a_Lower, [a_Op1, .., a_OpN,] a_fDisHints, a_fIemHints)
        for cOperands, sMacro in enumerate(('IEMOP_MNEMONIC0', 'IEMOP_MNEMONIC1', 'IEMOP_MNEMONIC2',
                                            'IEMOP_MNEMONIC3', 'IEMOP_MNEMONIC4',)):
            asArgs = self.findAndParseMacroInvocation(sCode, sMacro);
            if asArgs is not None:
                iHints = 4 + cOperands;
                self.workerIemOpMnemonic(asArgs[0], asArgs[1], asArgs[2], asArgs[3],
                                         asArgs[iHints], asArgs[iHints + 1], asArgs[4:iHints]);

    #
    # IEM_MC_BEGIN + IEM_MC_END.
    # We must support multiple instances per code snippet.
    #
    offFirstMc = sCode.find('IEM_MC_');
    if offFirstMc < 0:
        return False;
    for oMatch in self.oReMcBeginEnd.finditer(sCode, offFirstMc):
        offStatement = oMatch.start();
        if oMatch.group(1) == 'END':
            self.workerIemMcEnd(offLine + offStatement);
        else:
            self.workerIemMcBegin(sCode, offStatement, offLine + offStatement);
    return True;
4697
def workerPreProcessRecreateMacroRegex(self):
    """
    Recreates self.oReMacros when self.dMacros changes.

    The expression matches any of the macro names we know at a word
    boundary.  A macro taking arguments must be followed by optional
    whitespace and an opening parenthesis (both included in the match),
    while a simple macro must end at a word boundary.  With no macros,
    self.oReMacros is set to None.

    Returns True.
    """
    if self.dMacros:
        # Raw strings are used for the regex fragments; '\s' and '\(' in a
        # plain string literal are invalid escape sequences, which newer
        # Python versions warn about.
        asAlternatives = [];
        for sName, oMacro in self.dMacros.items():
            if oMacro.asArgs is not None:
                asAlternatives.append(sName + r'\s*\(');
            else:
                asAlternatives.append(sName + r'\b');
        self.oReMacros = re.compile(r'\b(' + '|'.join(asAlternatives) + ')');
    else:
        self.oReMacros = None;
    return True;
4718
def workerPreProcessDefine(self, sRest):
    """
    Handles a macro #define, the sRest is what follows after the directive word.

    Continuation lines are consumed from self.asLines (advancing self.iLine).
    Only macros whose body contains IEM_MC_BEGIN are recorded in
    self.dMacros; all others are ignored.

    Returns True for ignored macros, the workerPreProcessRecreateMacroRegex
    result for recorded ones, and the self.error result for definitions we
    cannot make sense of.
    """
    #
    # If using line continuation, just concat all the lines together,
    # preserving the newline character but not the escaping.
    #
    iFirstLine = self.iLine;
    while sRest.endswith('\\\n') and self.iLine < len(self.asLines):
        sHead = sRest[:-2].rstrip();
        sRest = sHead + '\n' + self.asLines[self.iLine];
        self.iLine += 1;
    #self.debug('workerPreProcessDefine: sRest=%s<EOS>' % (sRest,));

    #
    # Use regex to split out the name, argument list and body.
    # Function-like form first; if that fails, we assume it's a simple macro.
    #
    oFuncLike = self.oReHashDefine2.match(sRest);
    if oFuncLike:
        sName  = oFuncLike.group(1);
        asArgs = [sParam.strip() for sParam in oFuncLike.group(2).split(',')];
        sBody  = oFuncLike.group(3);
    else:
        oSimple = self.oReHashDefine3.match(sRest);
        if not oSimple:
            self.debug('workerPreProcessDefine: wtf? sRest=%s' % (sRest,));
            return self.error('bogus macro definition: %s' % (sRest,));
        sName  = oSimple.group(1);
        asArgs = None;
        sBody  = oSimple.group(2);
    assert sName == sName.strip();
    #self.debug('workerPreProcessDefine: sName=%s asArgs=%s sBody=%s<EOS>' % (sName, asArgs, sBody));

    #
    # Is this of any interest to us?  We do NOT support MC blocks within
    # nested macro expansion, just to avoid lots of extra work.
    #
    if sBody.find("IEM_MC_BEGIN") < 0:
        #self.debug('workerPreProcessDefine: irrelevant (%s: %s)' % (sName, sBody));
        return True;

    #
    # Add the macro and refresh the regular expression matching them all.
    #
    if self.fDebugPreProc:
        self.debug('#define %s on line %u' % (sName, self.iLine,));
    self.dMacros[sName] = SimpleParser.Macro(sName, asArgs, sBody.strip(), iFirstLine);
    return self.workerPreProcessRecreateMacroRegex();
4768
def workerPreProcessUndef(self, sRest):
    """
    Handles a macro #undef, the sRest is what follows after the directive word.

    Returns True if the macro isn't one we are tracking, otherwise the
    workerPreProcessRecreateMacroRegex result after forgetting the macro.
    """
    # Quick comment strip (anything from the first slash on) to isolate the name.
    offSlash = sRest.find('/');
    sName = (sRest[:offSlash] if offSlash > 0 else sRest).strip();

    # Nothing to do unless we're tracking the macro.
    if sName not in self.dMacros:
        return True;

    if self.fDebugPreProc:
        self.debug('#undef %s on line %u' % (sName, self.iLine,));
    del self.dMacros[sName];
    return self.workerPreProcessRecreateMacroRegex();
4787
def checkPreProcessorDirectiveForDefineUndef(self, sLine):
    """
    Handles a preprocessor directive line, dispatching #define and #undef to
    their workers.

    Returns the worker's result, or False if sLine is neither directive.
    """
    oDefine = self.oReHashDefine.match(sLine);
    if oDefine:
        return self.workerPreProcessDefine(oDefine.group(1) + '\n');

    oUndef = self.oReHashUndef.match(sLine);
    if not oUndef:
        return False;
    return self.workerPreProcessUndef(oUndef.group(1) + '\n');
4800
def expandMacros(self, sLine, oMatch):
    """
    Expands macros we know about in the given line.
    Currently we ASSUME there is only one and that is what oMatch matched.

    Parameters:
        sLine   The source line.
        oMatch  The self.oReMacros match object for sLine.

    Returns the line with the macro invocation replaced by its expansion.
    """
    #
    # Get our bearings.  The match includes the opening parenthesis when the
    # macro takes arguments.
    #
    offStart = oMatch.start();
    offEnd   = oMatch.end();
    sName    = oMatch.group(1);
    assert sName == sLine[offStart : offEnd];
    fWithArgs = sName.endswith('(');
    if fWithArgs:
        sName = sName[:-1].strip();
    oMacro = self.dMacros[sName] # type: SimpleParser.Macro
    sBefore = sLine[:offStart];

    #
    # Deal with simple macro invocations w/o parameters.
    #
    if not fWithArgs:
        if self.fDebugPreProc:
            self.debug('expanding simple macro %s on line %u' % (sName, self.iLine,));
        return sBefore + oMacro.expandMacro(self) + sLine[offEnd:];

    #
    # Complicated macro with parameters.
    # Start by extracting the parameters. ASSUMES they are all on the same line!
    #
    cNesting    = 1;
    offCur      = offEnd;
    offArgStart = offEnd;
    asArgs      = [];
    while True:
        if offCur >= len(sLine):
            self.raiseError('expandMacros: Invocation of macro %s spans multiple lines!' % (sName,));
        ch = sLine[offCur];
        if ch == ',' and cNesting == 1:
            # Top level comma: end of one argument, start of the next.
            asArgs.append(sLine[offArgStart:offCur].strip());
            offArgStart = offCur + 1;
        elif ch == '(':
            cNesting += 1;
        elif ch == ')':
            cNesting -= 1;
            if cNesting == 0:
                # The closing parenthesis of the invocation ends the last argument.
                asArgs.append(sLine[offArgStart:offCur].strip());
                break;
        offCur += 1;
    if len(oMacro.asArgs) == 0 and len(asArgs) == 1 and asArgs[0] == '': # trick for empty parameter list.
        asArgs = [];
    if len(oMacro.asArgs) != len(asArgs):
        self.raiseError('expandMacros: Argument mismatch in %s invocation' % (oMacro.sName,));

    #
    # Do the expanding.
    #
    if self.fDebugPreProc:
        self.debug('expanding macro %s on line %u with arguments %s' % (sName, self.iLine, asArgs));
    return sBefore + oMacro.expandMacro(self, asArgs) + sLine[offCur + 1 :];
4859
def parse(self):
    """
    Parses the given file.

    Walks self.asLines starting at self.iLine.  While in code state, known
    macros are expanded (the expanded text replaces the entry in
    self.asLines) and code fragments are handed to self.checkCodeForMacro().
    Multi-line C comments are accumulated in self.sComment and passed on via
    self.parseComment() when the closing '*/' is seen.

    Returns number of errors (the self.printErrors() result).
    Raises exception on fatal trouble.
    """
    while self.iLine < len(self.asLines):
        sLine = self.asLines[self.iLine];
        self.iLine += 1;

        # Expand macros we know about if we're currently in code.
        if self.iState == self.kiCode and self.oReMacros:
            oMatch = self.oReMacros.search(sLine);
            if oMatch:
                sLine = self.expandMacros(sLine, oMatch);
                if self.fDebugPreProc:
                    self.debug('line %d: expanded\n%s ==>\n%s' % (self.iLine, self.asLines[self.iLine - 1], sLine[:-1],));
                self.asLines[self.iLine - 1] = sLine;

        # Look for comments.
        offSlash = sLine.find('/');
        if offSlash >= 0:
            if offSlash + 1 >= len(sLine) or sLine[offSlash + 1] != '/' or self.iState != self.kiCode:
                offLine = 0;
                while offLine < len(sLine):
                    if self.iState == self.kiCode:
                        # Look for substantial multiline comment so we pass the following MC as a whole line:
                        #   IEM_MC_ARG_CONST(uint8_t, bImmArg, /*=*/ bImm,   2);
                        # Note! We ignore C++ comments here, assuming these aren't used in lines with C-style comments.
                        offHit = sLine.find('/*', offLine);
                        while offHit >= 0:
                            offEnd = sLine.find('*/', offHit + 2);
                            if offEnd < 0 or offEnd - offHit >= 16: # 16 chars is a bit random.
                                break;
                            # Resume after the closing '*/'.  Starting at offEnd itself would let the
                            # '/' of the terminator pair up with a following '*' (as in '/*=*/*pb')
                            # and be mistaken for the start of another comment.
                            offHit = sLine.find('/*', offEnd + 2);

                        if offHit >= 0:
                            self.checkCodeForMacro(sLine[offLine:offHit], offLine);
                            self.sComment     = '';
                            self.iCommentLine = self.iLine;
                            self.iState       = self.kiCommentMulti;
                            offLine = offHit + 2;
                        else:
                            self.checkCodeForMacro(sLine[offLine:], offLine);
                            offLine = len(sLine);

                    elif self.iState == self.kiCommentMulti:
                        offHit = sLine.find('*/', offLine);
                        if offHit >= 0:
                            self.sComment += sLine[offLine:offHit];
                            self.iState    = self.kiCode;
                            offLine = offHit + 2;
                            self.parseComment();
                        else:
                            self.sComment += sLine[offLine:];
                            offLine = len(sLine);
                    else:
                        assert False;
            # C++ line comment.
            elif offSlash > 0:
                self.checkCodeForMacro(sLine[:offSlash], 0);

        # No slash, but append the line if in multi-line comment.
        elif self.iState == self.kiCommentMulti:
            self.sComment += sLine;

        # No slash, but check if this is a macro #define or #undef, since we
        # need to be able to selectively expand the ones containing MC blocks.
        elif self.iState == self.kiCode and sLine.lstrip().startswith('#'):
            if self.fDebugPreProc:
                self.debug('line %d: pre-proc' % (self.iLine,));
            self.checkPreProcessorDirectiveForDefineUndef(sLine);

        # No slash, but check code line for relevant macro.
        elif (    self.iState == self.kiCode
              and (sLine.find('IEMOP_') >= 0 or sLine.find('FNIEMOPRM_DEF') >= 0 or sLine.find('IEM_MC') >= 0)):
            self.checkCodeForMacro(sLine, 0);

        # If the line is a '}' in the first position, complete the instructions.
        # (startswith rather than sLine[0] so that an empty line cannot raise IndexError.)
        elif self.iState == self.kiCode and sLine.startswith('}'):
            self.doneInstructions(fEndOfFunction = True);

        # Look for instruction table on the form 'IEM_STATIC const PFNIEMOP g_apfnVexMap3'
        # so we can check/add @oppfx info from it.
        elif self.iState == self.kiCode and sLine.find('PFNIEMOP') > 0 and self.oReFunTable.match(sLine):
            self.parseFunctionTable(sLine);

    self.doneInstructions(fEndOfFunction = True);
    self.debug('%3s%% / %3s stubs out of %4s instructions and %4s MC blocks in %s'
               % (self.cTotalStubs * 100 // max(self.cTotalInstr, 1), self.cTotalStubs, self.cTotalInstr,
                  self.cTotalMcBlocks, os.path.basename(self.sSrcFile),));
    return self.printErrors();
4958
## The parsed content of IEMAllInstructionsCommonBodyMacros.h.
## None until the first __parseFileByName call, which parses that header and
## stores the resulting parser here for use by all subsequent SimpleParser instances.
g_oParsedCommonBodyMacros = None # type: SimpleParser
4961
def __parseFileByName(sSrcFile, sDefaultMap):
    """
    Parses one source file for instruction specifications.

    On the first call IEMAllInstructionsCommonBodyMacros.h is located (next to
    sSrcFile, else next to this script), parsed and cached in
    g_oParsedCommonBodyMacros so its macros are available to all files.

    Returns a tuple: (number of parse errors, the SimpleParser instance).
    Raises Exception if a file cannot be opened or read, and re-raises
    ParserException (after printing it to stderr) on fatal parse trouble.
    """
    #
    # Read sSrcFile into a line array.
    #
    try:
        oFile = open(sSrcFile, "r");    # pylint: disable=consider-using-with,unspecified-encoding
    except Exception as oXcpt:
        raise Exception("failed to open %s for reading: %s" % (sSrcFile, oXcpt,));
    with oFile:                         # Closes the file whether or not reading succeeds.
        try:
            asLines = oFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to read %s: %s" % (sSrcFile, oXcpt,));

    #
    # On the first call, we parse IEMAllInstructionsCommonBodyMacros.h so we
    # can use the macros from it when processing the other files.
    #
    global g_oParsedCommonBodyMacros;
    if g_oParsedCommonBodyMacros is None:
        # Locate the file.
        sCommonBodyMacros = os.path.join(os.path.split(sSrcFile)[0], 'IEMAllInstructionsCommonBodyMacros.h');
        if not os.path.isfile(sCommonBodyMacros):
            sCommonBodyMacros = os.path.join(os.path.split(__file__)[0], 'IEMAllInstructionsCommonBodyMacros.h');

        # Read it.
        try:
            with open(sCommonBodyMacros, "r") as oIncFile: # pylint: disable=unspecified-encoding
                asIncFiles = oIncFile.readlines();
        except Exception as oXcpt:
            raise Exception("failed to open/read %s: %s" % (sCommonBodyMacros, oXcpt,));

        # Parse it.  The header must only contribute macros, so any instruction,
        # tag, stub or MC block found in it is treated as an error.
        try:
            oParser = SimpleParser(sCommonBodyMacros, asIncFiles, 'one');
            if oParser.parse() != 0:
                raise ParserException('%s: errors: See above' % (sCommonBodyMacros, ));
            if oParser.cTotalInstr != 0 or oParser.cTotalStubs != 0 or oParser.cTotalTagged != 0 or oParser.cTotalMcBlocks != 0:
                # Argument order matches the message: instr, tags, stubs, MCs.
                raise ParserException('%s: error: Unexpectedly found %u instr, %u tags, %u stubs and %u MCs, expecting zero. %s'
                                      % (sCommonBodyMacros, oParser.cTotalInstr, oParser.cTotalTagged, oParser.cTotalStubs,
                                         oParser.cTotalMcBlocks,
                                         ', '.join(sorted(  [str(oMcBlock.iBeginLine) for oMcBlock in g_aoMcBlocks]
                                                          + [str(oInstr.iLineCreated) for oInstr in g_aoAllInstructions])),));
        except ParserException as oXcpt:
            print(str(oXcpt), file = sys.stderr);
            raise;
        g_oParsedCommonBodyMacros = oParser;

    #
    # Do the parsing.
    #
    try:
        oParser = SimpleParser(sSrcFile, asLines, sDefaultMap, g_oParsedCommonBodyMacros);
        return (oParser.parse(), oParser);
    except ParserException as oXcpt:
        print(str(oXcpt), file = sys.stderr);
        raise;
5023
5024
def __doTestCopying():
    """
    Carries out the asCopyTests requests of all instructions.

    Each referenced name is looked up in g_dAllInstructionsByStat first and,
    if that yields nothing, in g_dAllInstructionsByFunction.  The tests of
    every matching instruction are appended to the requesting instruction's
    aoTests.  References that match nothing, or match the requesting
    instruction itself, are reported on stderr.

    Returns the number of errors reported.
    """
    asProblems = [];
    for oTarget in g_aoAllInstructions:
        for sReference in oTarget.asCopyTests or ():
            oByStat = g_dAllInstructionsByStat.get(sReference, None);
            aoSources = [oByStat,] if oByStat else g_dAllInstructionsByFunction.get(sReference, []);

            if not aoSources:
                asProblems.append('%s:%s: error: @opcopytests reference "%s" not found\n'
                                  % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));
                continue;

            for oSource in aoSources:
                if oSource != oTarget:
                    oTarget.aoTests.extend(oSource.aoTests);
                else:
                    asProblems.append('%s:%s: error: @opcopytests reference "%s" matches the destination\n'
                                      % ( oTarget.sSrcFile, oTarget.iLineCreated, sReference));

    if asProblems:
        sys.stderr.write(u''.join(asProblems));
    return len(asProblems);
5052
5053
def __applyOnlyTest():
    """
    Restricts testing to the instructions in g_aoOnlyTestInstructions.

    When that list is non-empty, aoTests is replaced by an empty list on every
    instruction in g_aoAllInstructions that has tests but is not in the list.
    Always returns 0 (no errors).
    """
    if not g_aoOnlyTestInstructions:
        return 0;

    for oInstr in g_aoAllInstructions:
        if oInstr.aoTests and oInstr not in g_aoOnlyTestInstructions:
            oInstr.aoTests = [];
    return 0;
5065
## List of all main instruction files and their default maps.
## Each entry is a (file name, default map name) pair.  parseAll() parses all
## of them in this order; parseFiles() matches the base name of each given
## file against the first element, ignoring case, to find its default map.
g_aasAllInstrFilesAndDefaultMap = (
    ( 'IEMAllInstructionsCommon.cpp.h',    'one',       ),
    ( 'IEMAllInstructionsOneByte.cpp.h',   'one',       ),
    ( 'IEMAllInstructionsTwoByte0f.cpp.h', 'two0f',     ),
    ( 'IEMAllInstructionsThree0f38.cpp.h', 'three0f38', ),
    ( 'IEMAllInstructionsThree0f3a.cpp.h', 'three0f3a', ),
    ( 'IEMAllInstructionsVexMap1.cpp.h',   'vexmap1',   ),
    ( 'IEMAllInstructionsVexMap2.cpp.h',   'vexmap2',   ),
    ( 'IEMAllInstructionsVexMap3.cpp.h',   'vexmap3',   ),
    ( 'IEMAllInstructions3DNow.cpp.h',     '3dnow',     ),
);
5078
def __parseFilesWorker(asFilesAndDefaultMap):
    """
    Parses the given IEMAllInstruction*.cpp.h files.

    asFilesAndDefaultMap is a sequence of (file name, default map) pairs.  A
    bare file name (no directory part) that does not exist as given is looked
    for in the directory of this script instead.

    After parsing, the test copying and only-test filtering passes are run
    and overall stub statistics are printed to stderr.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    sSrcDir   = os.path.dirname(os.path.abspath(__file__));
    cErrors   = 0;
    aoParsers = [];
    for sFilename, sDefaultMap in asFilesAndDefaultMap:
        if not os.path.split(sFilename)[0] and not os.path.exists(sFilename):
            sFilename = os.path.join(sSrcDir, sFilename);
        cThisErrors, oParser = __parseFileByName(sFilename, sDefaultMap);
        cErrors += cThisErrors;
        aoParsers.append(oParser);
    cErrors += __doTestCopying();
    cErrors += __applyOnlyTest();

    # Total stub stats:
    cTotalStubs = 0;
    for oInstr in g_aoAllInstructions:
        cTotalStubs += oInstr.fStub;
    # max(.., 1) avoids dividing by zero when no instructions were collected
    # (e.g. an empty file list); this is the same guard parse() uses.
    print('debug: %3s%% / %3s stubs out of %4s instructions and %4s MC blocks in total'
          % (cTotalStubs * 100 // max(len(g_aoAllInstructions), 1), cTotalStubs, len(g_aoAllInstructions),
             len(g_aoMcBlocks),),
          file = sys.stderr);

    if cErrors != 0:
        raise Exception('%d parse errors' % (cErrors,));
    return aoParsers;
5109
5110
def parseFiles(asFiles):
    """
    Parses a selection of IEMAllInstruction*.cpp.h files.

    The default map of each file is taken from the first entry in
    g_aasAllInstrFilesAndDefaultMap whose file name equals the base name of
    the given file, ignoring case.

    Returns a list of the parsers on success.
    Raises exception on failure, including when a file cannot be classified.
    """
    asClassified = [];
    for sPath in asFiles:
        sBaseLower  = os.path.split(sPath)[1].lower();
        sDefaultMap = next((asEntry[1] for asEntry in g_aasAllInstrFilesAndDefaultMap
                            if asEntry[0].lower() == sBaseLower), None);
        if not sDefaultMap:
            raise Exception('Unable to classify file: %s' % (sPath,));
        asClassified.append((sPath, sDefaultMap));

    return __parseFilesWorker(asClassified);
5132
5133
def parseAll():
    """
    Parses all the IEMAllInstruction*.cpp.h files.

    The files and their default maps are those listed in
    g_aasAllInstrFilesAndDefaultMap; the work is done by __parseFilesWorker.

    Returns a list of the parsers on success.
    Raises exception on failure.
    """
    return __parseFilesWorker(g_aasAllInstrFilesAndDefaultMap);
5142
5143
5144#
5145# Generators (may perhaps move later).
5146#
def __formatDisassemblerTableEntry(oInstr):
    """
    Formats one disassembler table entry for the given instruction.

    The entry is an OP(...) macro invocation, or OPVEX(...) when the
    instruction has more than three operands.  Its columns are: the format
    string, one parser index per operand slot, the opcode enum, one
    OP_PARM_xxx per operand slot and finally the DISOPTYPE_xxx flags.

    Example of the kind of line produced and the macro consuming it:
        OP("psrlw %Vdq,%Wdq", IDX_ParseModRM, IDX_UseModRM, 0, OP_PSRLW, OP_PARM_Vdq, OP_PARM_Wdq, OP_PARM_NONE,
           DISOPTYPE_HARMLESS),
        define OP(pszOpcode, idxParse1, idxParse2, idxParse3, opcode, param1, param2, param3, optype) \
            { pszOpcode, idxParse1, idxParse2, idxParse3, 0, opcode, param1, param2, param3, 0, 0, optype }

    Returns the formatted line (no trailing newline).
    """
    cOperands = len(oInstr.aoOperands);
    if cOperands > 3:
        sMacro, cMaxOperands = 'OPVEX', 4;
    else:
        sMacro, cMaxOperands = 'OP', 3;
    assert cOperands <= cMaxOperands;

    #
    # Format string: mnemonic, a space, then the comma separated operand formats.
    #
    asOperandFmts = [];
    for oOperand in oInstr.aoOperands:
        sOperandFmt = g_kdOpTypes[oOperand.sType][2];
        if sOperandFmt[0] != '%':               ## @todo remove upper() later.
            sOperandFmt = sOperandFmt.upper();  ## @todo remove upper() later.
        asOperandFmts.append(sOperandFmt);
    sFormat = '%s("%s' % (sMacro, oInstr.sMnemonic,);
    if asOperandFmts:
        sFormat += ' ' + ','.join(asOperandFmts);
    sFormat += '",';

    #
    # Decoders.
    #
    dFixedParsers = {
        'VEX.ModR/M': 'IDX_ParseModRM,',
        'vex2':       'IDX_ParseVex2b,',
        'vex3':       'IDX_ParseVex3b,',
    };
    asParsers = [];
    sEncoding = oInstr.sEncoding;
    if sEncoding is None or sEncoding in ('fixed', 'VEX.fixed'):
        pass;
    elif sEncoding == 'ModR/M':
        # ASSUME the first operand is using the ModR/M encoding
        assert cOperands >= 1 and oInstr.aoOperands[0].usesModRM(), "oInstr=%s" % (oInstr,);
        asParsers.append('IDX_ParseModRM,');
    elif sEncoding == 'prefix':
        asParsers.extend(['0,'] * cOperands);
    elif sEncoding in dFixedParsers:
        asParsers.append(dFixedParsers[sEncoding]);
    elif sEncoding in g_dInstructionMaps:
        asParsers.append(g_dInstructionMaps[sEncoding].sDisParse + ',');
    else:
        ## @todo Not handled yet: IDX_ParseTwoByteEsc, IDX_ParseGrp1 thru IDX_ParseGrp16, IDX_ParseShiftGrp2,
        #        IDX_Parse3DNow, IDX_ParseThreeByteEsc4/5, IDX_ParseModFence, IDX_ParseEscFP, IDX_ParseNopPause
        #        and IDX_ParseInvOpModRM.
        assert False, str(oInstr);

    # Check for immediates and stuff in the remaining operands, then pad with zeros.
    for oOperand in oInstr.aoOperands[len(asParsers):]:
        asParsers.append(g_kdOpTypes[oOperand.sType][0] + ',');
    asParsers.extend(['0,'] * (cMaxOperands - len(asParsers)));

    #
    # Opcode and operands.
    #
    assert oInstr.sDisEnum, str(oInstr);
    asParams = ['OP_PARM_' + g_kdOpTypes[oOperand.sType][3] + ',' for oOperand in oInstr.aoOperands];
    asParams.extend(['OP_PARM_NONE,'] * (cMaxOperands - len(asParams)));

    #
    # Flags: the hints that translate to DISOPTYPE_xxx defines, or plain zero.
    #
    asFlags = [];
    for sHint in sorted(oInstr.dHints.keys()):
        sDefine = g_kdHints[sHint];
        if sDefine.startswith('DISOPTYPE_'):
            asFlags.append(sDefine);
    sFlags = (' | '.join(asFlags) or '0') + '),';

    #
    # Format the columns into a line, padding each column out to its offset
    # or using a single space when the line is already past it.
    #
    asColumns   = [sFormat,] + asParsers + [oInstr.sDisEnum + ',',] + asParams + [sFlags,];
    aoffColumns = [4, 29, 49, 65, 77, 89, 109, 125, 141, 157, 183, 199];
    sLine = '';
    for iColumn, sColumn in enumerate(asColumns):
        offColumn = aoffColumns[iColumn];
        sLine = (sLine.ljust(offColumn) if len(sLine) < offColumn else sLine + ' ') + sColumn;
    return sLine;
5271
def __checkIfShortTable(aoTableOrdered, oMap):
    """
    Decides whether a table can be emitted in trimmed ("short") form.

    Leading and trailing None entries are candidates for trimming.  The short
    form is chosen when the number of trimmable entries is at least
    len(aoTableOrdered) // 30.

    NOTE(review): the original comment spoke of saving "more than 30%", but
    the check divides the table length by 30 - confirm which is intended.

    Returns (iInstr, cInstructions, fShortTable): the index of the first entry
    to emit, the index one past the last entry to emit, and whether the short
    form was chosen.  For the full form this is (0, len(aoTableOrdered), False).
    oMap is currently unused (reserved for overriding).
    """
    cEntries = len(aoTableOrdered);

    # One past the last non-None entry, zero if there is none.
    iEnd = next((idx + 1 for idx in range(cEntries - 1, -1, -1) if aoTableOrdered[idx] is not None), 0);

    # First non-None entry, or iEnd if there is none.
    iFirst = next((idx for idx in range(iEnd) if aoTableOrdered[idx] is not None), iEnd);

    cTrimmable = iFirst + (cEntries - iEnd);
    if cTrimmable < cEntries // 30:
        _ = oMap; # Use this for overriding.
        return (0, cEntries, False);
    return (iFirst, iEnd, True);
5293
def generateDisassemblerTables(oDstFile = sys.stdout):
    """
    Generates disassembler tables.

    Parses all the instruction files via parseAll() and then writes, for each
    instruction map whose name starts with "vex", a DISOPCODE table followed
    by a DISOPMAPDESC range record to oDstFile.  Maps selected by 'byte+pfx'
    are split into one table per prefix (none, 0x66, 0xf3, 0xf2) first.

    Returns exit code: 0 on success, 1 if parseAll() failed (the error and a
    traceback are printed to stderr).
    """

    #
    # Parse all.
    #
    try:
        parseAll();
    except Exception as oXcpt: # pylint: disable=broad-except
        print('error: parseAll failed: %s' % (oXcpt,), file = sys.stderr);
        traceback.print_exc(file = sys.stderr);
        return 1;

    #
    # The disassembler uses a slightly different table layout to save space,
    # so maps that are selected by both opcode byte and prefix are split into
    # one map per prefix variant here.
    #
    aoDisasmMaps = [];
    for _, oMap in sorted(iter(g_dInstructionMaps.items()),
                          key = lambda aKV: aKV[1].sEncoding + ''.join(aKV[1].asLeadOpcodes)):
        if oMap.sSelector != 'byte+pfx':
            aoDisasmMaps.append(oMap);
        else:
            # Split the map by prefix.
            aoDisasmMaps.append(oMap.copy(oMap.sName, 'none'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_66', '0x66'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F3', '0xf3'));
            aoDisasmMaps.append(oMap.copy(oMap.sName + '_F2', '0xf2'));

    #
    # Dump each map.
    #
    asHeaderLines = []; # Note! Collected below, but not written anywhere by this function.
    print("debug: maps=%s\n" % (', '.join([oMap.sName for oMap in aoDisasmMaps]),), file = sys.stderr);
    for oMap in aoDisasmMaps:
        sName = oMap.sName;

        if not sName.startswith("vex"):
            continue; # only looking at the vex maps at the moment.

        #
        # Get the instructions for the map and see if we can do a short version or not.
        #
        aoTableOrder    = oMap.getInstructionsInTableOrder();
        cEntriesPerByte = oMap.getEntriesPerByte();
        cEntriesPerRow  = 0x10 * cEntriesPerByte; # One row covers 16 opcode bytes.
        (iInstrStart, iInstrEnd, fShortTable) = __checkIfShortTable(aoTableOrder, oMap);

        #
        # Output the table start.
        # Note! Short tables are static and only accessible via the map range record.
        #
        asLines = [];
        asLines.append('/* Generated from: %-11s Selector: %-7s Encoding: %-7s Lead bytes opcodes: %s */'
                       % ( oMap.sName, oMap.sSelector, oMap.sEncoding, ' '.join(oMap.asLeadOpcodes), ));
        if fShortTable:
            asLines.append('static const DISOPCODE %s[] =' % (oMap.getDisasTableName(),));
        else:
            asHeaderLines.append('extern const DISOPCODE %s[%d];' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
            asLines.append(      'const DISOPCODE %s[%d] =' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));
        asLines.append('{');

        # A short table not starting on a row boundary gets a comment giving the start index.
        if fShortTable and (iInstrStart & (cEntriesPerRow - 1)) != 0:
            asLines.append(' /* %#04x: */' % (iInstrStart,));

        #
        # Output the instructions.
        #
        iInstr = iInstrStart;
        while iInstr < iInstrEnd:
            oInstr = aoTableOrder[iInstr];
            if (iInstr & (cEntriesPerRow - 1)) == 0:
                if iInstr != iInstrStart:
                    asLines.append('');
                asLines.append(' /* %x */' % ((iInstr // cEntriesPerByte) >> 4,));

            if oInstr is None:
                # Invalid. Optimize blocks of invalid instructions.
                cInvalidInstrs = 1;
                while iInstr + cInvalidInstrs < len(aoTableOrder) and aoTableOrder[iInstr + cInvalidInstrs] is None:
                    cInvalidInstrs += 1;
                if (iInstr & (cEntriesPerRow - 1)) == 0 and cInvalidInstrs >= cEntriesPerRow:
                    # A whole row of invalid entries.
                    asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerRow,));
                    iInstr += cEntriesPerRow - 1;
                elif cEntriesPerByte > 1:
                    if (iInstr & (cEntriesPerByte - 1)) == 0 and cInvalidInstrs >= cEntriesPerByte:
                        # All the entries of one opcode byte are invalid.  Skip the rest of them
                        # (the common increment below accounts for the last one).
                        asLines.append(' INVALID_OPCODE_BLOCK_%u,' % (cEntriesPerByte,));
                        iInstr += cEntriesPerByte - 1;
                    else:
                        asLines.append(' /* %#04x/%d */ INVALID_OPCODE,'
                                       % (iInstr // cEntriesPerByte, iInstr % cEntriesPerByte));
                else:
                    asLines.append(' /* %#04x */ INVALID_OPCODE,' % (iInstr));
            elif isinstance(oInstr, list):
                if len(oInstr) != 0:
                    asLines.append(' /* %#04x */ ComplicatedListStuffNeedingWrapper, /* \n -- %s */'
                                   % (iInstr, '\n -- '.join([str(oItem) for oItem in oInstr]),));
                else:
                    # NOTE(review): an empty list is passed on as if it were an instruction - confirm intent.
                    asLines.append(__formatDisassemblerTableEntry(oInstr));
            else:
                asLines.append(__formatDisassemblerTableEntry(oInstr));

            iInstr += 1;

        if iInstrStart >= iInstrEnd:
            asLines.append(' /* dummy */ INVALID_OPCODE');

        asLines.append('};');
        asLines.append('AssertCompile(RT_ELEMENTS(%s) == %s);' % (oMap.getDisasTableName(), iInstrEnd - iInstrStart,));

        #
        # We always emit a map range record, assuming the linker will eliminate the unnecessary ones.
        #
        asHeaderLines.append('extern const DISOPMAPDESC %sRange;' % (oMap.getDisasRangeName()));
        asLines.append('const DISOPMAPDESC %s = { &%s[0], %#04x, RT_ELEMENTS(%s) };'
                       % (oMap.getDisasRangeName(), oMap.getDisasTableName(), iInstrStart, oMap.getDisasTableName(),));

        #
        # Write out the lines.
        #
        oDstFile.write('\n'.join(asLines));
        oDstFile.write('\n');
        oDstFile.write('\n');
    return 0;
5422
# When run as a script: generate the disassembler tables on stdout and use
# the result as the process exit code.
if __name__ == '__main__':
    sys.exit(generateDisassemblerTables());
5425
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette